From abc9fbe1c3e4c1a68ffa665113eccced630b9e1b Mon Sep 17 00:00:00 2001 From: Citus Team Date: Fri, 31 Jan 2020 13:24:43 +0300 Subject: [PATCH 001/124] Squash of original cstore_fdw --- .gitignore | 57 + .travis.yml | 42 + LICENSE | 201 + META.json | 45 + Makefile | 59 + README.md | 373 ++ TODO.md | 41 + cstore.proto | 51 + cstore_compression.c | 171 + cstore_fdw--1.0--1.1.sql | 26 + cstore_fdw--1.1--1.2.sql | 3 + cstore_fdw--1.2--1.3.sql | 3 + cstore_fdw--1.3--1.4.sql | 3 + cstore_fdw--1.4--1.5.sql | 28 + cstore_fdw--1.5--1.6.sql | 19 + cstore_fdw--1.6--1.7.sql | 3 + cstore_fdw--1.7.sql | 60 + cstore_fdw.c | 2414 +++++++ cstore_fdw.control | 5 + cstore_fdw.h | 353 + cstore_metadata_serialization.c | 581 ++ cstore_metadata_serialization.h | 42 + cstore_reader.c | 1383 ++++ cstore_version_compat.h | 58 + cstore_writer.c | 1017 +++ data/array_types.csv | 3 + data/block_filtering.csv | 10000 ++++++++++++++++++++++++++++ data/contestants.1.csv | 5 + data/contestants.2.csv | 3 + data/datetime_types.csv | 2 + data/enum_and_composite_types.csv | 2 + data/null_values.csv | 2 + data/other_types.csv | 2 + data/range_types.csv | 2 + expected/alter.out | 178 + expected/analyze.out | 19 + expected/drop.out | 97 + expected/functions.out | 18 + expected/insert.out | 88 + expected/query.out | 105 + expected/truncate.out | 262 + expected/truncate_0.out | 262 + input/block_filtering.source | 71 + input/copyto.source | 18 + input/create.source | 49 + input/data_types.source | 74 + input/load.source | 44 + output/block_filtering.source | 118 + output/copyto.source | 24 + output/create.source | 50 + output/data_types.source | 84 + output/load.source | 39 + sql/alter.sql | 85 + sql/analyze.sql | 11 + sql/drop.sql | 76 + sql/functions.sql | 20 + sql/insert.sql | 56 + sql/query.sql | 34 + sql/truncate.sql | 135 + 59 files changed, 19076 insertions(+) create mode 100644 .gitignore create mode 100644 .travis.yml create mode 100644 LICENSE create mode 100644 META.json create mode 100644 
Makefile create mode 100644 README.md create mode 100644 TODO.md create mode 100644 cstore.proto create mode 100644 cstore_compression.c create mode 100644 cstore_fdw--1.0--1.1.sql create mode 100644 cstore_fdw--1.1--1.2.sql create mode 100644 cstore_fdw--1.2--1.3.sql create mode 100644 cstore_fdw--1.3--1.4.sql create mode 100644 cstore_fdw--1.4--1.5.sql create mode 100644 cstore_fdw--1.5--1.6.sql create mode 100644 cstore_fdw--1.6--1.7.sql create mode 100644 cstore_fdw--1.7.sql create mode 100644 cstore_fdw.c create mode 100644 cstore_fdw.control create mode 100644 cstore_fdw.h create mode 100644 cstore_metadata_serialization.c create mode 100644 cstore_metadata_serialization.h create mode 100644 cstore_reader.c create mode 100644 cstore_version_compat.h create mode 100644 cstore_writer.c create mode 100644 data/array_types.csv create mode 100644 data/block_filtering.csv create mode 100644 data/contestants.1.csv create mode 100644 data/contestants.2.csv create mode 100644 data/datetime_types.csv create mode 100644 data/enum_and_composite_types.csv create mode 100644 data/null_values.csv create mode 100644 data/other_types.csv create mode 100644 data/range_types.csv create mode 100644 expected/alter.out create mode 100644 expected/analyze.out create mode 100644 expected/drop.out create mode 100644 expected/functions.out create mode 100644 expected/insert.out create mode 100644 expected/query.out create mode 100644 expected/truncate.out create mode 100644 expected/truncate_0.out create mode 100644 input/block_filtering.source create mode 100644 input/copyto.source create mode 100644 input/create.source create mode 100644 input/data_types.source create mode 100644 input/load.source create mode 100644 output/block_filtering.source create mode 100644 output/copyto.source create mode 100644 output/create.source create mode 100644 output/data_types.source create mode 100644 output/load.source create mode 100644 sql/alter.sql create mode 100644 sql/analyze.sql create mode 
100644 sql/drop.sql create mode 100644 sql/functions.sql create mode 100644 sql/insert.sql create mode 100644 sql/query.sql create mode 100644 sql/truncate.sql diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..f95fd0b87 --- /dev/null +++ b/.gitignore @@ -0,0 +1,57 @@ +# ===== +# = C = +# ===== + +# Object files +*.o +*.ko +*.obj +*.elf + +# Libraries +*.lib +*.a + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.app +*.i*86 +*.x86_64 +*.hex + +# ======== +# = Gcov = +# ======== + +# gcc coverage testing tool files + +*.gcno +*.gcda +*.gcov + +# ==================== +# = Project-Specific = +# ==================== + +/data/*.cstore +/data/*.footer + +/sql/block_filtering.sql +/sql/copyto.sql +/sql/create.sql +/sql/data_types.sql +/sql/load.sql + +/expected/block_filtering.out +/expected/copyto.out +/expected/create.out +/expected/data_types.out +/expected/load.out + +*.pb-c.* diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..f83f7206e --- /dev/null +++ b/.travis.yml @@ -0,0 +1,42 @@ +sudo: required +dist: bionic +language: c +cache: + apt: true + directories: + - /home/travis/postgresql +env: + global: + - enable_coverage=yes + - PG_PRELOAD=cstore_fdw + matrix: + - PGVERSION=9.3 + - PGVERSION=9.4 + - PGVERSION=9.5 + - PGVERSION=9.6 + - PGVERSION=10 + - PGVERSION=11 + - PGVERSION=12 + +before_install: + - git clone -b v0.7.13 --depth 1 https://github.com/citusdata/tools.git + - sudo make -C tools install + - setup_apt + - nuke_pg +install: + - sudo apt-get install protobuf-c-compiler + - sudo apt-get install libprotobuf-c0-dev + - sudo locale-gen da_DK + - sudo locale-gen da_DK.utf8 + - sudo pip install cpp-coveralls + - install_pg + - install_custom_pg +before_script: + - chmod 777 . 
+ - chmod 777 data + - chmod 666 data/* + - config_and_start_cluster +script: pg_travis_test +after_success: + - sudo chmod 666 *.gcda + - coveralls --exclude cstore.pb-c.c --exclude cstore.pb-c.h diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..ad410e113 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/META.json b/META.json new file mode 100644 index 000000000..19e819daa --- /dev/null +++ b/META.json @@ -0,0 +1,45 @@ +{ + "name": "cstore_fdw", + "abstract": "Columnar Store for PostgreSQL", + "description": "PostgreSQL extension which implements a Columnar Store.", + "version": "1.7.0", + "maintainer": "Murat Tuncer ", + "license": "apache_2_0", + "provides": { + "cstore_fdw": { + "abstract": "Foreign Data Wrapper for Columnar Store Tables", + "file": "cstore_fdw--1.7.sql", + "docfile": "README.md", + "version": "1.7.0" + } + }, + "prereqs": { + "runtime": { + "requires": { + "PostgreSQL": "9.3.0" + } + } + }, + "resources": { + "bugtracker": { + "web": "http://github.com/citusdata/cstore_fdw/issues/" + }, + "repository": { + "url": "git://github.com/citusdata/cstore_fdw.git", + "web": "https://github.com/citusdata/cstore_fdw/", + "type": "git" + } + }, + "generated_by": "Murat Tuncer", + "meta-spec": { + "version": "1.0.0", + "url": "http://pgxn.org/meta/spec.txt" + }, + "tags": [ + "orc", + "fdw", + "foreign data wrapper", + "cstore_fdw", + "columnar store" + ] +} diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..72daebc55 --- /dev/null +++ b/Makefile @@ -0,0 +1,59 @@ +# cstore_fdw/Makefile +# +# Copyright (c) 2016 Citus Data, Inc. 
+# + +MODULE_big = cstore_fdw + +PG_CPPFLAGS = --std=c99 +SHLIB_LINK = -lprotobuf-c +OBJS = cstore.pb-c.o cstore_fdw.o cstore_writer.o cstore_reader.o \ + cstore_metadata_serialization.o cstore_compression.o + +EXTENSION = cstore_fdw +DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ + cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ + cstore_fdw--1.0--1.1.sql + +REGRESS = create load query analyze data_types functions block_filtering drop \ + insert copyto alter truncate +EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ + sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ + sql/copyto.sql expected/block_filtering.out expected/create.out \ + expected/data_types.out expected/load.out expected/copyto.out + +ifeq ($(enable_coverage),yes) + PG_CPPFLAGS += --coverage + SHLIB_LINK += --coverage + EXTRA_CLEAN += *.gcno +endif + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Darwin) + PG_CPPFLAGS += -I/usr/local/include +endif + +# +# Users need to specify their Postgres installation path through pg_config. For +# example: /usr/local/pgsql/bin/pg_config or /usr/lib/postgresql/9.3/bin/pg_config +# + +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) + +ifndef MAJORVERSION + MAJORVERSION := $(basename $(VERSION)) +endif + +ifeq (,$(findstring $(MAJORVERSION), 9.3 9.4 9.5 9.6 10 11 12)) + $(error PostgreSQL 9.3 to 12 is required to compile this extension) +endif + +cstore.pb-c.c: cstore.proto + protoc-c --c_out=. 
cstore.proto + +installcheck: remove_cstore_files + +remove_cstore_files: + rm -f data/*.cstore data/*.cstore.footer diff --git a/README.md b/README.md new file mode 100644 index 000000000..1a20f3abe --- /dev/null +++ b/README.md @@ -0,0 +1,373 @@ +cstore_fdw +========== + +[![Build Status](https://travis-ci.org/citusdata/cstore_fdw.svg?branch=master)][status] +[![Coverage](http://img.shields.io/coveralls/citusdata/cstore_fdw/master.svg)][coverage] + +Cstore_fdw is an open source columnar store extension for PostgreSQL. Columnar stores provide notable benefits for analytics use cases where data is loaded in batches. Cstore_fdw’s columnar nature delivers performance by only reading relevant data from disk, and it may compress data 6x-10x to reduce space requirements for data archival. + +Cstore_fdw is developed by [Citus Data](https://www.citusdata.com) and can be used in combination with [Citus](https://github.com/citusdata/citus), a postgres extension that intelligently distributes your data and queries across many nodes so your database can scale and your queries are fast. If you have any questions about how Citus can help you scale or how to use Citus in combination with cstore_fdw, [please let us know](https://www.citusdata.com/about/contact_us/). + +Join the [Mailing List][mailing-list] to stay on top of the latest developments for Cstore_fdw. + + +Introduction +------------ + +This extension uses a format for its data layout that is inspired by ORC, +the Optimized Row Columnar format. Like ORC, the cstore format improves +upon RCFile developed at Facebook, and brings the following benefits: + +* Compression: Reduces in-memory and on-disk data size by 2-4x. Can be extended + to support different codecs. +* Column projections: Only reads column data relevant to the query. Improves + performance for I/O bound queries. +* Skip indexes: Stores min/max statistics for row groups, and uses them to skip + over unrelated rows. 
+ +Further, we used the Postgres foreign data wrapper APIs and type representations +with this extension. This brings: + +* Support for 40+ Postgres data types. The user can also create new types and + use them. +* Statistics collection. PostgreSQL's query optimizer uses these stats to + evaluate different query plans and pick the best one. +* Simple setup. Create foreign table and copy data. Run SQL. + + +Building +-------- + +cstore\_fdw depends on protobuf-c for serializing and deserializing table metadata. +So we need to install these packages first: + + # Fedora 17+, CentOS, and Amazon Linux + sudo yum install protobuf-c-devel + + # Ubuntu 10.4+ + sudo apt-get install protobuf-c-compiler + sudo apt-get install libprotobuf-c0-dev + + # Ubuntu 18.4+ + sudo apt-get install protobuf-c-compiler + sudo apt-get install libprotobuf-c-dev + + # Mac OS X + brew install protobuf-c + +**Note.** In CentOS 5, 6, and 7, you may need to install or update EPEL 5, 6, or 7 repositories. + See [this page](https://support.rackspace.com/how-to/install-epel-and-additional-repositories-on-centos-and-red-hat/) +for instructions. + +**Note.** In Amazon Linux, the EPEL repository is installed by default, but not +enabled. See [these instructions](http://aws.amazon.com/amazon-linux-ami/faqs/#epel) +for how to enable it. + +Once you have protobuf-c installed on your machine, you are ready to build +cstore\_fdw. For this, you need to include the pg\_config directory path in +your make command. This path is typically the same as your PostgreSQL +installation's bin/ directory path. For example: + + PATH=/usr/local/pgsql/bin/:$PATH make + sudo PATH=/usr/local/pgsql/bin/:$PATH make install + +**Note.** cstore_fdw requires PostgreSQL version from 9.3 to 12. It doesn't +support earlier versions of PostgreSQL. 
+ + +Usage +----- + +Before using cstore\_fdw, you need to add it to ```shared_preload_libraries``` +in your ```postgresql.conf``` and restart Postgres: + + shared_preload_libraries = 'cstore_fdw' # (change requires restart) + +The following parameters can be set on a cstore foreign table object. + +* filename (optional): The absolute path to the location for storing table data. + If you don't specify the filename option, cstore\_fdw will automatically + choose the $PGDATA/cstore\_fdw directory to store the files. If specified the + value of this parameter will be used as a prefix for all files created to + store table data. For example, the value ```/cstore_fdw/my_table``` could result in + the files ```/cstore_fdw/my_table``` and ```/cstore_fdw/my_table.footer``` being used + to manage table data. +* compression (optional): The compression used for compressing value streams. + Valid options are ```none``` and ```pglz```. The default is ```none```. +* stripe\_row\_count (optional): Number of rows per stripe. The default is + ```150000```. Reducing this decreases the amount memory used for loading data + and querying, but also decreases the performance. +* block\_row\_count (optional): Number of rows per column block. The default is + ```10000```. cstore\_fdw compresses, creates skip indexes, and reads from disk + at the block granularity. Increasing this value helps with compression and results + in fewer reads from disk. However, higher values also reduce the probability of + skipping over unrelated row blocks. + + +To load or append data into a cstore table, you have two options: + +* You can use the [```COPY``` command][copy-command] to load or append data from + a file, a program, or STDIN. +* You can use the ```INSERT INTO cstore_table SELECT ...``` syntax to load or + append data from another table. + +You can use the [```ANALYZE``` command][analyze-command] to collect statistics +about the table. 
These statistics help the query planner to help determine the +most efficient execution plan for each query. + +**Note.** We currently don't support updating table using DELETE, and UPDATE +commands. We also don't support single row inserts. + + +Updating from earlier versions to 1.7 +--------------------------------------- + +To update an existing cstore_fdw installation from versions earlier than 1.6 +you can take the following steps: + +* Download and install cstore_fdw version 1.6 using instructions from the "Building" + section, +* Restart the PostgreSQL server, +* Run ```ALTER EXTENSION cstore_fdw UPDATE;``` + + +Example +------- + +As an example, we demonstrate loading and querying data to/from a column store +table from scratch here. Let's start with downloading and decompressing the data +files. + + wget http://examples.citusdata.com/customer_reviews_1998.csv.gz + wget http://examples.citusdata.com/customer_reviews_1999.csv.gz + + gzip -d customer_reviews_1998.csv.gz + gzip -d customer_reviews_1999.csv.gz + +Then, let's log into Postgres, and run the following commands to create a column +store foreign table: + +```SQL +-- load extension first time after install +CREATE EXTENSION cstore_fdw; + +-- create server object +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; + +-- create foreign table +CREATE FOREIGN TABLE customer_reviews +( + customer_id TEXT, + review_date DATE, + review_rating INTEGER, + review_votes INTEGER, + review_helpful_votes INTEGER, + product_id CHAR(10), + product_title TEXT, + product_sales_rank BIGINT, + product_group TEXT, + product_category TEXT, + product_subcategory TEXT, + similar_product_ids CHAR(10)[] +) +SERVER cstore_server +OPTIONS(compression 'pglz'); +``` + +Next, we load data into the table: + +```SQL +\COPY customer_reviews FROM 'customer_reviews_1998.csv' WITH CSV; +\COPY customer_reviews FROM 'customer_reviews_1999.csv' WITH CSV; +``` + +**Note.** If you are getting ```ERROR: cannot copy to foreign table 
+"customer_reviews"``` when trying to run the COPY commands, double check that you +have added cstore\_fdw to ```shared_preload_libraries``` in ```postgresql.conf``` +and restarted Postgres. + +Next, we collect data distribution statistics about the table. This is optional, +but usually very helpful: + +```SQL +ANALYZE customer_reviews; +``` + +Finally, let's run some example SQL queries on the column store table. + +```SQL +-- Find all reviews a particular customer made on the Dune series in 1998. +SELECT + customer_id, review_date, review_rating, product_id, product_title +FROM + customer_reviews +WHERE + customer_id ='A27T7HVDXA3K2A' AND + product_title LIKE '%Dune%' AND + review_date >= '1998-01-01' AND + review_date <= '1998-12-31'; + +-- Do we have a correlation between a book's title's length and its review ratings? +SELECT + width_bucket(length(product_title), 1, 50, 5) title_length_bucket, + round(avg(review_rating), 2) AS review_average, + count(*) +FROM + customer_reviews +WHERE + product_group = 'Book' +GROUP BY + title_length_bucket +ORDER BY + title_length_bucket; +``` + + +Usage with Citus +---------------- + +The example above illustrated how to load data into a PostgreSQL database running +on a single host. However, sometimes your data is too large to analyze effectively +on a single host. Citus is a product built by Citus Data that allows you to run +a distributed PostgreSQL database to analyze your data using the power of multiple +hosts. You can easily install and run other PostgreSQL extensions and foreign data +wrappers—including cstore_fdw—alongside Citus. + +You can create a cstore_fdw table and distribute it using the +```create_distributed_table()``` UDF just like any other table. You can load data +using the ```copy``` command as you would do in single node PostgreSQL. + +Using Skip Indexes +------------------ + +cstore_fdw partitions each column into multiple blocks. 
Skip indexes store minimum +and maximum values for each of these blocks. While scanning the table, if min/max +values of the block contradict the WHERE clause, then the block is completely +skipped. This way, the query processes less data and hence finishes faster. + +To use skip indexes more efficiently, you should load the data after sorting it +on a column that is commonly used in the WHERE clause. This ensures that there is +a minimum overlap between blocks and the chance of them being skipped is higher. + +In practice, the data generally has an inherent dimension (for example a time field) +on which it is naturally sorted. Usually, the queries also have a filter clause on +that column (for example you want to query only the last week's data), and hence you +don't need to sort the data in such cases. + + +Uninstalling cstore_fdw +----------------------- + +Before uninstalling the extension, first you need to drop all the cstore tables: + + postgres=# DROP FOREIGN TABLE cstore_table_1; + ... + postgres=# DROP FOREIGN TABLE cstore_table_n; + +Then, you should drop the cstore server and extension: + + postgres=# DROP SERVER cstore_server; + postgres=# DROP EXTENSION cstore_fdw; + +cstore\_fdw automatically creates some directories inside the PostgreSQL's data +directory to store its files. To remove them, you can run: + + $ rm -rf $PGDATA/cstore_fdw + +Then, you should remove cstore\_fdw from ```shared_preload_libraries``` in +your ```postgresql.conf```: + + shared_preload_libraries = '' # (change requires restart) + +Finally, to uninstall the extension you can run the following command in the +extension's source code directory. 
This will clean up all the files copied during +the installation: + + $ sudo PATH=/usr/local/pgsql/bin/:$PATH make uninstall + + +Changeset +--------- +### Version 1.7.0 +* (Fix) Add support for PostgreSQL 12 +* (Fix) Support count(t.*) from t type queries +* (Fix) Build failures for MacOS 10.14+ +* (Fix) Make foreign scan parallel safe +* (Fix) Add support for PostgreSQL 11 COPY +### Version 1.6.2 +* (Fix) Add support for PostgreSQL 11 +### Version 1.6.1 +* (Fix) Fix crash during truncate (Cstore crashing server when enabled, not used) +* (Fix) No such file or directory warning when attempting to drop database +### Version 1.6 +* (Feature) Added support for PostgreSQL 10. +* (Fix) Removed table files when a schema, extension or database is dropped. +* (Fix) Removed unused code fragments. +* (Fix) Fixed incorrect initialization of stripe buffers. +* (Fix) Checked user access rights when executing truncate. +* (Fix) Made copy command cancellable. +* (Fix) Fixed namespace issue regarding drop table. + +### Version 1.5.1 +* (Fix) Verify cstore_fdw server on CREATE FOREIGN TABLE command + +### Version 1.5 +* (Feature) Added support for PostgreSQL 9.6. +* (Fix) Removed table data when cstore_fdw table is indirectly dropped. +* (Fix) Removed unused code fragments. +* (Fix) Fixed column selection logic to return columns used in expressions. +* (Fix) Prevented alter table command from changinf column type to incompatible types. + +### Version 1.4.1 + +* (Fix) Compatibility fix for Citus [copy command][copy-command]. + +### Version 1.4 + +* (Feature) Added support for ```TRUNCATE TABLE``` +* (Fix) Added support for PostgreSQL 9.5 + +### Version 1.3 + +* (Feature) Added support for ```ALTER TABLE ADD COLUMN``` and ```ALTER TABLE DROP COLUMN```. +* (Feature) Added column list support in ```COPY FROM```. +* (Optimization) Improve row count estimation, which results in better plans. +* (Fix) Fix the deadlock issue during concurrent inserts. 
+* (Fix) Return correct result when using whole row references. + +### Version 1.2 + +* (Feature) Added support for ```COPY TO```. +* (Feature) Added support for ```INSERT INTO cstore_table SELECT ...```. +* (Optimization) Improved memory usage. +* (Fix) Dropping multiple cstore tables in a single command cleans-up files + of all them. + +### Version 1.1 + +* (Feature) Make filename option optional, and use a default directory inside + $PGDATA to manage cstore tables. +* (Feature) Automatically delete files on DROP FOREIGN TABLE. +* (Fix) Return empty table if no data has been loaded. Previously, cstore_fdw + errored out. +* (Fix) Fix overestimating relation column counts when planning. +* (Feature) Added cstore\_table\_size(tablename) for getting the size of a cstore + table in bytes. + + +Copyright +--------- + +Copyright (c) 2017 Citus Data, Inc. + +This module is free software; you can redistribute it and/or modify it under the +Apache v2.0 License. + +For all types of questions and comments about the wrapper, please contact us at +engage @ citusdata.com. + +[status]: https://travis-ci.org/citusdata/cstore_fdw +[mailing-list]: https://groups.google.com/forum/#!forum/cstore-users +[coverage]: https://coveralls.io/r/citusdata/cstore_fdw +[copy-command]: http://www.postgresql.org/docs/current/static/sql-copy.html +[analyze-command]: http://www.postgresql.org/docs/current/static/sql-analyze.html diff --git a/TODO.md b/TODO.md new file mode 100644 index 000000000..179fbc8c7 --- /dev/null +++ b/TODO.md @@ -0,0 +1,41 @@ +To see the list of features and bug-fixes planned for next releases, see our +[development roadmap][roadmap]. 
+ +Requested Features +------------------ + +* Improve write performance +* Improve read performance +* Add checksum logic +* Add new compression methods +* Enable INSERT/DELETE/UPDATE +* Enable users other than superuser to safely create columnar tables (permissions) +* Transactional semantics +* Add config setting to make pg\_fsync() optional + + +Known Issues +------------ + +* Copy command ignores NOT NULL constraints. +* Planning functions don't take into account average column width. +* Planning functions don't correctly take into account block skipping benefits. +* On 32-bit platforms, when file size is outside the 32-bit signed range, EXPLAIN + command prints incorrect file size. +* If two different columnar tables are configured to point to the same file, + writes to the underlying file aren't protected from each other. +* When a data load is in progress, concurrent reads on the table overestimate the + page count. +* We have a minor memory leak in CStoreEndWrite. We need to also free the + comparisonFunctionArray. +* block\_filtering test fails on Ubuntu because the "da\_DK" locale is not enabled + by default. +* We don't yet incorporate the compression method's impact on disk I/O into cost + estimates. +* CitusDB integration errors: +* Concurrent staging cstore\_fdw tables doesn't work. +* Setting a default value for column with ALTER TABLE has limited support for + existing rows. 
+ +[roadmap]: https://github.com/citusdata/cstore_fdw/wiki/Roadmap + diff --git a/cstore.proto b/cstore.proto new file mode 100644 index 000000000..6e24c9075 --- /dev/null +++ b/cstore.proto @@ -0,0 +1,51 @@ +syntax = "proto2"; + +package protobuf; + +enum CompressionType { + // Values should match with the corresponding struct in cstore_fdw.h + NONE = 0; + PG_LZ = 1; +}; + +message ColumnBlockSkipNode { + optional uint64 rowCount = 1; + optional bytes minimumValue = 2; + optional bytes maximumValue = 3; + optional uint64 valueBlockOffset = 4; + optional uint64 valueLength = 5; + optional CompressionType valueCompressionType = 6; + optional uint64 existsBlockOffset = 7; + optional uint64 existsLength = 8; +} + +message ColumnBlockSkipList { + repeated ColumnBlockSkipNode blockSkipNodeArray = 1; +} + +message StripeFooter { + repeated uint64 skipListSizeArray = 1; + repeated uint64 existsSizeArray = 2; + repeated uint64 valueSizeArray = 3; +} + +message StripeMetadata { + optional uint64 fileOffset = 1; + optional uint64 skipListLength = 2; + optional uint64 dataLength = 3; + optional uint64 footerLength = 4; +} + +message TableFooter { + repeated StripeMetadata stripeMetadataArray = 1; + optional uint32 blockRowCount = 2; +} + +message PostScript { + optional uint64 tableFooterLength = 1; + optional uint64 versionMajor = 2; + optional uint64 versionMinor = 3; + + // Leave this last in the record + optional string magicNumber = 8000; +} diff --git a/cstore_compression.c b/cstore_compression.c new file mode 100644 index 000000000..3b37fd47a --- /dev/null +++ b/cstore_compression.c @@ -0,0 +1,171 @@ +/*------------------------------------------------------------------------- + * + * cstore_compression.c + * + * This file contains compression/decompression functions definitions + * used in cstore_fdw. + * + * Copyright (c) 2016, Citus Data, Inc. 
+ *
+ * $Id$
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+#include "cstore_fdw.h"
+
+#if PG_VERSION_NUM >= 90500
+#include "common/pg_lzcompress.h"
+#else
+#include "utils/pg_lzcompress.h"
+#endif
+
+
+
+
+#if PG_VERSION_NUM >= 90500
+/*
+ * The information at the start of the compressed data. This description is taken
+ * from pg_lzcompress in pre-9.5 version of PostgreSQL.
+ */
+typedef struct CStoreCompressHeader
+{
+	int32 vl_len_;		/* varlena header (do not touch directly!) */
+	int32 rawsize;
+} CStoreCompressHeader;
+
+/*
+ * Utilities for manipulation of header information for compressed data
+ */
+
+#define CSTORE_COMPRESS_HDRSZ ((int32) sizeof(CStoreCompressHeader))
+#define CSTORE_COMPRESS_RAWSIZE(ptr) (((CStoreCompressHeader *) (ptr))->rawsize)
+#define CSTORE_COMPRESS_RAWDATA(ptr) (((char *) (ptr)) + CSTORE_COMPRESS_HDRSZ)
+#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = (len))
+
+#else
+
+#define CSTORE_COMPRESS_HDRSZ (0)
+#define CSTORE_COMPRESS_RAWSIZE(ptr) (PGLZ_RAW_SIZE((PGLZ_Header *) buffer->data))
+#define CSTORE_COMPRESS_RAWDATA(ptr) (((PGLZ_Header *) (ptr)))
+#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = (len))
+
+#endif
+
+
+
+/*
+ * CompressBuffer compresses the given buffer with the given compression type.
+ * outputBuffer is enlarged to contain compressed data. The function returns true
+ * if compression is done, returns false if compression is not done.
+ * outputBuffer is valid only if the function returns true.
+ */ +bool +CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, + CompressionType compressionType) +{ + uint64 maximumLength = PGLZ_MAX_OUTPUT(inputBuffer->len) + CSTORE_COMPRESS_HDRSZ; + bool compressionResult = false; +#if PG_VERSION_NUM >= 90500 + int32 compressedByteCount = 0; +#endif + + if (compressionType != COMPRESSION_PG_LZ) + { + return false; + } + + resetStringInfo(outputBuffer); + enlargeStringInfo(outputBuffer, maximumLength); + +#if PG_VERSION_NUM >= 90500 + compressedByteCount = pglz_compress((const char *) inputBuffer->data, + inputBuffer->len, + CSTORE_COMPRESS_RAWDATA(outputBuffer->data), + PGLZ_strategy_always); + if (compressedByteCount >= 0) + { + CSTORE_COMPRESS_SET_RAWSIZE(outputBuffer->data, inputBuffer->len); + SET_VARSIZE_COMPRESSED(outputBuffer->data, + compressedByteCount + CSTORE_COMPRESS_HDRSZ); + compressionResult = true; + } +#else + + compressionResult = pglz_compress(inputBuffer->data, inputBuffer->len, + CSTORE_COMPRESS_RAWDATA(outputBuffer->data), + PGLZ_strategy_always); +#endif + + if (compressionResult) + { + outputBuffer->len = VARSIZE(outputBuffer->data); + } + + return compressionResult; +} + + +/* + * DecompressBuffer decompresses the given buffer with the given compression + * type. This function returns the buffer as-is when no compression is applied. 
+ */ +StringInfo +DecompressBuffer(StringInfo buffer, CompressionType compressionType) +{ + StringInfo decompressedBuffer = NULL; + + Assert(compressionType == COMPRESSION_NONE || compressionType == COMPRESSION_PG_LZ); + + if (compressionType == COMPRESSION_NONE) + { + /* in case of no compression, return buffer */ + decompressedBuffer = buffer; + } + else if (compressionType == COMPRESSION_PG_LZ) + { + uint32 compressedDataSize = VARSIZE(buffer->data) - CSTORE_COMPRESS_HDRSZ; + uint32 decompressedDataSize = CSTORE_COMPRESS_RAWSIZE(buffer->data); + char *decompressedData = NULL; +#if PG_VERSION_NUM >= 90500 + int32 decompressedByteCount = 0; +#endif + + if (compressedDataSize + CSTORE_COMPRESS_HDRSZ != buffer->len) + { + ereport(ERROR, (errmsg("cannot decompress the buffer"), + errdetail("Expected %u bytes, but received %u bytes", + compressedDataSize, buffer->len))); + } + + decompressedData = palloc0(decompressedDataSize); + +#if PG_VERSION_NUM >= 90500 + +#if PG_VERSION_NUM >= 120000 + decompressedByteCount = pglz_decompress(CSTORE_COMPRESS_RAWDATA(buffer->data), + compressedDataSize, decompressedData, + decompressedDataSize, true); +#else + decompressedByteCount = pglz_decompress(CSTORE_COMPRESS_RAWDATA(buffer->data), + compressedDataSize, decompressedData, + decompressedDataSize); +#endif + + if (decompressedByteCount < 0) + { + ereport(ERROR, (errmsg("cannot decompress the buffer"), + errdetail("compressed data is corrupted"))); + } +#else + pglz_decompress((PGLZ_Header *) buffer->data, decompressedData); +#endif + + decompressedBuffer = palloc0(sizeof(StringInfoData)); + decompressedBuffer->data = decompressedData; + decompressedBuffer->len = decompressedDataSize; + decompressedBuffer->maxlen = decompressedDataSize; + } + + return decompressedBuffer; +} diff --git a/cstore_fdw--1.0--1.1.sql b/cstore_fdw--1.0--1.1.sql new file mode 100644 index 000000000..9e8029638 --- /dev/null +++ b/cstore_fdw--1.0--1.1.sql @@ -0,0 +1,26 @@ +/* 
cstore_fdw/cstore_fdw--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION UPDATE +\echo Use "ALTER EXTENSION cstore_fdw UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION cstore_ddl_event_end_trigger() +RETURNS event_trigger +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE EVENT TRIGGER cstore_ddl_event_end +ON ddl_command_end +EXECUTE PROCEDURE cstore_ddl_event_end_trigger(); + +CREATE FUNCTION cstore_table_size(relation regclass) +RETURNS bigint +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +-- cstore_fdw creates directories to store files for tables with automatically +-- determined filename during the CREATE SERVER statement. Since this feature +-- was newly added in v1.1, servers created with v1.0 did not create them. So, +-- we create a server with v1.1 to ensure that the required directories are +-- created to allow users to create automatically managed tables with old servers. +CREATE SERVER cstore_server_for_updating_1_0_to_1_1 FOREIGN DATA WRAPPER cstore_fdw; +DROP SERVER cstore_server_for_updating_1_0_to_1_1; diff --git a/cstore_fdw--1.1--1.2.sql b/cstore_fdw--1.1--1.2.sql new file mode 100644 index 000000000..6cabb8c5e --- /dev/null +++ b/cstore_fdw--1.1--1.2.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.1--1.2.sql */ + +-- No new functions or definitions were added in 1.2 diff --git a/cstore_fdw--1.2--1.3.sql b/cstore_fdw--1.2--1.3.sql new file mode 100644 index 000000000..3ad187d09 --- /dev/null +++ b/cstore_fdw--1.2--1.3.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.2--1.3.sql */ + +-- No new functions or definitions were added in 1.3 diff --git a/cstore_fdw--1.3--1.4.sql b/cstore_fdw--1.3--1.4.sql new file mode 100644 index 000000000..3b7b0f150 --- /dev/null +++ b/cstore_fdw--1.3--1.4.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.3--1.4.sql */ + +-- No new functions or definitions were added in 1.4 diff --git a/cstore_fdw--1.4--1.5.sql b/cstore_fdw--1.4--1.5.sql new file mode 100644 index 
000000000..55bbb0b2a --- /dev/null +++ b/cstore_fdw--1.4--1.5.sql @@ -0,0 +1,28 @@ +/* cstore_fdw/cstore_fdw--1.4--1.5.sql */ + +CREATE FUNCTION cstore_clean_table_resources(oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION cstore_drop_trigger() + RETURNS event_trigger + LANGUAGE plpgsql + AS $csdt$ +DECLARE v_obj record; +BEGIN + FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP + + IF v_obj.object_type NOT IN ('table', 'foreign table') THEN + CONTINUE; + END IF; + + PERFORM cstore_clean_table_resources(v_obj.objid); + + END LOOP; +END; +$csdt$; + +CREATE EVENT TRIGGER cstore_drop_event + ON SQL_DROP + EXECUTE PROCEDURE cstore_drop_trigger(); diff --git a/cstore_fdw--1.5--1.6.sql b/cstore_fdw--1.5--1.6.sql new file mode 100644 index 000000000..c8f7e8097 --- /dev/null +++ b/cstore_fdw--1.5--1.6.sql @@ -0,0 +1,19 @@ +/* cstore_fdw/cstore_fdw--1.5--1.6.sql */ + +CREATE OR REPLACE FUNCTION cstore_drop_trigger() + RETURNS event_trigger + LANGUAGE plpgsql + AS $csdt$ +DECLARE v_obj record; +BEGIN + FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP + + IF v_obj.object_type NOT IN ('table', 'foreign table') THEN + CONTINUE; + END IF; + + PERFORM public.cstore_clean_table_resources(v_obj.objid); + + END LOOP; +END; +$csdt$; diff --git a/cstore_fdw--1.6--1.7.sql b/cstore_fdw--1.6--1.7.sql new file mode 100644 index 000000000..c7f56f059 --- /dev/null +++ b/cstore_fdw--1.6--1.7.sql @@ -0,0 +1,3 @@ +/* cstore_fdw/cstore_fdw--1.6--1.6.sql */ + +-- No new functions or definitions were added in 1.7 diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql new file mode 100644 index 000000000..ad2683f52 --- /dev/null +++ b/cstore_fdw--1.7.sql @@ -0,0 +1,60 @@ +/* cstore_fdw/cstore_fdw--1.7.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION cstore_fdw" to load this file. 
\quit + +CREATE FUNCTION cstore_fdw_handler() +RETURNS fdw_handler +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FUNCTION cstore_fdw_validator(text[], oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FOREIGN DATA WRAPPER cstore_fdw +HANDLER cstore_fdw_handler +VALIDATOR cstore_fdw_validator; + +CREATE FUNCTION cstore_ddl_event_end_trigger() +RETURNS event_trigger +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE EVENT TRIGGER cstore_ddl_event_end +ON ddl_command_end +EXECUTE PROCEDURE cstore_ddl_event_end_trigger(); + +CREATE FUNCTION cstore_table_size(relation regclass) +RETURNS bigint +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION cstore_clean_table_resources(oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE OR REPLACE FUNCTION cstore_drop_trigger() + RETURNS event_trigger + LANGUAGE plpgsql + AS $csdt$ +DECLARE v_obj record; +BEGIN + FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP + + IF v_obj.object_type NOT IN ('table', 'foreign table') THEN + CONTINUE; + END IF; + + PERFORM public.cstore_clean_table_resources(v_obj.objid); + + END LOOP; +END; +$csdt$; + +CREATE EVENT TRIGGER cstore_drop_event + ON SQL_DROP + EXECUTE PROCEDURE cstore_drop_trigger(); + diff --git a/cstore_fdw.c b/cstore_fdw.c new file mode 100644 index 000000000..b0a327768 --- /dev/null +++ b/cstore_fdw.c @@ -0,0 +1,2414 @@ +/*------------------------------------------------------------------------- + * + * cstore_fdw.c + * + * This file contains the function definitions for scanning, analyzing, and + * copying into cstore_fdw foreign tables. Note that this file uses the API + * provided by cstore_reader and cstore_writer for reading and writing cstore + * files. + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "cstore_fdw.h" +#include "cstore_version_compat.h" + +#include +#include +#include +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/sysattr.h" +#include "access/tuptoaster.h" +#include "catalog/namespace.h" +#include "catalog/pg_foreign_table.h" +#include "catalog/pg_namespace.h" +#include "commands/copy.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/event_trigger.h" +#include "commands/explain.h" +#include "commands/extension.h" +#include "commands/vacuum.h" +#include "foreign/fdwapi.h" +#include "foreign/foreign.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "optimizer/cost.h" +#include "optimizer/pathnode.h" +#include "optimizer/planmain.h" +#include "optimizer/restrictinfo.h" +#if PG_VERSION_NUM >= 120000 +#include "access/heapam.h" +#include "access/tableam.h" +#include "executor/tuptable.h" +#include "optimizer/optimizer.h" +#else +#include "optimizer/var.h" +#endif +#include "parser/parser.h" +#include "parser/parsetree.h" +#include "parser/parse_coerce.h" +#include "parser/parse_type.h" +#include "storage/fd.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/memutils.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#if PG_VERSION_NUM >= 120000 +#include "utils/snapmgr.h" +#else +#include "utils/tqual.h" +#endif + + +/* local functions forward declarations */ +#if PG_VERSION_NUM >= 100000 +static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, char *completionTag); +#else +static void CStoreProcessUtility(Node *parseTree, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, 
+ DestReceiver *destReceiver, char *completionTag); +#endif +static bool CopyCStoreTableStatement(CopyStmt* copyStatement); +static void CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement); +static void CStoreProcessCopyCommand(CopyStmt *copyStatement, const char *queryString, + char *completionTag); +static uint64 CopyIntoCStoreTable(const CopyStmt *copyStatement, + const char *queryString); +static uint64 CopyOutCStoreTable(CopyStmt* copyStatement, const char* queryString); +static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); +static List * DroppedCStoreFilenameList(DropStmt *dropStatement); +static List * FindCStoreTables(List *tableList); +static List * OpenRelationsForTruncate(List *cstoreTableList); +static void TruncateCStoreTables(List *cstoreRelationList); +static void DeleteCStoreTableFiles(char *filename); +static void InitializeCStoreTableFile(Oid relationId, Relation relation); +static bool CStoreTable(Oid relationId); +static bool CStoreServer(ForeignServer *server); +static bool DistributedTable(Oid relationId); +static bool DistributedWorkerCopy(CopyStmt *copyStatement); +static void CreateCStoreDatabaseDirectory(Oid databaseOid); +static bool DirectoryExists(StringInfo directoryName); +static void CreateDirectory(StringInfo directoryName); +static void RemoveCStoreDatabaseDirectory(Oid databaseOid); +static StringInfo OptionNamesString(Oid currentContextId); +static HeapTuple GetSlotHeapTuple(TupleTableSlot *tts); +static CStoreFdwOptions * CStoreGetOptions(Oid foreignTableId); +static char * CStoreGetOptionValue(Oid foreignTableId, const char *optionName); +static void ValidateForeignTableOptions(char *filename, char *compressionTypeString, + char *stripeRowCountString, + char *blockRowCountString); +static char * CStoreDefaultFilePath(Oid foreignTableId); +static CompressionType ParseCompressionType(const char *compressionTypeString); +static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, + 
Oid foreignTableId); +static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, + Oid foreignTableId); +#if PG_VERSION_NUM >= 90500 +static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, + Oid foreignTableId, ForeignPath *bestPath, + List *targetList, List *scanClauses, + Plan *outerPlan); +#else +static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, + Oid foreignTableId, ForeignPath *bestPath, + List *targetList, List *scanClauses); +#endif +static double TupleCountEstimate(RelOptInfo *baserel, const char *filename); +static BlockNumber PageCount(const char *filename); +static List * ColumnList(RelOptInfo *baserel, Oid foreignTableId); +static void CStoreExplainForeignScan(ForeignScanState *scanState, + ExplainState *explainState); +static void CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags); +static TupleTableSlot * CStoreIterateForeignScan(ForeignScanState *scanState); +static void CStoreEndForeignScan(ForeignScanState *scanState); +static void CStoreReScanForeignScan(ForeignScanState *scanState); +static bool CStoreAnalyzeForeignTable(Relation relation, + AcquireSampleRowsFunc *acquireSampleRowsFunc, + BlockNumber *totalPageCount); +static int CStoreAcquireSampleRows(Relation relation, int logLevel, + HeapTuple *sampleRows, int targetRowCount, + double *totalRowCount, double *totalDeadRowCount); +static List * CStorePlanForeignModify(PlannerInfo *plannerInfo, ModifyTable *plan, + Index resultRelation, int subplanIndex); +static void CStoreBeginForeignModify(ModifyTableState *modifyTableState, + ResultRelInfo *relationInfo, List *fdwPrivate, + int subplanIndex, int executorflags); +static void CStoreBeginForeignInsert(ModifyTableState *modifyTableState, + ResultRelInfo *relationInfo); +static TupleTableSlot * CStoreExecForeignInsert(EState *executorState, + ResultRelInfo *relationInfo, + TupleTableSlot *tupleSlot, + TupleTableSlot *planSlot); +static void 
CStoreEndForeignModify(EState *executorState, ResultRelInfo *relationInfo); +static void CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo); +#if PG_VERSION_NUM >= 90600 +static bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte); +#endif + +/* declarations for dynamic loading */ +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); +PG_FUNCTION_INFO_V1(cstore_table_size); +PG_FUNCTION_INFO_V1(cstore_fdw_handler); +PG_FUNCTION_INFO_V1(cstore_fdw_validator); +PG_FUNCTION_INFO_V1(cstore_clean_table_resources); + + +/* saved hook value in case of unload */ +static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; + + +/* + * _PG_init is called when the module is loaded. In this function we save the + * previous utility hook, and then install our hook to pre-intercept calls to + * the copy command. + */ +void _PG_init(void) +{ + PreviousProcessUtilityHook = ProcessUtility_hook; + ProcessUtility_hook = CStoreProcessUtility; +} + + +/* + * _PG_fini is called when the module is unloaded. This function uninstalls the + * extension's hooks. + */ +void _PG_fini(void) +{ + ProcessUtility_hook = PreviousProcessUtilityHook; +} + + +/* + * cstore_ddl_event_end_trigger is the event trigger function which is called on + * ddl_command_end event. This function creates required directories after the + * CREATE SERVER statement and valid data and footer files after the CREATE FOREIGN + * TABLE statement. 
+ */ +Datum +cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) +{ + EventTriggerData *triggerData = NULL; + Node *parseTree = NULL; + + /* error if event trigger manager did not call this function */ + if (!CALLED_AS_EVENT_TRIGGER(fcinfo)) + { + ereport(ERROR, (errmsg("trigger not fired by event trigger manager"))); + } + + triggerData = (EventTriggerData *) fcinfo->context; + parseTree = triggerData->parsetree; + + if (nodeTag(parseTree) == T_CreateForeignServerStmt) + { + CreateForeignServerStmt *serverStatement = (CreateForeignServerStmt *) parseTree; + + char *foreignWrapperName = serverStatement->fdwname; + if (strncmp(foreignWrapperName, CSTORE_FDW_NAME, NAMEDATALEN) == 0) + { + CreateCStoreDatabaseDirectory(MyDatabaseId); + } + } + else if (nodeTag(parseTree) == T_CreateForeignTableStmt) + { + CreateForeignTableStmt *createStatement = (CreateForeignTableStmt *) parseTree; + char *serverName = createStatement->servername; + + bool missingOK = false; + ForeignServer *server = GetForeignServerByName(serverName, missingOK); + if (CStoreServer(server)) + { + Oid relationId = RangeVarGetRelid(createStatement->base.relation, + AccessShareLock, false); + Relation relation = heap_open(relationId, AccessExclusiveLock); + + /* + * Make sure database directory exists before creating a table. + * This is necessary when a foreign server is created inside + * a template database and a new database is created out of it. + * We have no chance to hook into server creation to create data + * directory for it during database creation time. + */ + CreateCStoreDatabaseDirectory(MyDatabaseId); + + InitializeCStoreTableFile(relationId, relation); + heap_close(relation, AccessExclusiveLock); + } + } + + PG_RETURN_NULL(); +} + + +/* + * CStoreProcessUtility is the hook for handling utility commands. This function + * customizes the behaviour of "COPY cstore_table" and "DROP FOREIGN TABLE + * cstore_table" commands. 
For all other utility statements, the function calls + * the previous utility hook or the standard utility command via macro + * CALL_PREVIOUS_UTILITY. + */ +#if PG_VERSION_NUM >= 100000 +static void +CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, char *completionTag) +#else +static void +CStoreProcessUtility(Node * parseTree, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + DestReceiver *destReceiver, char *completionTag) +#endif +{ +#if PG_VERSION_NUM >= 100000 + Node *parseTree = plannedStatement->utilityStmt; +#endif + + if (nodeTag(parseTree) == T_CopyStmt) + { + CopyStmt *copyStatement = (CopyStmt *) parseTree; + + if (CopyCStoreTableStatement(copyStatement)) + { + CStoreProcessCopyCommand(copyStatement, queryString, completionTag); + } + else + { + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + } + } + else if (nodeTag(parseTree) == T_DropStmt) + { + DropStmt *dropStmt = (DropStmt *) parseTree; + + if (dropStmt->removeType == OBJECT_EXTENSION) + { + bool removeCStoreDirectory = false; + ListCell *objectCell = NULL; + + foreach(objectCell, dropStmt->objects) + { + Node *object = (Node *) lfirst(objectCell); + char *objectName = NULL; + +#if PG_VERSION_NUM >= 100000 + Assert(IsA(object, String)); + objectName = strVal(object); +#else + Assert(IsA(object, List)); + objectName = strVal(linitial((List *) object)); +#endif + + if (strncmp(CSTORE_FDW_NAME, objectName, NAMEDATALEN) == 0) + { + removeCStoreDirectory = true; + } + } + + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + + if (removeCStoreDirectory) + { + RemoveCStoreDatabaseDirectory(MyDatabaseId); + } + } + else + { + ListCell *fileListCell = NULL; + List *droppedTables = 
DroppedCStoreFilenameList((DropStmt *) parseTree); + + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + + foreach(fileListCell, droppedTables) + { + char *fileName = lfirst(fileListCell); + + DeleteCStoreTableFiles(fileName); + } + } + } + else if (nodeTag(parseTree) == T_TruncateStmt) + { + TruncateStmt *truncateStatement = (TruncateStmt *) parseTree; + List *allTablesList = truncateStatement->relations; + List *cstoreTablesList = FindCStoreTables(allTablesList); + List *otherTablesList = list_difference(allTablesList, cstoreTablesList); + List *cstoreRelationList = OpenRelationsForTruncate(cstoreTablesList); + ListCell *cstoreRelationCell = NULL; + + if (otherTablesList != NIL) + { + truncateStatement->relations = otherTablesList; + + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + /* restore the former relation list. Our + * replacement could be freed but still needed + * in a cached plan. 
A truncate can be cached + * if run from a pl/pgSQL function */ + truncateStatement->relations = allTablesList; + } + + TruncateCStoreTables(cstoreRelationList); + + foreach(cstoreRelationCell, cstoreRelationList) + { + Relation relation = (Relation) lfirst(cstoreRelationCell); + heap_close(relation, AccessExclusiveLock); + } + } + else if (nodeTag(parseTree) == T_AlterTableStmt) + { + AlterTableStmt *alterTable = (AlterTableStmt *) parseTree; + CStoreProcessAlterTableCommand(alterTable); + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + } + else if (nodeTag(parseTree) == T_DropdbStmt) + { + DropdbStmt *dropDdStmt = (DropdbStmt *) parseTree; + bool missingOk = true; + Oid databaseOid = get_database_oid(dropDdStmt->dbname, missingOk); + + /* let postgres handle error checking and dropping of the database */ + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + + if (databaseOid != InvalidOid) + { + RemoveCStoreDatabaseDirectory(databaseOid); + } + } + /* handle other utility statements */ + else + { + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); + } +} + + +/* + * CopyCStoreTableStatement check whether the COPY statement is a "COPY cstore_table FROM + * ..." or "COPY cstore_table TO ...." statement. If it is then the function returns + * true. The function returns false otherwise. 
+ */ +static bool +CopyCStoreTableStatement(CopyStmt* copyStatement) +{ + bool copyCStoreTableStatement = false; + + if (copyStatement->relation != NULL) + { + Oid relationId = RangeVarGetRelid(copyStatement->relation, + AccessShareLock, true); + bool cstoreTable = CStoreTable(relationId); + if (cstoreTable) + { + bool distributedTable = DistributedTable(relationId); + bool distributedCopy = DistributedWorkerCopy(copyStatement); + + if (distributedTable || distributedCopy) + { + /* let COPY on distributed tables fall through to Citus */ + copyCStoreTableStatement = false; + } + else + { + copyCStoreTableStatement = true; + } + } + } + + return copyCStoreTableStatement; +} + + +/* + * CheckSuperuserPrivilegesForCopy checks if superuser privilege is required by + * copy operation and reports error if user does not have superuser rights. + */ +static void +CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement) +{ + /* + * We disallow copy from file or program except to superusers. These checks + * are based on the checks in DoCopy() function of copy.c. + */ + if (copyStatement->filename != NULL && !superuser()) + { + if (copyStatement->is_program) + { + ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to COPY to or from a program"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + else + { + ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to COPY to or from a file"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); + } + } +} + + +/* + * CStoreProcessCopyCommand handles COPY FROM/TO ... statements. + * It determines the copy direction and forwards execution to appropriate function. 
+ */ +static void +CStoreProcessCopyCommand(CopyStmt *copyStatement, const char* queryString, + char *completionTag) +{ + uint64 processedCount = 0; + + if (copyStatement->is_from) + { + processedCount = CopyIntoCStoreTable(copyStatement, queryString); + } + else + { + processedCount = CopyOutCStoreTable(copyStatement, queryString); + } + + if (completionTag != NULL) + { + snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "COPY " UINT64_FORMAT, + processedCount); + } +} + + +/* + * CopyIntoCStoreTable handles a "COPY cstore_table FROM" statement. This + * function uses the COPY command's functions to read and parse rows from + * the data source specified in the COPY statement. The function then writes + * each row to the file specified in the cstore foreign table options. Finally, + * the function returns the number of copied rows. + */ +static uint64 +CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) +{ + uint64 processedRowCount = 0; + Relation relation = NULL; + Oid relationId = InvalidOid; + TupleDesc tupleDescriptor = NULL; + uint32 columnCount = 0; + CopyState copyState = NULL; + bool nextRowFound = true; + Datum *columnValues = NULL; + bool *columnNulls = NULL; + TableWriteState *writeState = NULL; + CStoreFdwOptions *cstoreFdwOptions = NULL; + MemoryContext tupleContext = NULL; + + /* Only superuser can copy from or to local file */ + CheckSuperuserPrivilegesForCopy(copyStatement); + + Assert(copyStatement->relation != NULL); + + /* + * Open and lock the relation. We acquire ShareUpdateExclusiveLock to allow + * concurrent reads, but block concurrent writes. 
+ */ + relation = heap_openrv(copyStatement->relation, ShareUpdateExclusiveLock); + relationId = RelationGetRelid(relation); + + /* allocate column values and nulls arrays */ + tupleDescriptor = RelationGetDescr(relation); + columnCount = tupleDescriptor->natts; + columnValues = palloc0(columnCount * sizeof(Datum)); + columnNulls = palloc0(columnCount * sizeof(bool)); + + cstoreFdwOptions = CStoreGetOptions(relationId); + + /* + * We create a new memory context called tuple context, and read and write + * each row's values within this memory context. After each read and write, + * we reset the memory context. That way, we immediately release memory + * allocated for each row, and don't bloat memory usage with large input + * files. + */ + tupleContext = AllocSetContextCreate(CurrentMemoryContext, + "CStore COPY Row Memory Context", + ALLOCSET_DEFAULT_SIZES); + + /* init state to read from COPY data source */ +#if (PG_VERSION_NUM >= 100000) + { + ParseState *pstate = make_parsestate(NULL); + pstate->p_sourcetext = queryString; + + copyState = BeginCopyFrom(pstate, relation, copyStatement->filename, + copyStatement->is_program, + NULL, + copyStatement->attlist, + copyStatement->options); + free_parsestate(pstate); + } +#else + copyState = BeginCopyFrom(relation, copyStatement->filename, + copyStatement->is_program, + copyStatement->attlist, + copyStatement->options); +#endif + + /* init state to write to the cstore file */ + writeState = CStoreBeginWrite(cstoreFdwOptions->filename, + cstoreFdwOptions->compressionType, + cstoreFdwOptions->stripeRowCount, + cstoreFdwOptions->blockRowCount, + tupleDescriptor); + + while (nextRowFound) + { + /* read the next row in tupleContext */ + MemoryContext oldContext = MemoryContextSwitchTo(tupleContext); +#if PG_VERSION_NUM >= 120000 + nextRowFound = NextCopyFrom(copyState, NULL, columnValues, columnNulls); +#else + nextRowFound = NextCopyFrom(copyState, NULL, columnValues, columnNulls, NULL); +#endif + 
MemoryContextSwitchTo(oldContext); + + /* write the row to the cstore file */ + if (nextRowFound) + { + CStoreWriteRow(writeState, columnValues, columnNulls); + processedRowCount++; + } + + MemoryContextReset(tupleContext); + + CHECK_FOR_INTERRUPTS(); + } + + /* end read/write sessions and close the relation */ + EndCopyFrom(copyState); + CStoreEndWrite(writeState); + heap_close(relation, ShareUpdateExclusiveLock); + + return processedRowCount; +} + + +/* + * CopyFromCStoreTable handles a "COPY cstore_table TO ..." statement. Statement + * is converted to "COPY (SELECT * FROM cstore_table) TO ..." and forwarded to + * postgres native COPY handler. Function returns number of files copied to external + * stream. Copying selected columns from cstore table is not currently supported. + */ +static uint64 +CopyOutCStoreTable(CopyStmt* copyStatement, const char* queryString) +{ + uint64 processedCount = 0; + RangeVar *relation = NULL; + char *qualifiedName = NULL; + List *queryList = NIL; + Node *rawQuery = NULL; + + StringInfo newQuerySubstring = makeStringInfo(); + + if (copyStatement->attlist != NIL) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("copy column list is not supported"), + errhint("use 'copy (select from ) to " + "...' instead"))); + } + + relation = copyStatement->relation; + qualifiedName = quote_qualified_identifier(relation->schemaname, + relation->relname); + appendStringInfo(newQuerySubstring, "select * from %s", qualifiedName); + queryList = raw_parser(newQuerySubstring->data); + + /* take the first parse tree */ + rawQuery = linitial(queryList); + + /* + * Set the relation field to NULL so that COPY command works on + * query field instead. + */ + copyStatement->relation = NULL; + +#if (PG_VERSION_NUM >= 100000) + /* + * raw_parser returns list of RawStmt* in PG 10+ we need to + * extract actual query from it. 
+ */ + { + ParseState *pstate = make_parsestate(NULL); + RawStmt *rawStatement = (RawStmt *) rawQuery; + + pstate->p_sourcetext = newQuerySubstring->data; + copyStatement->query = rawStatement->stmt; + + DoCopy(pstate, copyStatement, -1, -1, &processedCount); + free_parsestate(pstate); + } +#else + copyStatement->query = rawQuery; + + DoCopy(copyStatement, queryString, &processedCount); +#endif + + return processedCount; +} + + +/* + * CStoreProcessAlterTableCommand checks if given alter table statement is + * compatible with underlying data structure. Currently it only checks alter + * column type. The function errors out if current column type can not be safely + * converted to requested column type. This check is more restrictive than + * PostgreSQL's because we can not change existing data. + */ +static void +CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) +{ + ObjectType objectType = alterStatement->relkind; + RangeVar *relationRangeVar = alterStatement->relation; + Oid relationId = InvalidOid; + List *commandList = alterStatement->cmds; + ListCell *commandCell = NULL; + + /* we are only interested in foreign table changes */ + if (objectType != OBJECT_TABLE && objectType != OBJECT_FOREIGN_TABLE) + { + return; + } + + relationId = RangeVarGetRelid(relationRangeVar, AccessShareLock, true); + if (!CStoreTable(relationId)) + { + return; + } + + foreach(commandCell, commandList) + { + AlterTableCmd *alterCommand = (AlterTableCmd *) lfirst(commandCell); + if(alterCommand->subtype == AT_AlterColumnType) + { + char *columnName = alterCommand->name; + ColumnDef *columnDef = (ColumnDef *) alterCommand->def; + Oid targetTypeId = typenameTypeId(NULL, columnDef->typeName); + char *typeName = TypeNameToString(columnDef->typeName); + AttrNumber attributeNumber = get_attnum(relationId, columnName); + Oid currentTypeId = InvalidOid; + + if (attributeNumber <= 0) + { + /* let standard utility handle this */ + continue; + } + + currentTypeId = 
get_atttype(relationId, attributeNumber); + + /* + * We are only interested in implicit coersion type compatibility. + * Erroring out here to prevent further processing. + */ + if (!can_coerce_type(1, ¤tTypeId, &targetTypeId, COERCION_IMPLICIT)) + { + ereport(ERROR, (errmsg("Column %s cannot be cast automatically to " + "type %s", columnName, typeName))); + } + } + } +} + + +/* + * DropppedCStoreFilenameList extracts and returns the list of cstore file names + * from DROP table statement + */ +static List * +DroppedCStoreFilenameList(DropStmt *dropStatement) +{ + List *droppedCStoreFileList = NIL; + + if (dropStatement->removeType == OBJECT_FOREIGN_TABLE) + { + ListCell *dropObjectCell = NULL; + foreach(dropObjectCell, dropStatement->objects) + { + List *tableNameList = (List *) lfirst(dropObjectCell); + RangeVar *rangeVar = makeRangeVarFromNameList(tableNameList); + + Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); + if (CStoreTable(relationId)) + { + CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(relationId); + char *defaultfilename = CStoreDefaultFilePath(relationId); + + /* + * Skip files that are placed in default location, they are handled + * by sql drop trigger. Both paths are generated by code, use + * of strcmp is safe here. 
+ */ + if (strcmp(defaultfilename, cstoreFdwOptions->filename) == 0) + { + continue; + } + + droppedCStoreFileList = lappend(droppedCStoreFileList, + cstoreFdwOptions->filename); + } + } + } + + return droppedCStoreFileList; +} + + +/* FindCStoreTables returns list of CStore tables from given table list */ +static List * +FindCStoreTables(List *tableList) +{ + List *cstoreTableList = NIL; + ListCell *relationCell = NULL; + foreach(relationCell, tableList) + { + RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); + Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); + if (CStoreTable(relationId) && !DistributedTable(relationId)) + { + cstoreTableList = lappend(cstoreTableList, rangeVar); + } + } + + return cstoreTableList; +} + + +/* + * OpenRelationsForTruncate opens and locks relations for tables to be truncated. + * + * It also performs a permission checks to see if the user has truncate privilege + * on tables. + */ +static List * +OpenRelationsForTruncate(List *cstoreTableList) +{ + ListCell *relationCell = NULL; + List *relationIdList = NIL; + List *relationList = NIL; + foreach(relationCell, cstoreTableList) + { + RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); + Relation relation = heap_openrv(rangeVar, AccessExclusiveLock); + Oid relationId = relation->rd_id; + AclResult aclresult = pg_class_aclcheck(relationId, GetUserId(), + ACL_TRUNCATE); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, ACLCHECK_OBJECT_TABLE, get_rel_name(relationId)); + } + + /* check if this relation is repeated */ + if (list_member_oid(relationIdList, relationId)) + { + heap_close(relation, AccessExclusiveLock); + } + else + { + relationIdList = lappend_oid(relationIdList, relationId); + relationList = lappend(relationList, relation); + } + } + + return relationList; +} + + +/* TruncateCStoreTable truncates given cstore tables */ +static void +TruncateCStoreTables(List *cstoreRelationList) +{ + ListCell *relationCell = NULL; + 
foreach(relationCell, cstoreRelationList) + { + Relation relation = (Relation) lfirst(relationCell); + Oid relationId = relation->rd_id; + CStoreFdwOptions *cstoreFdwOptions = NULL; + + Assert(CStoreTable(relationId)); + + cstoreFdwOptions = CStoreGetOptions(relationId); + DeleteCStoreTableFiles(cstoreFdwOptions->filename); + InitializeCStoreTableFile(relationId, relation); + } +} + + +/* + * DeleteCStoreTableFiles deletes the data and footer files for a cstore table + * whose data filename is given. + */ +static void +DeleteCStoreTableFiles(char *filename) +{ + int dataFileRemoved = 0; + int footerFileRemoved = 0; + + StringInfo tableFooterFilename = makeStringInfo(); + appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); + + /* delete the footer file */ + footerFileRemoved = unlink(tableFooterFilename->data); + if (footerFileRemoved != 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not delete file \"%s\": %m", + tableFooterFilename->data))); + } + + /* delete the data file */ + dataFileRemoved = unlink(filename); + if (dataFileRemoved != 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not delete file \"%s\": %m", + filename))); + } +} + + +/* + * InitializeCStoreTableFile creates data and footer file for a cstore table. + * The function assumes data and footer files do not exist, therefore + * it should be called on empty or non-existing table. Notice that the caller + * is expected to acquire AccessExclusiveLock on the relation. + */ +static void InitializeCStoreTableFile(Oid relationId, Relation relation) +{ + TableWriteState *writeState = NULL; + TupleDesc tupleDescriptor = RelationGetDescr(relation); + CStoreFdwOptions* cstoreFdwOptions = CStoreGetOptions(relationId); + + /* + * Initialize state to write to the cstore file. This creates an + * empty data file and a valid footer file for the table. 
+ */ + writeState = CStoreBeginWrite(cstoreFdwOptions->filename, + cstoreFdwOptions->compressionType, cstoreFdwOptions->stripeRowCount, + cstoreFdwOptions->blockRowCount, tupleDescriptor); + CStoreEndWrite(writeState); +} + + + +/* + * CStoreTable checks if the given table name belongs to a foreign columnar store + * table. If it does, the function returns true. Otherwise, it returns false. + */ +static bool +CStoreTable(Oid relationId) +{ + bool cstoreTable = false; + char relationKind = 0; + + if (relationId == InvalidOid) + { + return false; + } + + relationKind = get_rel_relkind(relationId); + if (relationKind == RELKIND_FOREIGN_TABLE) + { + ForeignTable *foreignTable = GetForeignTable(relationId); + ForeignServer *server = GetForeignServer(foreignTable->serverid); + if (CStoreServer(server)) + { + cstoreTable = true; + } + } + + return cstoreTable; +} + + +/* + * CStoreServer checks if the given foreign server belongs to cstore_fdw. If it + * does, the function returns true. Otherwise, it returns false. + */ +static bool +CStoreServer(ForeignServer *server) +{ + ForeignDataWrapper *foreignDataWrapper = GetForeignDataWrapper(server->fdwid); + bool cstoreServer = false; + + char *foreignWrapperName = foreignDataWrapper->fdwname; + if (strncmp(foreignWrapperName, CSTORE_FDW_NAME, NAMEDATALEN) == 0) + { + cstoreServer = true; + } + + return cstoreServer; +} + + +/* + * DistributedTable checks if the given relationId is the OID of a distributed table, + * which may also be a cstore_fdw table, but in that case COPY should be handled by + * Citus. 
+ */ +static bool +DistributedTable(Oid relationId) +{ + bool distributedTable = false; + Oid partitionOid = InvalidOid; + Relation heapRelation = NULL; + TableScanDesc scanDesc = NULL; + const int scanKeyCount = 1; + ScanKeyData scanKey[1]; + HeapTuple heapTuple = NULL; + + bool missingOK = true; + Oid extensionOid = get_extension_oid(CITUS_EXTENSION_NAME, missingOK); + if (extensionOid == InvalidOid) + { + /* if the citus extension isn't created, no tables are distributed */ + return false; + } + + partitionOid = get_relname_relid(CITUS_PARTITION_TABLE_NAME, PG_CATALOG_NAMESPACE); + if (partitionOid == InvalidOid) + { + /* the pg_dist_partition table does not exist */ + return false; + } + + heapRelation = heap_open(partitionOid, AccessShareLock); + + ScanKeyInit(&scanKey[0], ATTR_NUM_PARTITION_RELATION_ID, InvalidStrategy, + F_OIDEQ, ObjectIdGetDatum(relationId)); + + scanDesc = table_beginscan(heapRelation, SnapshotSelf, scanKeyCount, scanKey); + + heapTuple = heap_getnext(scanDesc, ForwardScanDirection); + + distributedTable = HeapTupleIsValid(heapTuple); + + table_endscan(scanDesc); + relation_close(heapRelation, AccessShareLock); + + return distributedTable; +} + + +/* + * DistributedWorkerCopy returns whether the Citus-specific master_host option is + * present in the COPY options. + */ +static bool +DistributedWorkerCopy(CopyStmt *copyStatement) +{ + ListCell *optionCell = NULL; + foreach(optionCell, copyStatement->options) + { + DefElem *defel = (DefElem *) lfirst(optionCell); + if (strncmp(defel->defname, "master_host", NAMEDATALEN) == 0) + { + return true; + } + } + + return false; +} + + +/* + * CreateCStoreDatabaseDirectory creates the directory (and parent directories, + * if needed) used to store automatically managed cstore_fdw files. The path to + * the directory is $PGDATA/cstore_fdw/{databaseOid}. 
+ */ +static void +CreateCStoreDatabaseDirectory(Oid databaseOid) +{ + bool cstoreDirectoryExists = false; + bool databaseDirectoryExists = false; + StringInfo cstoreDatabaseDirectoryPath = NULL; + + StringInfo cstoreDirectoryPath = makeStringInfo(); + appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); + + cstoreDirectoryExists = DirectoryExists(cstoreDirectoryPath); + if (!cstoreDirectoryExists) + { + CreateDirectory(cstoreDirectoryPath); + } + + cstoreDatabaseDirectoryPath = makeStringInfo(); + appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, + CSTORE_FDW_NAME, databaseOid); + + databaseDirectoryExists = DirectoryExists(cstoreDatabaseDirectoryPath); + if (!databaseDirectoryExists) + { + CreateDirectory(cstoreDatabaseDirectoryPath); + } +} + + +/* DirectoryExists checks if a directory exists for the given directory name. */ +static bool +DirectoryExists(StringInfo directoryName) +{ + bool directoryExists = true; + struct stat directoryStat; + + int statOK = stat(directoryName->data, &directoryStat); + if (statOK == 0) + { + /* file already exists; check that it is a directory */ + if (!S_ISDIR(directoryStat.st_mode)) + { + ereport(ERROR, (errmsg("\"%s\" is not a directory", directoryName->data), + errhint("You need to remove or rename the file \"%s\".", + directoryName->data))); + } + } + else + { + if (errno == ENOENT) + { + directoryExists = false; + } + else + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not stat directory \"%s\": %m", + directoryName->data))); + } + } + + return directoryExists; +} + + +/* CreateDirectory creates a new directory with the given directory name. 
*/ +static void +CreateDirectory(StringInfo directoryName) +{ + int makeOK = mkdir(directoryName->data, S_IRWXU); + if (makeOK != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + directoryName->data))); + } +} + + +/* + * RemoveCStoreDatabaseDirectory removes CStore directory previously + * created for this database. + * However it does not remove 'cstore_fdw' directory even if there + * are no other databases left. + */ +static void +RemoveCStoreDatabaseDirectory(Oid databaseOid) +{ + StringInfo cstoreDirectoryPath = makeStringInfo(); + StringInfo cstoreDatabaseDirectoryPath = makeStringInfo(); + + appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); + + appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, + CSTORE_FDW_NAME, databaseOid); + + if (DirectoryExists(cstoreDatabaseDirectoryPath)) + { + rmtree(cstoreDatabaseDirectoryPath->data, true); + } +} + + +/* + * cstore_table_size returns the total on-disk size of a cstore table in bytes. + * The result includes the sizes of data file and footer file. 
+ */ +Datum +cstore_table_size(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + + int64 tableSize = 0; + CStoreFdwOptions *cstoreFdwOptions = NULL; + char *dataFilename = NULL; + StringInfo footerFilename = NULL; + int dataFileStatResult = 0; + int footerFileStatResult = 0; + struct stat dataFileStatBuffer; + struct stat footerFileStatBuffer; + + bool cstoreTable = CStoreTable(relationId); + if (!cstoreTable) + { + ereport(ERROR, (errmsg("relation is not a cstore table"))); + } + + cstoreFdwOptions = CStoreGetOptions(relationId); + dataFilename = cstoreFdwOptions->filename; + + dataFileStatResult = stat(dataFilename, &dataFileStatBuffer); + if (dataFileStatResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", dataFilename))); + } + + footerFilename = makeStringInfo(); + appendStringInfo(footerFilename, "%s%s", dataFilename, + CSTORE_FOOTER_FILE_SUFFIX); + + footerFileStatResult = stat(footerFilename->data, &footerFileStatBuffer); + if (footerFileStatResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + footerFilename->data))); + } + + tableSize += dataFileStatBuffer.st_size; + tableSize += footerFileStatBuffer.st_size; + + PG_RETURN_INT64(tableSize); +} + + +/* + * cstore_fdw_handler creates and returns a struct with pointers to foreign + * table callback functions. 
+ */ +Datum +cstore_fdw_handler(PG_FUNCTION_ARGS) +{ + FdwRoutine *fdwRoutine = makeNode(FdwRoutine); + + fdwRoutine->GetForeignRelSize = CStoreGetForeignRelSize; + fdwRoutine->GetForeignPaths = CStoreGetForeignPaths; + fdwRoutine->GetForeignPlan = CStoreGetForeignPlan; + fdwRoutine->ExplainForeignScan = CStoreExplainForeignScan; + fdwRoutine->BeginForeignScan = CStoreBeginForeignScan; + fdwRoutine->IterateForeignScan = CStoreIterateForeignScan; + fdwRoutine->ReScanForeignScan = CStoreReScanForeignScan; + fdwRoutine->EndForeignScan = CStoreEndForeignScan; + fdwRoutine->AnalyzeForeignTable = CStoreAnalyzeForeignTable; + fdwRoutine->PlanForeignModify = CStorePlanForeignModify; + fdwRoutine->BeginForeignModify = CStoreBeginForeignModify; + fdwRoutine->ExecForeignInsert = CStoreExecForeignInsert; + fdwRoutine->EndForeignModify = CStoreEndForeignModify; + +#if PG_VERSION_NUM >= 110000 + fdwRoutine->BeginForeignInsert = CStoreBeginForeignInsert; + fdwRoutine->EndForeignInsert = CStoreEndForeignInsert; +#endif + +#if PG_VERSION_NUM >= 90600 + fdwRoutine->IsForeignScanParallelSafe = CStoreIsForeignScanParallelSafe; +#endif + + PG_RETURN_POINTER(fdwRoutine); +} + + +/* + * cstore_fdw_validator validates options given to one of the following commands: + * foreign data wrapper, server, user mapping, or foreign table. This function + * errors out if the given option name or its value is considered invalid. 
+ */ +Datum +cstore_fdw_validator(PG_FUNCTION_ARGS) +{ + Datum optionArray = PG_GETARG_DATUM(0); + Oid optionContextId = PG_GETARG_OID(1); + List *optionList = untransformRelOptions(optionArray); + ListCell *optionCell = NULL; + char *filename = NULL; + char *compressionTypeString = NULL; + char *stripeRowCountString = NULL; + char *blockRowCountString = NULL; + + foreach(optionCell, optionList) + { + DefElem *optionDef = (DefElem *) lfirst(optionCell); + char *optionName = optionDef->defname; + bool optionValid = false; + + int32 optionIndex = 0; + for (optionIndex = 0; optionIndex < ValidOptionCount; optionIndex++) + { + const CStoreValidOption *validOption = &(ValidOptionArray[optionIndex]); + + if ((optionContextId == validOption->optionContextId) && + (strncmp(optionName, validOption->optionName, NAMEDATALEN) == 0)) + { + optionValid = true; + break; + } + } + + /* if invalid option, display an informative error message */ + if (!optionValid) + { + StringInfo optionNamesString = OptionNamesString(optionContextId); + + ereport(ERROR, (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), + errmsg("invalid option \"%s\"", optionName), + errhint("Valid options in this context are: %s", + optionNamesString->data))); + } + + if (strncmp(optionName, OPTION_NAME_FILENAME, NAMEDATALEN) == 0) + { + filename = defGetString(optionDef); + } + else if (strncmp(optionName, OPTION_NAME_COMPRESSION_TYPE, NAMEDATALEN) == 0) + { + compressionTypeString = defGetString(optionDef); + } + else if (strncmp(optionName, OPTION_NAME_STRIPE_ROW_COUNT, NAMEDATALEN) == 0) + { + stripeRowCountString = defGetString(optionDef); + } + else if (strncmp(optionName, OPTION_NAME_BLOCK_ROW_COUNT, NAMEDATALEN) == 0) + { + blockRowCountString = defGetString(optionDef); + } + } + + if (optionContextId == ForeignTableRelationId) + { + ValidateForeignTableOptions(filename, compressionTypeString, + stripeRowCountString, blockRowCountString); + } + + PG_RETURN_VOID(); +} + + +/* + * cstore_clean_table_resources 
cleans up table data and metadata with provided + * relation id. The function is meant to be called from drop_event_trigger. It + * has no way of knowing if the provided relation id belongs to a cstore table. + * Therefore it first checks if data file exists at default location before + * attempting to remove data and footer files. If the table is created at a + * custom path than its resources would not be removed. + */ +Datum +cstore_clean_table_resources(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + StringInfo filePath = makeStringInfo(); + struct stat fileStat; + int statResult = -1; + + appendStringInfo(filePath, "%s/%s/%d/%d", DataDir, CSTORE_FDW_NAME, + (int) MyDatabaseId, (int) relationId); + + /* + * Check to see if the file exist first. This is the only way to + * find out if the table being dropped is a cstore table. + */ + statResult = stat(filePath->data, &fileStat); + if (statResult == 0) + { + DeleteCStoreTableFiles(filePath->data); + } + + PG_RETURN_VOID(); +} + + +/* + * OptionNamesString finds all options that are valid for the current context, + * and concatenates these option names in a comma separated string. The function + * is unchanged from mongo_fdw. 
+ */ +static StringInfo +OptionNamesString(Oid currentContextId) +{ + StringInfo optionNamesString = makeStringInfo(); + bool firstOptionAppended = false; + + int32 optionIndex = 0; + for (optionIndex = 0; optionIndex < ValidOptionCount; optionIndex++) + { + const CStoreValidOption *validOption = &(ValidOptionArray[optionIndex]); + + /* if option belongs to current context, append option name */ + if (currentContextId == validOption->optionContextId) + { + if (firstOptionAppended) + { + appendStringInfoString(optionNamesString, ", "); + } + + appendStringInfoString(optionNamesString, validOption->optionName); + firstOptionAppended = true; + } + } + + return optionNamesString; +} + + +/* + * GetSlotHeapTuple abstracts getting HeapTuple from TupleTableSlot between versions + */ +static HeapTuple +GetSlotHeapTuple(TupleTableSlot *tts) +{ +#if PG_VERSION_NUM >= 120000 + return tts->tts_ops->copy_heap_tuple(tts); +#else + return tts->tts_tuple; +#endif +} + + +/* + * CStoreGetOptions returns the option values to be used when reading and writing + * the cstore file. To resolve these values, the function checks options for the + * foreign table, and if not present, falls back to default values. This function + * errors out if given option values are considered invalid. 
+ */ +static CStoreFdwOptions * +CStoreGetOptions(Oid foreignTableId) +{ + CStoreFdwOptions *cstoreFdwOptions = NULL; + char *filename = NULL; + CompressionType compressionType = DEFAULT_COMPRESSION_TYPE; + int32 stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; + int32 blockRowCount = DEFAULT_BLOCK_ROW_COUNT; + char *compressionTypeString = NULL; + char *stripeRowCountString = NULL; + char *blockRowCountString = NULL; + + filename = CStoreGetOptionValue(foreignTableId, OPTION_NAME_FILENAME); + compressionTypeString = CStoreGetOptionValue(foreignTableId, + OPTION_NAME_COMPRESSION_TYPE); + stripeRowCountString = CStoreGetOptionValue(foreignTableId, + OPTION_NAME_STRIPE_ROW_COUNT); + blockRowCountString = CStoreGetOptionValue(foreignTableId, + OPTION_NAME_BLOCK_ROW_COUNT); + + ValidateForeignTableOptions(filename, compressionTypeString, + stripeRowCountString, blockRowCountString); + + /* parse provided options */ + if (compressionTypeString != NULL) + { + compressionType = ParseCompressionType(compressionTypeString); + } + if (stripeRowCountString != NULL) + { + stripeRowCount = pg_atoi(stripeRowCountString, sizeof(int32), 0); + } + if (blockRowCountString != NULL) + { + blockRowCount = pg_atoi(blockRowCountString, sizeof(int32), 0); + } + + /* set default filename if it is not provided */ + if (filename == NULL) + { + filename = CStoreDefaultFilePath(foreignTableId); + } + + cstoreFdwOptions = palloc0(sizeof(CStoreFdwOptions)); + cstoreFdwOptions->filename = filename; + cstoreFdwOptions->compressionType = compressionType; + cstoreFdwOptions->stripeRowCount = stripeRowCount; + cstoreFdwOptions->blockRowCount = blockRowCount; + + return cstoreFdwOptions; +} + + +/* + * CStoreGetOptionValue walks over foreign table and foreign server options, and + * looks for the option with the given name. If found, the function returns the + * option's value. This function is unchanged from mongo_fdw. 
+ */ +static char * +CStoreGetOptionValue(Oid foreignTableId, const char *optionName) +{ + ForeignTable *foreignTable = NULL; + ForeignServer *foreignServer = NULL; + List *optionList = NIL; + ListCell *optionCell = NULL; + char *optionValue = NULL; + + foreignTable = GetForeignTable(foreignTableId); + foreignServer = GetForeignServer(foreignTable->serverid); + + optionList = list_concat(optionList, foreignTable->options); + optionList = list_concat(optionList, foreignServer->options); + + foreach(optionCell, optionList) + { + DefElem *optionDef = (DefElem *) lfirst(optionCell); + char *optionDefName = optionDef->defname; + + if (strncmp(optionDefName, optionName, NAMEDATALEN) == 0) + { + optionValue = defGetString(optionDef); + break; + } + } + + return optionValue; +} + + +/* + * ValidateForeignTableOptions verifies if given options are valid cstore_fdw + * foreign table options. This function errors out if given option value is + * considered invalid. + */ +static void +ValidateForeignTableOptions(char *filename, char *compressionTypeString, + char *stripeRowCountString, char *blockRowCountString) +{ + /* we currently do not have any checks for filename */ + (void) filename; + + /* check if the provided compression type is valid */ + if (compressionTypeString != NULL) + { + CompressionType compressionType = ParseCompressionType(compressionTypeString); + if (compressionType == COMPRESSION_TYPE_INVALID) + { + ereport(ERROR, (errmsg("invalid compression type"), + errhint("Valid options are: %s", + COMPRESSION_STRING_DELIMITED_LIST))); + } + } + + /* check if the provided stripe row count has correct format and range */ + if (stripeRowCountString != NULL) + { + /* pg_atoi() errors out if the given string is not a valid 32-bit integer */ + int32 stripeRowCount = pg_atoi(stripeRowCountString, sizeof(int32), 0); + if (stripeRowCount < STRIPE_ROW_COUNT_MINIMUM || + stripeRowCount > STRIPE_ROW_COUNT_MAXIMUM) + { + ereport(ERROR, (errmsg("invalid stripe row count"), + 
errhint("Stripe row count must be an integer between " + "%d and %d", STRIPE_ROW_COUNT_MINIMUM, + STRIPE_ROW_COUNT_MAXIMUM))); + } + } + + /* check if the provided block row count has correct format and range */ + if (blockRowCountString != NULL) + { + /* pg_atoi() errors out if the given string is not a valid 32-bit integer */ + int32 blockRowCount = pg_atoi(blockRowCountString, sizeof(int32), 0); + if (blockRowCount < BLOCK_ROW_COUNT_MINIMUM || + blockRowCount > BLOCK_ROW_COUNT_MAXIMUM) + { + ereport(ERROR, (errmsg("invalid block row count"), + errhint("Block row count must be an integer between " + "%d and %d", BLOCK_ROW_COUNT_MINIMUM, + BLOCK_ROW_COUNT_MAXIMUM))); + } + } +} + + +/* + * CStoreDefaultFilePath constructs the default file path to use for a cstore_fdw + * table. The path is of the form $PGDATA/cstore_fdw/{databaseOid}/{relfilenode}. + */ +static char * +CStoreDefaultFilePath(Oid foreignTableId) +{ + Relation relation = relation_open(foreignTableId, AccessShareLock); + RelFileNode relationFileNode = relation->rd_node; + Oid databaseOid = relationFileNode.dbNode; + Oid relationFileOid = relationFileNode.relNode; + + relation_close(relation, AccessShareLock); + + /* PG12 onward does not create relfilenode for foreign tables */ + if (databaseOid == InvalidOid) + { + databaseOid = MyDatabaseId; + relationFileOid = foreignTableId; + + } + + StringInfo cstoreFilePath = makeStringInfo(); + appendStringInfo(cstoreFilePath, "%s/%s/%u/%u", DataDir, CSTORE_FDW_NAME, + databaseOid, relationFileOid); + + return cstoreFilePath->data; +} + + +/* ParseCompressionType converts a string to a compression type. 
*/ +static CompressionType +ParseCompressionType(const char *compressionTypeString) +{ + CompressionType compressionType = COMPRESSION_TYPE_INVALID; + Assert(compressionTypeString != NULL); + + if (strncmp(compressionTypeString, COMPRESSION_STRING_NONE, NAMEDATALEN) == 0) + { + compressionType = COMPRESSION_NONE; + } + else if (strncmp(compressionTypeString, COMPRESSION_STRING_PG_LZ, NAMEDATALEN) == 0) + { + compressionType = COMPRESSION_PG_LZ; + } + + return compressionType; +} + + +/* + * CStoreGetForeignRelSize obtains relation size estimates for a foreign table and + * puts its estimate for row count into baserel->rows. + */ +static void +CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) +{ + CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); + double tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename); + double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo, + 0, JOIN_INNER, NULL); + + double outputRowCount = clamp_row_est(tupleCountEstimate * rowSelectivity); + baserel->rows = outputRowCount; +} + + +/* + * CStoreGetForeignPaths creates possible access paths for a scan on the foreign + * table. We currently have one possible access path. This path filters out row + * blocks that are refuted by where clauses, and only returns values for the + * projected columns. + */ +static void +CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) +{ + Path *foreignScanPath = NULL; + CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); + Relation relation = heap_open(foreignTableId, AccessShareLock); + + /* + * We skip reading columns that are not in query. Here we assume that all + * columns in relation have the same width, and estimate the number pages + * that will be read by query. + * + * Ideally, we should also take into account the row blocks that will be + * suppressed. 
But for that we need to know which columns are used for + * sorting. If we wrongly assume that we are sorted by a specific column + * and underestimate the page count, planner may choose nested loop join + * in a place it shouldn't be used. Choosing merge join or hash join is + * usually safer than nested loop join, so we take the more conservative + * approach and assume all rows in the columnar store file will be read. + * We intend to fix this in a later version by improving the row sampling + * algorithm and using the correlation statistics to detect which columns + * are stored in sorted order. + */ + List *queryColumnList = ColumnList(baserel, foreignTableId); + uint32 queryColumnCount = list_length(queryColumnList); + BlockNumber relationPageCount = PageCount(cstoreFdwOptions->filename); + uint32 relationColumnCount = RelationGetNumberOfAttributes(relation); + + double queryColumnRatio = (double) queryColumnCount / relationColumnCount; + double queryPageCount = relationPageCount * queryColumnRatio; + double totalDiskAccessCost = seq_page_cost * queryPageCount; + + double tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename); + + /* + * We estimate costs almost the same way as cost_seqscan(), thus assuming + * that I/O costs are equivalent to a regular table file of the same size.
+ */ + double filterCostPerTuple = baserel->baserestrictcost.per_tuple; + double cpuCostPerTuple = cpu_tuple_cost + filterCostPerTuple; + double totalCpuCost = cpuCostPerTuple * tupleCountEstimate; + + double startupCost = baserel->baserestrictcost.startup; + double totalCost = startupCost + totalCpuCost + totalDiskAccessCost; + + /* create a foreign path node and add it as the only possible path */ +#if PG_VERSION_NUM >= 90600 + foreignScanPath = (Path *) create_foreignscan_path(root, baserel, + NULL, /* path target */ + baserel->rows, + startupCost, totalCost, + NIL, /* no known ordering */ + NULL, /* not parameterized */ + NULL, /* no outer path */ + NIL); /* no fdw_private */ + +#elif PG_VERSION_NUM >= 90500 + foreignScanPath = (Path *) create_foreignscan_path(root, baserel, baserel->rows, + startupCost, totalCost, + NIL, /* no known ordering */ + NULL, /* not parameterized */ + NULL, /* no outer path */ + NIL); /* no fdw_private */ +#else + foreignScanPath = (Path *) create_foreignscan_path(root, baserel, baserel->rows, + startupCost, totalCost, + NIL, /* no known ordering */ + NULL, /* not parameterized */ + NIL); /* no fdw_private */ +#endif + + add_path(baserel, foreignScanPath); + heap_close(relation, AccessShareLock); +} + + +/* + * CStoreGetForeignPlan creates a ForeignScan plan node for scanning the foreign + * table. We also add the query column list to scan nodes private list, because + * we need it later for skipping over unused columns in the query. 
 */ +#if PG_VERSION_NUM >= 90500 +static ForeignScan * +CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId, + ForeignPath *bestPath, List *targetList, List *scanClauses, + Plan *outerPlan) +#else +static ForeignScan * +CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId, + ForeignPath *bestPath, List *targetList, List *scanClauses) +#endif +{ + ForeignScan *foreignScan = NULL; + List *columnList = NIL; + List *foreignPrivateList = NIL; + + /* + * Although we skip row blocks that are refuted by the WHERE clause, + * we have no native ability to evaluate restriction clauses and make sure + * that all non-related rows are filtered out. So we just put all of the + * scanClauses into the plan node's qual list for the executor to check. + */ + scanClauses = extract_actual_clauses(scanClauses, + false); /* extract regular clauses */ + + /* + * As an optimization, we only read columns that are present in the query. + * To find these columns, we need baserel. We don't have access to baserel + * in executor's callback functions, so we get the column list here and put + * it into foreign scan node's private list. + */ + columnList = ColumnList(baserel, foreignTableId); + foreignPrivateList = list_make1(columnList); + + /* create the foreign scan node */ +#if PG_VERSION_NUM >= 90500 + foreignScan = make_foreignscan(targetList, scanClauses, baserel->relid, + NIL, /* no expressions to evaluate */ + foreignPrivateList, + NIL, + NIL, + NULL); /* no outer path */ +#else + foreignScan = make_foreignscan(targetList, scanClauses, baserel->relid, + NIL, /* no expressions to evaluate */ + foreignPrivateList); +#endif + + return foreignScan; +} + + +/* + * TupleCountEstimate estimates the number of base relation tuples in the given + * file.
+ */ +static double +TupleCountEstimate(RelOptInfo *baserel, const char *filename) +{ + double tupleCountEstimate = 0.0; + + /* check if the user executed Analyze on this foreign table before */ + if (baserel->pages > 0) + { + /* + * We have number of pages and number of tuples from pg_class (from a + * previous ANALYZE), so compute a tuples-per-page estimate and scale + * that by the current file size. + */ + double tupleDensity = baserel->tuples / (double) baserel->pages; + BlockNumber pageCount = PageCount(filename); + + tupleCountEstimate = clamp_row_est(tupleDensity * (double) pageCount); + } + else + { + tupleCountEstimate = (double) CStoreTableRowCount(filename); + } + + return tupleCountEstimate; +} + + +/* PageCount calculates and returns the number of pages in a file. */ +static BlockNumber +PageCount(const char *filename) +{ + BlockNumber pageCount = 0; + struct stat statBuffer; + + /* if file doesn't exist at plan time, use default estimate for its size */ + int statResult = stat(filename, &statBuffer); + if (statResult < 0) + { + statBuffer.st_size = 10 * BLCKSZ; + } + + pageCount = (statBuffer.st_size + (BLCKSZ - 1)) / BLCKSZ; + if (pageCount < 1) + { + pageCount = 1; + } + + return pageCount; +} + + +/* + * ColumnList takes in the planner's information about this foreign table. The + * function then finds all columns needed for query execution, including those + * used in projections, joins, and filter clauses, de-duplicates these columns, + * and returns them in a new list. This function is taken from mongo_fdw with + * slight modifications. 
+ */ +static List * +ColumnList(RelOptInfo *baserel, Oid foreignTableId) +{ + List *columnList = NIL; + List *neededColumnList = NIL; + AttrNumber columnIndex = 1; + AttrNumber columnCount = baserel->max_attr; +#if PG_VERSION_NUM >= 90600 + List *targetColumnList = baserel->reltarget->exprs; +#else + List *targetColumnList = baserel->reltargetlist; +#endif + ListCell *targetColumnCell = NULL; + List *restrictInfoList = baserel->baserestrictinfo; + ListCell *restrictInfoCell = NULL; + const AttrNumber wholeRow = 0; + Relation relation = heap_open(foreignTableId, AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(relation); + + /* first add the columns used in joins and projections */ + foreach(targetColumnCell, targetColumnList) + { + List *targetVarList = NIL; + Node *targetExpr = (Node *) lfirst(targetColumnCell); + +#if PG_VERSION_NUM >= 90600 + targetVarList = pull_var_clause(targetExpr, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_PLACEHOLDERS); +#else + targetVarList = pull_var_clause(targetExpr, + PVC_RECURSE_AGGREGATES, + PVC_RECURSE_PLACEHOLDERS); +#endif + + neededColumnList = list_union(neededColumnList, targetVarList); + } + + /* then walk over all restriction clauses, and pull up any used columns */ + foreach(restrictInfoCell, restrictInfoList) + { + RestrictInfo *restrictInfo = (RestrictInfo *) lfirst(restrictInfoCell); + Node *restrictClause = (Node *) restrictInfo->clause; + List *clauseColumnList = NIL; + + /* recursively pull up any columns used in the restriction clause */ +#if PG_VERSION_NUM >= 90600 + clauseColumnList = pull_var_clause(restrictClause, + PVC_RECURSE_AGGREGATES | + PVC_RECURSE_PLACEHOLDERS); +#else + clauseColumnList = pull_var_clause(restrictClause, + PVC_RECURSE_AGGREGATES, + PVC_RECURSE_PLACEHOLDERS); +#endif + + neededColumnList = list_union(neededColumnList, clauseColumnList); + } + + /* walk over all column definitions, and de-duplicate column list */ + for (columnIndex = 1; columnIndex <= columnCount; 
columnIndex++) + { + ListCell *neededColumnCell = NULL; + Var *column = NULL; + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex - 1); + + if (attributeForm->attisdropped) + { + continue; + } + + /* look for this column in the needed column list */ + foreach(neededColumnCell, neededColumnList) + { + Var *neededColumn = (Var *) lfirst(neededColumnCell); + if (neededColumn->varattno == columnIndex) + { + column = neededColumn; + break; + } + else if (neededColumn->varattno == wholeRow) + { + Index tableId = neededColumn->varno; + + column = makeVar(tableId, columnIndex, attributeForm->atttypid, + attributeForm->atttypmod, attributeForm->attcollation, + 0); + break; + } + } + + if (column != NULL) + { + columnList = lappend(columnList, column); + } + } + + heap_close(relation, AccessShareLock); + + return columnList; +} + + +/* CStoreExplainForeignScan produces extra output for the Explain command. */ +static void +CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState) +{ + Oid foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation); + CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); + + ExplainPropertyText("CStore File", cstoreFdwOptions->filename, explainState); + + /* suppress file size if we're not showing cost details */ + if (explainState->costs) + { + struct stat statBuffer; + + int statResult = stat(cstoreFdwOptions->filename, &statBuffer); + if (statResult == 0) + { + ExplainPropertyLong("CStore File Size", (long) statBuffer.st_size, + explainState); + } + } +} + + +/* CStoreBeginForeignScan starts reading the underlying cstore file.
*/ +static void +CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) +{ + TableReadState *readState = NULL; + Oid foreignTableId = InvalidOid; + CStoreFdwOptions *cstoreFdwOptions = NULL; + Relation currentRelation = scanState->ss.ss_currentRelation; + TupleDesc tupleDescriptor = RelationGetDescr(currentRelation); + List *columnList = NIL; + ForeignScan *foreignScan = NULL; + List *foreignPrivateList = NIL; + List *whereClauseList = NIL; + + /* if Explain with no Analyze, do nothing */ + if (executorFlags & EXEC_FLAG_EXPLAIN_ONLY) + { + return; + } + + foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation); + cstoreFdwOptions = CStoreGetOptions(foreignTableId); + + foreignScan = (ForeignScan *) scanState->ss.ps.plan; + foreignPrivateList = (List *) foreignScan->fdw_private; + whereClauseList = foreignScan->scan.plan.qual; + + columnList = (List *) linitial(foreignPrivateList); + readState = CStoreBeginRead(cstoreFdwOptions->filename, tupleDescriptor, + columnList, whereClauseList); + + scanState->fdw_state = (void *) readState; +} + + +/* + * CStoreIterateForeignScan reads the next record from the cstore file, converts + * it to a Postgres tuple, and stores the converted tuple into the ScanTupleSlot + * as a virtual tuple. 
+ */ +static TupleTableSlot * +CStoreIterateForeignScan(ForeignScanState *scanState) +{ + TableReadState *readState = (TableReadState *) scanState->fdw_state; + TupleTableSlot *tupleSlot = scanState->ss.ss_ScanTupleSlot; + bool nextRowFound = false; + + TupleDesc tupleDescriptor = tupleSlot->tts_tupleDescriptor; + Datum *columnValues = tupleSlot->tts_values; + bool *columnNulls = tupleSlot->tts_isnull; + uint32 columnCount = tupleDescriptor->natts; + + /* initialize all values for this row to null */ + memset(columnValues, 0, columnCount * sizeof(Datum)); + memset(columnNulls, true, columnCount * sizeof(bool)); + + ExecClearTuple(tupleSlot); + + nextRowFound = CStoreReadNextRow(readState, columnValues, columnNulls); + if (nextRowFound) + { + ExecStoreVirtualTuple(tupleSlot); + } + + return tupleSlot; +} + + +/* CStoreEndForeignScan finishes scanning the foreign table. */ +static void +CStoreEndForeignScan(ForeignScanState *scanState) +{ + TableReadState *readState = (TableReadState *) scanState->fdw_state; + if (readState != NULL) + { + CStoreEndRead(readState); + } +} + + +/* CStoreReScanForeignScan rescans the foreign table. */ +static void +CStoreReScanForeignScan(ForeignScanState *scanState) +{ + CStoreEndForeignScan(scanState); + CStoreBeginForeignScan(scanState, 0); +} + + +/* + * CStoreAnalyzeForeignTable sets the total page count and the function pointer + * used to acquire a random sample of rows from the foreign file. 
+ */ +static bool +CStoreAnalyzeForeignTable(Relation relation, + AcquireSampleRowsFunc *acquireSampleRowsFunc, + BlockNumber *totalPageCount) +{ + Oid foreignTableId = RelationGetRelid(relation); + CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); + struct stat statBuffer; + + int statResult = stat(cstoreFdwOptions->filename, &statBuffer); + if (statResult < 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + cstoreFdwOptions->filename))); + } + + (*totalPageCount) = PageCount(cstoreFdwOptions->filename); + (*acquireSampleRowsFunc) = CStoreAcquireSampleRows; + + return true; +} + + +/* + * CStoreAcquireSampleRows acquires a random sample of rows from the foreign + * table. Selected rows are returned in the caller allocated sampleRows array, + * which must have at least target row count entries. The actual number of rows + * selected is returned as the function result. We also count the number of rows + * in the collection and return it in total row count. We also always set dead + * row count to zero. + * + * Note that the returned list of rows does not always follow their actual order + * in the cstore file. Therefore, correlation estimates derived later could be + * inaccurate, but that's OK. We currently don't use correlation estimates (the + * planner only pays attention to correlation for index scans). 
+ */ +static int +CStoreAcquireSampleRows(Relation relation, int logLevel, + HeapTuple *sampleRows, int targetRowCount, + double *totalRowCount, double *totalDeadRowCount) +{ + int sampleRowCount = 0; + double rowCount = 0.0; + double rowCountToSkip = -1; /* -1 means not set yet */ + double selectionState = 0; + MemoryContext oldContext = CurrentMemoryContext; + MemoryContext tupleContext = NULL; + Datum *columnValues = NULL; + bool *columnNulls = NULL; + TupleTableSlot *scanTupleSlot = NULL; + List *columnList = NIL; + List *foreignPrivateList = NULL; + ForeignScanState *scanState = NULL; + ForeignScan *foreignScan = NULL; + char *relationName = NULL; + int executorFlags = 0; + + TupleDesc tupleDescriptor = RelationGetDescr(relation); + uint32 columnCount = tupleDescriptor->natts; + + + /* create list of columns of the relation */ + uint32 columnIndex = 0; + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + const Index tableId = 1; + + if (!attributeForm->attisdropped) + { + Var *column = makeVar(tableId, columnIndex + 1, attributeForm->atttypid, + attributeForm->atttypmod, attributeForm->attcollation, 0); + columnList = lappend(columnList, column); + } + } + + /* setup foreign scan plan node */ + foreignPrivateList = list_make1(columnList); + foreignScan = makeNode(ForeignScan); + foreignScan->fdw_private = foreignPrivateList; + + /* set up tuple slot */ + columnValues = palloc0(columnCount * sizeof(Datum)); + columnNulls = palloc0(columnCount * sizeof(bool)); +#if PG_VERSION_NUM >= 120000 + scanTupleSlot = MakeTupleTableSlot(NULL, &TTSOpsVirtual); +#elif PG_VERSION_NUM >= 110000 + scanTupleSlot = MakeTupleTableSlot(NULL); +#else + scanTupleSlot = MakeTupleTableSlot(); +#endif + scanTupleSlot->tts_tupleDescriptor = tupleDescriptor; + scanTupleSlot->tts_values = columnValues; + scanTupleSlot->tts_isnull = columnNulls; + + /* setup scan state */ + scanState = 
makeNode(ForeignScanState); + scanState->ss.ss_currentRelation = relation; + scanState->ss.ps.plan = (Plan *) foreignScan; + scanState->ss.ss_ScanTupleSlot = scanTupleSlot; + + /* + * Use per-tuple memory context to prevent leak of memory used to read and + * parse rows from the file. + */ + tupleContext = AllocSetContextCreate(CurrentMemoryContext, + "cstore_fdw temporary context", + ALLOCSET_DEFAULT_SIZES); + + CStoreBeginForeignScan(scanState, executorFlags); + + /* prepare for sampling rows */ + selectionState = anl_init_selection_state(targetRowCount); + + for (;;) + { + /* check for user-requested abort or sleep */ + vacuum_delay_point(); + + memset(columnValues, 0, columnCount * sizeof(Datum)); + memset(columnNulls, true, columnCount * sizeof(bool)); + + MemoryContextReset(tupleContext); + MemoryContextSwitchTo(tupleContext); + + /* read the next record */ + CStoreIterateForeignScan(scanState); + + MemoryContextSwitchTo(oldContext); + + /* if there are no more records to read, break */ + if (TTS_EMPTY(scanTupleSlot)) + { + break; + } + + /* + * The first targetRowCount sample rows are simply copied into the + * reservoir. Then we start replacing tuples in the sample until we + * reach the end of the relation. This algorithm is from Jeff Vitter's + * paper (see more info in commands/analyze.c). + */ + if (sampleRowCount < targetRowCount) + { + sampleRows[sampleRowCount] = heap_form_tuple(tupleDescriptor, columnValues, + columnNulls); + sampleRowCount++; + } + else + { + /* + * t in Vitter's paper is the number of records already processed. + * If we need to compute a new S value, we must use the "not yet + * incremented" value of rowCount as t. + */ + if (rowCountToSkip < 0) + { + rowCountToSkip = anl_get_next_S(rowCount, targetRowCount, + &selectionState); + } + + if (rowCountToSkip <= 0) + { + /* + * Found a suitable tuple, so save it, replacing one old tuple + * at random. 
+ */ + int rowIndex = (int) (targetRowCount * anl_random_fract()); + Assert(rowIndex >= 0); + Assert(rowIndex < targetRowCount); + + heap_freetuple(sampleRows[rowIndex]); + sampleRows[rowIndex] = heap_form_tuple(tupleDescriptor, + columnValues, columnNulls); + } + + rowCountToSkip--; + } + + rowCount++; + } + + /* clean up */ + MemoryContextDelete(tupleContext); + pfree(columnValues); + pfree(columnNulls); + + CStoreEndForeignScan(scanState); + + /* emit some interesting relation info */ + relationName = RelationGetRelationName(relation); + ereport(logLevel, (errmsg("\"%s\": file contains %.0f rows; %d rows in sample", + relationName, rowCount, sampleRowCount))); + + (*totalRowCount) = rowCount; + (*totalDeadRowCount) = 0; + + return sampleRowCount; +} + + +/* + * CStorePlanForeignModify checks if operation is supported. Only insert + * command with subquery (ie insert into
select ...) is supported. + * Other forms of insert, delete, and update commands are not supported. It + * throws an error when the command is not supported. + */ +static List * +CStorePlanForeignModify(PlannerInfo *plannerInfo, ModifyTable *plan, + Index resultRelation, int subplanIndex) +{ + bool operationSupported = false; + + if (plan->operation == CMD_INSERT) + { + ListCell *tableCell = NULL; + Query *query = NULL; + + /* + * Only insert operation with select subquery is supported. Other forms + * of insert, update, and delete operations are not supported. + */ + query = plannerInfo->parse; + foreach(tableCell, query->rtable) + { + RangeTblEntry *tableEntry = lfirst(tableCell); + + if (tableEntry->rtekind == RTE_SUBQUERY && + tableEntry->subquery != NULL && + tableEntry->subquery->commandType == CMD_SELECT) + { + operationSupported = true; + break; + } + } + } + + if (!operationSupported) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("operation is not supported"))); + } + + return NIL; +} + + +/* + * CStoreBeginForeignModify prepares cstore table for a modification. + * Only insert is currently supported. + */ +static void +CStoreBeginForeignModify(ModifyTableState *modifyTableState, + ResultRelInfo *relationInfo, List *fdwPrivate, + int subplanIndex, int executorFlags) +{ + /* if Explain with no Analyze, do nothing */ + if (executorFlags & EXEC_FLAG_EXPLAIN_ONLY) + { + return; + } + + Assert (modifyTableState->operation == CMD_INSERT); + + CStoreBeginForeignInsert(modifyTableState, relationInfo); +} + + +/* + * CStoreBeginForeignInsert prepares a cstore table for an insert or rows + * coming from a COPY. 
+ */ +static void +CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *relationInfo) +{ + Oid foreignTableOid = InvalidOid; + CStoreFdwOptions *cstoreFdwOptions = NULL; + TupleDesc tupleDescriptor = NULL; + TableWriteState *writeState = NULL; + Relation relation = NULL; + + foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc); + relation = heap_open(foreignTableOid, ShareUpdateExclusiveLock); + cstoreFdwOptions = CStoreGetOptions(foreignTableOid); + tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); + + writeState = CStoreBeginWrite(cstoreFdwOptions->filename, + cstoreFdwOptions->compressionType, + cstoreFdwOptions->stripeRowCount, + cstoreFdwOptions->blockRowCount, + tupleDescriptor); + + writeState->relation = relation; + relationInfo->ri_FdwState = (void *) writeState; +} + + +/* + * CStoreExecForeignInsert inserts a single row to cstore table + * and returns inserted row's data values. + */ +static TupleTableSlot * +CStoreExecForeignInsert(EState *executorState, ResultRelInfo *relationInfo, + TupleTableSlot *tupleSlot, TupleTableSlot *planSlot) +{ + TableWriteState *writeState = (TableWriteState*) relationInfo->ri_FdwState; + HeapTuple heapTuple; + + Assert(writeState != NULL); + + heapTuple = GetSlotHeapTuple(tupleSlot); + + if (HeapTupleHasExternal(heapTuple)) + { + /* detoast any toasted attributes */ + HeapTuple newTuple = toast_flatten_tuple(heapTuple, + tupleSlot->tts_tupleDescriptor); + + ExecForceStoreHeapTuple(newTuple, tupleSlot, true); + } + + slot_getallattrs(tupleSlot); + + CStoreWriteRow(writeState, tupleSlot->tts_values, tupleSlot->tts_isnull); + + return tupleSlot; +} + + +/* + * CStoreEndForeignModify ends the current modification. Only insert is currently + * supported. 
+ */ +static void +CStoreEndForeignModify(EState *executorState, ResultRelInfo *relationInfo) +{ + CStoreEndForeignInsert(executorState, relationInfo); +} + + +/* + * CStoreEndForeignInsert ends the current insert or COPY operation. + */ +static void +CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo) +{ + TableWriteState *writeState = (TableWriteState*) relationInfo->ri_FdwState; + + /* writeState is NULL during Explain queries */ + if (writeState != NULL) + { + Relation relation = writeState->relation; + + CStoreEndWrite(writeState); + heap_close(relation, ShareUpdateExclusiveLock); + } +} + + +#if PG_VERSION_NUM >= 90600 +/* + * CStoreIsForeignScanParallelSafe always returns true to indicate that + * reading from a cstore_fdw table in a parallel worker is safe. This + * does not enable parallelism for queries on individual cstore_fdw + * tables, but does allow parallel scans of cstore_fdw partitions. + * + * cstore_fdw is parallel-safe because all writes are immediately committed + * to disk and then read from disk. There is no uncommitted state that needs + * to be shared across processes. + */ +static bool +CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, + RangeTblEntry *rte) +{ + return true; +} +#endif diff --git a/cstore_fdw.control b/cstore_fdw.control new file mode 100644 index 000000000..a95b8509f --- /dev/null +++ b/cstore_fdw.control @@ -0,0 +1,5 @@ +# cstore_fdw extension +comment = 'foreign-data wrapper for flat cstore access' +default_version = '1.7' +module_pathname = '$libdir/cstore_fdw' +relocatable = true diff --git a/cstore_fdw.h b/cstore_fdw.h new file mode 100644 index 000000000..2bc3e9c97 --- /dev/null +++ b/cstore_fdw.h @@ -0,0 +1,353 @@ +/*------------------------------------------------------------------------- + * + * cstore_fdw.h + * + * Type and function declarations for CStore foreign data wrapper. + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_FDW_H +#define CSTORE_FDW_H + +#include "access/tupdesc.h" +#include "fmgr.h" +#include "catalog/pg_am.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_foreign_table.h" +#include "lib/stringinfo.h" +#include "utils/rel.h" + + +/* Defines for valid option names */ +#define OPTION_NAME_FILENAME "filename" +#define OPTION_NAME_COMPRESSION_TYPE "compression" +#define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" +#define OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" + +/* Default values for option parameters */ +#define DEFAULT_COMPRESSION_TYPE COMPRESSION_NONE +#define DEFAULT_STRIPE_ROW_COUNT 150000 +#define DEFAULT_BLOCK_ROW_COUNT 10000 + +/* Limits for option parameters */ +#define STRIPE_ROW_COUNT_MINIMUM 1000 +#define STRIPE_ROW_COUNT_MAXIMUM 10000000 +#define BLOCK_ROW_COUNT_MINIMUM 1000 +#define BLOCK_ROW_COUNT_MAXIMUM 100000 + +/* String representations of compression types */ +#define COMPRESSION_STRING_NONE "none" +#define COMPRESSION_STRING_PG_LZ "pglz" +#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" + +/* CStore file signature */ +#define CSTORE_MAGIC_NUMBER "citus_cstore" +#define CSTORE_VERSION_MAJOR 1 +#define CSTORE_VERSION_MINOR 7 + +/* miscellaneous defines */ +#define CSTORE_FDW_NAME "cstore_fdw" +#define CSTORE_FOOTER_FILE_SUFFIX ".footer" +#define CSTORE_TEMP_FILE_SUFFIX ".tmp" +#define CSTORE_TUPLE_COST_MULTIPLIER 10 +#define CSTORE_POSTSCRIPT_SIZE_LENGTH 1 +#define CSTORE_POSTSCRIPT_SIZE_MAX 256 + +/* table containing information about how to partition distributed tables */ +#define CITUS_EXTENSION_NAME "citus" +#define CITUS_PARTITION_TABLE_NAME "pg_dist_partition" + +/* human-readable names for addressing columns of the pg_dist_partition table */ +#define ATTR_NUM_PARTITION_RELATION_ID 1 +#define ATTR_NUM_PARTITION_TYPE 2 +#define ATTR_NUM_PARTITION_KEY 3 + + +/* + * CStoreValidOption keeps an option 
name and a context. When an option is passed + * into cstore_fdw objects (server and foreign table), we compare this option's + * name and context against those of valid options. + */ +typedef struct CStoreValidOption +{ + const char *optionName; + Oid optionContextId; + +} CStoreValidOption; + + +/* Array of options that are valid for cstore_fdw */ +static const uint32 ValidOptionCount = 4; +static const CStoreValidOption ValidOptionArray[] = +{ + /* foreign table options */ + { OPTION_NAME_FILENAME, ForeignTableRelationId }, + { OPTION_NAME_COMPRESSION_TYPE, ForeignTableRelationId }, + { OPTION_NAME_STRIPE_ROW_COUNT, ForeignTableRelationId }, + { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } +}; + + +/* Enumeration for cstore file's compression method */ +typedef enum +{ + COMPRESSION_TYPE_INVALID = -1, + COMPRESSION_NONE = 0, + COMPRESSION_PG_LZ = 1, + + COMPRESSION_COUNT + +} CompressionType; + + +/* + * CStoreFdwOptions holds the option values to be used when reading or writing + * a cstore file. To resolve these values, we first check foreign table's options, + * and if not present, we then fall back to the default values specified above. + */ +typedef struct CStoreFdwOptions +{ + char *filename; + CompressionType compressionType; + uint64 stripeRowCount; + uint32 blockRowCount; + +} CStoreFdwOptions; + + +/* + * StripeMetadata represents information about a stripe. This information is + * stored in the cstore file's footer. + */ +typedef struct StripeMetadata +{ + uint64 fileOffset; + uint64 skipListLength; + uint64 dataLength; + uint64 footerLength; + +} StripeMetadata; + + +/* TableFooter represents the footer of a cstore file. */ +typedef struct TableFooter +{ + List *stripeMetadataList; + uint64 blockRowCount; + +} TableFooter; + + +/* ColumnBlockSkipNode contains statistics for a ColumnBlockData.
*/ +typedef struct ColumnBlockSkipNode +{ + /* statistics about values of a column block */ + bool hasMinMax; + Datum minimumValue; + Datum maximumValue; + uint64 rowCount; + + /* + * Offsets and sizes of value and exists streams in the column data. + * These enable us to skip reading suppressed row blocks, and start reading + * a block without reading previous blocks. + */ + uint64 valueBlockOffset; + uint64 valueLength; + uint64 existsBlockOffset; + uint64 existsLength; + + CompressionType valueCompressionType; + +} ColumnBlockSkipNode; + + +/* + * StripeSkipList can be used for skipping row blocks. It contains a column block + * skip node for each block of each column. blockSkipNodeArray[column][block] + * is the entry for the specified column block. + */ +typedef struct StripeSkipList +{ + ColumnBlockSkipNode **blockSkipNodeArray; + uint32 columnCount; + uint32 blockCount; + +} StripeSkipList; + + +/* + * ColumnBlockData represents a block of data in a column. valueArray stores + * the values of data, and existsArray stores whether a value is present. + * valueBuffer is used to store (uncompressed) serialized values + * referenced by Datum's in valueArray. It is only used for by-reference Datum's. + * There is a one-to-one correspondence between valueArray and existsArray. + */ +typedef struct ColumnBlockData +{ + bool *existsArray; + Datum *valueArray; + + /* valueBuffer keeps actual data for type-by-reference datums from valueArray. */ + StringInfo valueBuffer; + +} ColumnBlockData; + + +/* + * ColumnBlockBuffers represents a block of serialized data in a column. + * valueBuffer stores the serialized values of data, and existsBuffer stores + * serialized value of presence information. valueCompressionType contains + * compression type if valueBuffer is compressed. Finally rowCount has + * the number of rows in this block. 
+ */ +typedef struct ColumnBlockBuffers +{ + StringInfo existsBuffer; + StringInfo valueBuffer; + CompressionType valueCompressionType; + +} ColumnBlockBuffers; + + +/* + * ColumnBuffers represents data buffers for a column in a row stripe. Each + * column is made of multiple column blocks. + */ +typedef struct ColumnBuffers +{ + ColumnBlockBuffers **blockBuffersArray; + +} ColumnBuffers; + + +/* StripeBuffers represents data for a row stripe in a cstore file. */ +typedef struct StripeBuffers +{ + uint32 columnCount; + uint32 rowCount; + ColumnBuffers **columnBuffersArray; + +} StripeBuffers; + + +/* + * StripeFooter represents a stripe's footer. In this footer, we keep three + * arrays of sizes. The number of elements in each of the arrays is equal + * to the number of columns. + */ +typedef struct StripeFooter +{ + uint32 columnCount; + uint64 *skipListSizeArray; + uint64 *existsSizeArray; + uint64 *valueSizeArray; + +} StripeFooter; + + +/* TableReadState represents state of a cstore file read operation. */ +typedef struct TableReadState +{ + FILE *tableFile; + TableFooter *tableFooter; + TupleDesc tupleDescriptor; + + /* + * List of Var pointers for columns in the query. We use this both for + * getting vector of projected columns, and also when we want to build + * base constraint to find selected row blocks. + */ + List *projectedColumnList; + + List *whereClauseList; + MemoryContext stripeReadContext; + StripeBuffers *stripeBuffers; + uint32 readStripeCount; + uint64 stripeReadRowCount; + ColumnBlockData **blockDataArray; + int32 deserializedBlockIndex; + +} TableReadState; + + +/* TableWriteState represents state of a cstore file write operation. 
*/ +typedef struct TableWriteState +{ + FILE *tableFile; + TableFooter *tableFooter; + StringInfo tableFooterFilename; + CompressionType compressionType; + TupleDesc tupleDescriptor; + FmgrInfo **comparisonFunctionArray; + uint64 currentFileOffset; + Relation relation; + + MemoryContext stripeWriteContext; + StripeBuffers *stripeBuffers; + StripeSkipList *stripeSkipList; + uint32 stripeMaxRowCount; + ColumnBlockData **blockDataArray; + /* + * compressionBuffer buffer is used as temporary storage during + * data value compression operation. It is kept here to minimize + * memory allocations. It lives in stripeWriteContext and gets + * deallocated when memory context is reset. + */ + StringInfo compressionBuffer; + +} TableWriteState; + +/* Function declarations for extension loading and unloading */ +extern void _PG_init(void); +extern void _PG_fini(void); + +/* event trigger function declarations */ +extern Datum cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS); + +/* Function declarations for utility UDFs */ +extern Datum cstore_table_size(PG_FUNCTION_ARGS); +extern Datum cstore_clean_table_resources(PG_FUNCTION_ARGS); + +/* Function declarations for foreign data wrapper */ +extern Datum cstore_fdw_handler(PG_FUNCTION_ARGS); +extern Datum cstore_fdw_validator(PG_FUNCTION_ARGS); + +/* Function declarations for writing to a cstore file */ +extern TableWriteState * CStoreBeginWrite(const char *filename, + CompressionType compressionType, + uint64 stripeMaxRowCount, + uint32 blockRowCount, + TupleDesc tupleDescriptor); +extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, + bool *columnNulls); +extern void CStoreEndWrite(TableWriteState * state); + +/* Function declarations for reading from a cstore file */ +extern TableReadState * CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, + List *projectedColumnList, List *qualConditions); +extern TableFooter * CStoreReadFooter(StringInfo tableFooterFilename); +extern bool 
CStoreReadFinished(TableReadState *state); +extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, + bool *columnNulls); +extern void CStoreEndRead(TableReadState *state); + +/* Function declarations for common functions */ +extern FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, + int16 procedureId); +extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, + uint32 blockRowCount); +extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, + uint32 columnCount); +extern uint64 CStoreTableRowCount(const char *filename); +extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, + CompressionType compressionType); +extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); + + +#endif /* CSTORE_FDW_H */ diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c new file mode 100644 index 000000000..26402f897 --- /dev/null +++ b/cstore_metadata_serialization.c @@ -0,0 +1,581 @@ +/*------------------------------------------------------------------------- + * + * cstore_metadata_serialization.c + * + * This file contains function definitions for serializing/deserializing cstore + * metadata. + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" +#include "cstore_fdw.h" +#include "cstore_metadata_serialization.h" +#include "cstore.pb-c.h" +#include "access/tupmacs.h" + + +/* local functions forward declarations */ +static ProtobufCBinaryData DatumToProtobufBinary(Datum datum, bool typeByValue, + int typeLength); +static Datum ProtobufBinaryToDatum(ProtobufCBinaryData protobufBinary, + bool typeByValue, int typeLength); + + +/* + * SerializePostScript serializes the given postscript and returns the result as + * a StringInfo. 
+ */ +StringInfo +SerializePostScript(uint64 tableFooterLength) +{ + StringInfo postscriptBuffer = NULL; + Protobuf__PostScript protobufPostScript = PROTOBUF__POST_SCRIPT__INIT; + uint8 *postscriptData = NULL; + uint32 postscriptSize = 0; + + protobufPostScript.has_tablefooterlength = true; + protobufPostScript.tablefooterlength = tableFooterLength; + protobufPostScript.has_versionmajor = true; + protobufPostScript.versionmajor = CSTORE_VERSION_MAJOR; + protobufPostScript.has_versionminor = true; + protobufPostScript.versionminor = CSTORE_VERSION_MINOR; + protobufPostScript.magicnumber = pstrdup(CSTORE_MAGIC_NUMBER); + + postscriptSize = protobuf__post_script__get_packed_size(&protobufPostScript); + postscriptData = palloc0(postscriptSize); + protobuf__post_script__pack(&protobufPostScript, postscriptData); + + postscriptBuffer = palloc0(sizeof(StringInfoData)); + postscriptBuffer->len = postscriptSize; + postscriptBuffer->maxlen = postscriptSize; + postscriptBuffer->data = (char *) postscriptData; + + return postscriptBuffer; +} + + +/* + * SerializeTableFooter serializes the given table footer and returns the result + * as a StringInfo. 
+ */ +StringInfo +SerializeTableFooter(TableFooter *tableFooter) +{ + StringInfo tableFooterBuffer = NULL; + Protobuf__TableFooter protobufTableFooter = PROTOBUF__TABLE_FOOTER__INIT; + Protobuf__StripeMetadata **stripeMetadataArray = NULL; + ListCell *stripeMetadataCell = NULL; + uint8 *tableFooterData = NULL; + uint32 tableFooterSize = 0; + uint32 stripeIndex = 0; + + List *stripeMetadataList = tableFooter->stripeMetadataList; + uint32 stripeCount = list_length(stripeMetadataList); + stripeMetadataArray = palloc0(stripeCount * sizeof(Protobuf__StripeMetadata *)); + + foreach(stripeMetadataCell, stripeMetadataList) + { + StripeMetadata *stripeMetadata = lfirst(stripeMetadataCell); + + Protobuf__StripeMetadata *protobufStripeMetadata = NULL; + protobufStripeMetadata = palloc0(sizeof(Protobuf__StripeMetadata)); + protobuf__stripe_metadata__init(protobufStripeMetadata); + protobufStripeMetadata->has_fileoffset = true; + protobufStripeMetadata->fileoffset = stripeMetadata->fileOffset; + protobufStripeMetadata->has_skiplistlength = true; + protobufStripeMetadata->skiplistlength = stripeMetadata->skipListLength; + protobufStripeMetadata->has_datalength = true; + protobufStripeMetadata->datalength = stripeMetadata->dataLength; + protobufStripeMetadata->has_footerlength = true; + protobufStripeMetadata->footerlength = stripeMetadata->footerLength; + + stripeMetadataArray[stripeIndex] = protobufStripeMetadata; + stripeIndex++; + } + + protobufTableFooter.n_stripemetadataarray = stripeCount; + protobufTableFooter.stripemetadataarray = stripeMetadataArray; + protobufTableFooter.has_blockrowcount = true; + protobufTableFooter.blockrowcount = tableFooter->blockRowCount; + + tableFooterSize = protobuf__table_footer__get_packed_size(&protobufTableFooter); + tableFooterData = palloc0(tableFooterSize); + protobuf__table_footer__pack(&protobufTableFooter, tableFooterData); + + tableFooterBuffer = palloc0(sizeof(StringInfoData)); + tableFooterBuffer->len = tableFooterSize; + 
tableFooterBuffer->maxlen = tableFooterSize; + tableFooterBuffer->data = (char *) tableFooterData; + + return tableFooterBuffer; +} + + +/* + * SerializeStripeFooter serializes given stripe footer and returns the result + * as a StringInfo. + */ +StringInfo +SerializeStripeFooter(StripeFooter *stripeFooter) +{ + StringInfo stripeFooterBuffer = NULL; + Protobuf__StripeFooter protobufStripeFooter = PROTOBUF__STRIPE_FOOTER__INIT; + uint8 *stripeFooterData = NULL; + uint32 stripeFooterSize = 0; + + protobufStripeFooter.n_skiplistsizearray = stripeFooter->columnCount; + protobufStripeFooter.skiplistsizearray = (uint64_t *) stripeFooter->skipListSizeArray; + protobufStripeFooter.n_existssizearray = stripeFooter->columnCount; + protobufStripeFooter.existssizearray = (uint64_t *) stripeFooter->existsSizeArray; + protobufStripeFooter.n_valuesizearray = stripeFooter->columnCount; + protobufStripeFooter.valuesizearray = (uint64_t *) stripeFooter->valueSizeArray; + + stripeFooterSize = protobuf__stripe_footer__get_packed_size(&protobufStripeFooter); + stripeFooterData = palloc0(stripeFooterSize); + protobuf__stripe_footer__pack(&protobufStripeFooter, stripeFooterData); + + stripeFooterBuffer = palloc0(sizeof(StringInfoData)); + stripeFooterBuffer->len = stripeFooterSize; + stripeFooterBuffer->maxlen = stripeFooterSize; + stripeFooterBuffer->data = (char *) stripeFooterData; + + return stripeFooterBuffer; +} + + +/* + * SerializeColumnSkipList serializes a column skip list, where the colum skip + * list includes all block skip nodes for that column. The function then returns + * the result as a string info. 
 */
StringInfo
SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount,
						bool typeByValue, int typeLength)
{
	StringInfo blockSkipListBuffer = NULL;
	Protobuf__ColumnBlockSkipList protobufBlockSkipList =
		PROTOBUF__COLUMN_BLOCK_SKIP_LIST__INIT;
	Protobuf__ColumnBlockSkipNode **protobufBlockSkipNodeArray = NULL;
	uint32 blockIndex = 0;
	uint8 *blockSkipListData = NULL;
	uint32 blockSkipListSize = 0;

	protobufBlockSkipNodeArray = palloc0(blockCount *
										 sizeof(Protobuf__ColumnBlockSkipNode *));
	for (blockIndex = 0; blockIndex < blockCount; blockIndex++)
	{
		ColumnBlockSkipNode blockSkipNode = blockSkipNodeArray[blockIndex];
		Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = NULL;
		ProtobufCBinaryData binaryMinimumValue = {0, 0};
		ProtobufCBinaryData binaryMaximumValue = {0, 0};

		/* min/max are optional; serialize them only when the node has them */
		if (blockSkipNode.hasMinMax)
		{
			binaryMinimumValue = DatumToProtobufBinary(blockSkipNode.minimumValue,
													   typeByValue, typeLength);
			binaryMaximumValue = DatumToProtobufBinary(blockSkipNode.maximumValue,
													   typeByValue, typeLength);
		}

		protobufBlockSkipNode = palloc0(sizeof(Protobuf__ColumnBlockSkipNode));
		protobuf__column_block_skip_node__init(protobufBlockSkipNode);
		protobufBlockSkipNode->has_rowcount = true;
		protobufBlockSkipNode->rowcount = blockSkipNode.rowCount;
		/* the has_ flags for min/max mirror hasMinMax so absence round-trips */
		protobufBlockSkipNode->has_minimumvalue = blockSkipNode.hasMinMax;
		protobufBlockSkipNode->minimumvalue = binaryMinimumValue;
		protobufBlockSkipNode->has_maximumvalue = blockSkipNode.hasMinMax;
		protobufBlockSkipNode->maximumvalue = binaryMaximumValue;
		protobufBlockSkipNode->has_valueblockoffset = true;
		protobufBlockSkipNode->valueblockoffset = blockSkipNode.valueBlockOffset;
		protobufBlockSkipNode->has_valuelength = true;
		protobufBlockSkipNode->valuelength = blockSkipNode.valueLength;
		protobufBlockSkipNode->has_existsblockoffset = true;
		protobufBlockSkipNode->existsblockoffset = blockSkipNode.existsBlockOffset;
		protobufBlockSkipNode->has_existslength = true;
		protobufBlockSkipNode->existslength = blockSkipNode.existsLength;
		protobufBlockSkipNode->has_valuecompressiontype = true;
		protobufBlockSkipNode->valuecompressiontype =
			(Protobuf__CompressionType) blockSkipNode.valueCompressionType;

		protobufBlockSkipNodeArray[blockIndex] = protobufBlockSkipNode;
	}

	protobufBlockSkipList.n_blockskipnodearray = blockCount;
	protobufBlockSkipList.blockskipnodearray = protobufBlockSkipNodeArray;

	blockSkipListSize =
		protobuf__column_block_skip_list__get_packed_size(&protobufBlockSkipList);
	blockSkipListData = palloc0(blockSkipListSize);
	protobuf__column_block_skip_list__pack(&protobufBlockSkipList, blockSkipListData);

	/* wrap the packed bytes in a StringInfo without copying them */
	blockSkipListBuffer = palloc0(sizeof(StringInfoData));
	blockSkipListBuffer->len = blockSkipListSize;
	blockSkipListBuffer->maxlen = blockSkipListSize;
	blockSkipListBuffer->data = (char *) blockSkipListData;

	return blockSkipListBuffer;
}


/*
 * DeserializePostScript deserializes the given postscript buffer and returns
 * the size of table footer in tableFooterLength pointer.
+ */ +void +DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength) +{ + Protobuf__PostScript *protobufPostScript = NULL; + protobufPostScript = protobuf__post_script__unpack(NULL, buffer->len, + (uint8 *) buffer->data); + if (protobufPostScript == NULL) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid postscript buffer"))); + } + + if (protobufPostScript->versionmajor != CSTORE_VERSION_MAJOR || + protobufPostScript->versionminor > CSTORE_VERSION_MINOR) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid column store version number"))); + } + else if (strncmp(protobufPostScript->magicnumber, CSTORE_MAGIC_NUMBER, + NAMEDATALEN) != 0) + { + ereport(ERROR, (errmsg("could not unpack column store"), + errdetail("invalid magic number"))); + } + + (*tableFooterLength) = protobufPostScript->tablefooterlength; + + protobuf__post_script__free_unpacked(protobufPostScript, NULL); +} + + +/* + * DeserializeTableFooter deserializes the given buffer and returns the result as + * a TableFooter struct. 
 */
TableFooter *
DeserializeTableFooter(StringInfo buffer)
{
	TableFooter *tableFooter = NULL;
	Protobuf__TableFooter *protobufTableFooter = NULL;
	List *stripeMetadataList = NIL;
	uint64 blockRowCount = 0;
	uint32 stripeCount = 0;
	uint32 stripeIndex = 0;

	protobufTableFooter = protobuf__table_footer__unpack(NULL, buffer->len,
														 (uint8 *) buffer->data);
	if (protobufTableFooter == NULL)
	{
		ereport(ERROR, (errmsg("could not unpack column store"),
						errdetail("invalid table footer buffer")));
	}

	/* blockrowcount is mandatory and must be within the supported range */
	if (!protobufTableFooter->has_blockrowcount)
	{
		ereport(ERROR, (errmsg("could not unpack column store"),
						errdetail("missing required table footer metadata fields")));
	}
	else if (protobufTableFooter->blockrowcount < BLOCK_ROW_COUNT_MINIMUM ||
			 protobufTableFooter->blockrowcount > BLOCK_ROW_COUNT_MAXIMUM)
	{
		ereport(ERROR, (errmsg("could not unpack column store"),
						errdetail("invalid block row count")));
	}
	blockRowCount = protobufTableFooter->blockrowcount;

	/* copy each stripe's metadata out of the protobuf struct before freeing it */
	stripeCount = protobufTableFooter->n_stripemetadataarray;
	for (stripeIndex = 0; stripeIndex < stripeCount; stripeIndex++)
	{
		StripeMetadata *stripeMetadata = NULL;
		Protobuf__StripeMetadata *protobufStripeMetadata = NULL;

		protobufStripeMetadata = protobufTableFooter->stripemetadataarray[stripeIndex];
		if (!protobufStripeMetadata->has_fileoffset ||
			!protobufStripeMetadata->has_skiplistlength ||
			!protobufStripeMetadata->has_datalength ||
			!protobufStripeMetadata->has_footerlength)
		{
			ereport(ERROR, (errmsg("could not unpack column store"),
							errdetail("missing required stripe metadata fields")));
		}

		stripeMetadata = palloc0(sizeof(StripeMetadata));
		stripeMetadata->fileOffset = protobufStripeMetadata->fileoffset;
		stripeMetadata->skipListLength = protobufStripeMetadata->skiplistlength;
		stripeMetadata->dataLength = protobufStripeMetadata->datalength;
		stripeMetadata->footerLength = protobufStripeMetadata->footerlength;

		stripeMetadataList = lappend(stripeMetadataList, stripeMetadata);
	}

	protobuf__table_footer__free_unpacked(protobufTableFooter, NULL);

	tableFooter = palloc0(sizeof(TableFooter));
	tableFooter->stripeMetadataList = stripeMetadataList;
	tableFooter->blockRowCount = blockRowCount;

	return tableFooter;
}


/*
 * DeserializeStripeFooter deserializes the given buffer and returns the result
 * as a StripeFooter struct.
 */
StripeFooter *
DeserializeStripeFooter(StringInfo buffer)
{
	StripeFooter *stripeFooter = NULL;
	Protobuf__StripeFooter *protobufStripeFooter = NULL;
	uint64 *skipListSizeArray = NULL;
	uint64 *existsSizeArray = NULL;
	uint64 *valueSizeArray = NULL;
	uint64 sizeArrayLength = 0;
	uint32 columnCount = 0;

	protobufStripeFooter = protobuf__stripe_footer__unpack(NULL, buffer->len,
														   (uint8 *) buffer->data);
	if (protobufStripeFooter == NULL)
	{
		ereport(ERROR, (errmsg("could not unpack column store"),
						errdetail("invalid stripe footer buffer")));
	}

	/* all three size arrays must have exactly one entry per column */
	columnCount = protobufStripeFooter->n_skiplistsizearray;
	if (protobufStripeFooter->n_existssizearray != columnCount ||
		protobufStripeFooter->n_valuesizearray != columnCount)
	{
		ereport(ERROR, (errmsg("could not unpack column store"),
						errdetail("stripe size array lengths don't match")));
	}

	sizeArrayLength = columnCount * sizeof(uint64);

	skipListSizeArray = palloc0(sizeArrayLength);
	existsSizeArray = palloc0(sizeArrayLength);
	valueSizeArray = palloc0(sizeArrayLength);

	/* copy the arrays so they remain valid after the protobuf struct is freed */
	memcpy(skipListSizeArray, protobufStripeFooter->skiplistsizearray, sizeArrayLength);
	memcpy(existsSizeArray, protobufStripeFooter->existssizearray, sizeArrayLength);
	memcpy(valueSizeArray, protobufStripeFooter->valuesizearray, sizeArrayLength);

	protobuf__stripe_footer__free_unpacked(protobufStripeFooter, NULL);

	stripeFooter = palloc0(sizeof(StripeFooter));
	stripeFooter->skipListSizeArray = skipListSizeArray;
	stripeFooter->existsSizeArray = existsSizeArray;
	stripeFooter->valueSizeArray = valueSizeArray;
	stripeFooter->columnCount = columnCount;

	return stripeFooter;
}


/*
 * DeserializeBlockCount deserializes the given column skip list buffer and
 * returns the number of blocks in column skip list.
 */
uint32
DeserializeBlockCount(StringInfo buffer)
{
	uint32 blockCount = 0;
	Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL;

	protobufBlockSkipList =
		protobuf__column_block_skip_list__unpack(NULL, buffer->len,
												 (uint8 *) buffer->data);
	if (protobufBlockSkipList == NULL)
	{
		ereport(ERROR, (errmsg("could not unpack column store"),
						errdetail("invalid skip list buffer")));
	}

	blockCount = protobufBlockSkipList->n_blockskipnodearray;

	protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL);

	return blockCount;
}


/*
 * DeserializeRowCount deserializes the given column skip list buffer and
 * returns the total number of rows in block skip list.
 */
uint32
DeserializeRowCount(StringInfo buffer)
{
	uint32 rowCount = 0;
	Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL;
	uint32 blockIndex = 0;
	uint32 blockCount = 0;

	protobufBlockSkipList =
		protobuf__column_block_skip_list__unpack(NULL, buffer->len,
												 (uint8 *) buffer->data);
	if (protobufBlockSkipList == NULL)
	{
		ereport(ERROR, (errmsg("could not unpack column store"),
						errdetail("invalid skip list buffer")));
	}

	/*
	 * Sum the per-block row counts. NOTE(review): the accumulator is uint32,
	 * so the total would wrap for more than ~4 billion rows in a stripe.
	 */
	blockCount = (uint32) protobufBlockSkipList->n_blockskipnodearray;
	for (blockIndex = 0; blockIndex < blockCount; blockIndex++)
	{
		Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode =
			protobufBlockSkipList->blockskipnodearray[blockIndex];
		rowCount += protobufBlockSkipNode->rowcount;
	}

	protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL);

	return rowCount;
}


/*
 * DeserializeColumnSkipList deserializes the given buffer and returns the result as
 * a ColumnBlockSkipNode array.
If the number of unpacked block skip nodes is not
 * equal to the given block count, the function errors out.
 */
ColumnBlockSkipNode *
DeserializeColumnSkipList(StringInfo buffer, bool typeByValue, int typeLength,
						  uint32 blockCount)
{
	ColumnBlockSkipNode *blockSkipNodeArray = NULL;
	uint32 blockIndex = 0;
	Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL;

	protobufBlockSkipList =
		protobuf__column_block_skip_list__unpack(NULL, buffer->len,
												 (uint8 *) buffer->data);
	if (protobufBlockSkipList == NULL)
	{
		ereport(ERROR, (errmsg("could not unpack column store"),
						errdetail("invalid skip list buffer")));
	}

	if (protobufBlockSkipList->n_blockskipnodearray != blockCount)
	{
		ereport(ERROR, (errmsg("could not unpack column store"),
						errdetail("block skip node count and block count don't match")));
	}

	blockSkipNodeArray = palloc0(blockCount * sizeof(ColumnBlockSkipNode));

	for (blockIndex = 0; blockIndex < blockCount; blockIndex++)
	{
		Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = NULL;
		ColumnBlockSkipNode *blockSkipNode = NULL;
		bool hasMinMax = false;
		Datum minimumValue = 0;
		Datum maximumValue = 0;

		/* every field except min/max is mandatory for each node */
		protobufBlockSkipNode = protobufBlockSkipList->blockskipnodearray[blockIndex];
		if (!protobufBlockSkipNode->has_rowcount ||
			!protobufBlockSkipNode->has_existsblockoffset ||
			!protobufBlockSkipNode->has_valueblockoffset ||
			!protobufBlockSkipNode->has_existslength ||
			!protobufBlockSkipNode->has_valuelength ||
			!protobufBlockSkipNode->has_valuecompressiontype)
		{
			ereport(ERROR, (errmsg("could not unpack column store"),
							errdetail("missing required block skip node metadata")));
		}

		/* min and max must be either both present or both absent */
		if (protobufBlockSkipNode->has_minimumvalue !=
			protobufBlockSkipNode->has_maximumvalue)
		{
			ereport(ERROR, (errmsg("could not unpack column store"),
							errdetail("has minimum and has maximum fields "
									  "don't match")));
		}

		hasMinMax = protobufBlockSkipNode->has_minimumvalue;
		if (hasMinMax)
		{
			/* these copy the bytes, so the datums survive free_unpacked below */
			minimumValue = ProtobufBinaryToDatum(protobufBlockSkipNode->minimumvalue,
												 typeByValue, typeLength);
			maximumValue = ProtobufBinaryToDatum(protobufBlockSkipNode->maximumvalue,
												 typeByValue, typeLength);
		}

		blockSkipNode = &blockSkipNodeArray[blockIndex];
		blockSkipNode->rowCount = protobufBlockSkipNode->rowcount;
		blockSkipNode->hasMinMax = hasMinMax;
		blockSkipNode->minimumValue = minimumValue;
		blockSkipNode->maximumValue = maximumValue;
		blockSkipNode->existsBlockOffset = protobufBlockSkipNode->existsblockoffset;
		blockSkipNode->valueBlockOffset = protobufBlockSkipNode->valueblockoffset;
		blockSkipNode->existsLength = protobufBlockSkipNode->existslength;
		blockSkipNode->valueLength = protobufBlockSkipNode->valuelength;
		blockSkipNode->valueCompressionType =
			(CompressionType) protobufBlockSkipNode->valuecompressiontype;
	}

	protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL);

	return blockSkipNodeArray;
}


/* Converts a datum to a ProtobufCBinaryData. */
static ProtobufCBinaryData
DatumToProtobufBinary(Datum datum, bool datumTypeByValue, int datumTypeLength)
{
	ProtobufCBinaryData protobufBinary = {0, 0};

	/* att_addlength_datum yields the datum's storage size for this type */
	int datumLength = att_addlength_datum(0, datumTypeLength, datum);
	char *datumBuffer = palloc0(datumLength);

	if (datumTypeLength > 0)
	{
		/* fixed-length type: stored either by value or via pointer */
		if (datumTypeByValue)
		{
			store_att_byval(datumBuffer, datum, datumTypeLength);
		}
		else
		{
			memcpy(datumBuffer, DatumGetPointer(datum), datumTypeLength);
		}
	}
	else
	{
		/* variable-length type: copy the entire datum, header included */
		memcpy(datumBuffer, DatumGetPointer(datum), datumLength);
	}

	protobufBinary.data = (uint8 *) datumBuffer;
	protobufBinary.len = datumLength;

	return protobufBinary;
}


/* Converts the given ProtobufCBinaryData to a Datum.
 */
static Datum
ProtobufBinaryToDatum(ProtobufCBinaryData protobufBinary, bool datumTypeByValue,
					  int datumTypeLength)
{
	Datum datum = 0;

	/*
	 * We copy the protobuf data so the result of this function lives even
	 * after the unpacked protobuf struct is freed.
	 */
	char *binaryDataCopy = palloc0(protobufBinary.len);
	memcpy(binaryDataCopy, protobufBinary.data, protobufBinary.len);

	/* fetch_att reads the bytes per the type's by-value/length properties */
	datum = fetch_att(binaryDataCopy, datumTypeByValue, datumTypeLength);

	return datum;
}
diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h
new file mode 100644
index 000000000..421f8ddff
--- /dev/null
+++ b/cstore_metadata_serialization.h
@@ -0,0 +1,42 @@
/*-------------------------------------------------------------------------
 *
 * cstore_metadata_serialization.h
 *
 * Type and function declarations to serialize/deserialize cstore metadata.
 *
 * Copyright (c) 2016, Citus Data, Inc.
 *
 * $Id$
 *
 *-------------------------------------------------------------------------
 */

#ifndef CSTORE_SERIALIZATION_H
#define CSTORE_SERIALIZATION_H

#include "catalog/pg_attribute.h"
#include "nodes/pg_list.h"
#include "lib/stringinfo.h"
#include "cstore_fdw.h"


/* Function declarations for metadata serialization */
extern StringInfo SerializePostScript(uint64 tableFooterLength);
extern StringInfo SerializeTableFooter(TableFooter *tableFooter);
extern StringInfo SerializeStripeFooter(StripeFooter *stripeFooter);
extern StringInfo SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray,
										  uint32 blockCount, bool typeByValue,
										  int typeLength);

/* Function declarations for metadata deserialization */
extern void DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength);
extern TableFooter * DeserializeTableFooter(StringInfo buffer);
extern uint32 DeserializeBlockCount(StringInfo buffer);
extern uint32 DeserializeRowCount(StringInfo buffer);
extern StripeFooter * DeserializeStripeFooter(StringInfo
buffer); +extern ColumnBlockSkipNode * DeserializeColumnSkipList(StringInfo buffer, + bool typeByValue, int typeLength, + uint32 blockCount); + + +#endif /* CSTORE_SERIALIZATION_H */ diff --git a/cstore_reader.c b/cstore_reader.c new file mode 100644 index 000000000..7e9c6bcfd --- /dev/null +++ b/cstore_reader.c @@ -0,0 +1,1383 @@ +/*------------------------------------------------------------------------- + * + * cstore_reader.c + * + * This file contains function definitions for reading cstore files. This + * includes the logic for reading file level metadata, reading row stripes, + * and skipping unrelated row blocks and columns. + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" +#include "cstore_fdw.h" +#include "cstore_metadata_serialization.h" +#include "cstore_version_compat.h" + +#include "access/nbtree.h" +#include "access/skey.h" +#include "commands/defrem.h" +#include "nodes/makefuncs.h" +#if PG_VERSION_NUM >= 120000 +#include "nodes/pathnodes.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#else +#include "optimizer/clauses.h" +#include "optimizer/predtest.h" +#include "optimizer/var.h" +#endif +#include "optimizer/restrictinfo.h" +#include "port.h" +#include "storage/fd.h" +#include "utils/memutils.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + + +/* static function declarations */ +static StripeBuffers * LoadFilteredStripeBuffers(FILE *tableFile, + StripeMetadata *stripeMetadata, + TupleDesc tupleDescriptor, + List *projectedColumnList, + List *whereClauseList); +static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, + uint64 blockIndex, uint64 blockRowIndex, + ColumnBlockData **blockDataArray, + Datum *columnValues, bool *columnNulls); +static ColumnBuffers * LoadColumnBuffers(FILE *tableFile, + ColumnBlockSkipNode *blockSkipNodeArray, + uint32 blockCount, uint64 
existsFileOffset, + uint64 valueFileOffset, + Form_pg_attribute attributeForm); +static StripeFooter * LoadStripeFooter(FILE *tableFile, StripeMetadata *stripeMetadata, + uint32 columnCount); +static StripeSkipList * LoadStripeSkipList(FILE *tableFile, + StripeMetadata *stripeMetadata, + StripeFooter *stripeFooter, + uint32 columnCount, + bool *projectedColumnMask, + TupleDesc tupleDescriptor); +static bool * SelectedBlockMask(StripeSkipList *stripeSkipList, + List *projectedColumnList, List *whereClauseList); +static List * BuildRestrictInfoList(List *whereClauseList); +static Node * BuildBaseConstraint(Var *variable); +static OpExpr * MakeOpExpression(Var *variable, int16 strategyNumber); +static Oid GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber); +static void UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue); +static StripeSkipList * SelectedBlockSkipList(StripeSkipList *stripeSkipList, + bool *projectedColumnMask, + bool *selectedBlockMask); +static uint32 StripeSkipListRowCount(StripeSkipList *stripeSkipList); +static bool * ProjectedColumnMask(uint32 columnCount, List *projectedColumnList); +static void DeserializeBoolArray(StringInfo boolArrayBuffer, bool *boolArray, + uint32 boolArrayLength); +static void DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, + uint32 datumCount, bool datumTypeByValue, + int datumTypeLength, char datumTypeAlign, + Datum *datumArray); +static void DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, + uint32 rowCount, ColumnBlockData **blockDataArray, + TupleDesc tupleDescriptor); +static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, + Form_pg_attribute attributeForm); +static int64 FILESize(FILE *file); +static StringInfo ReadFromFile(FILE *file, uint64 offset, uint32 size); +static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, + uint32 columnCount); +static uint64 StripeRowCount(FILE *tableFile, StripeMetadata 
*stripeMetadata); + + +/* + * CStoreBeginRead initializes a cstore read operation. This function returns a + * read handle that's used during reading rows and finishing the read operation. + */ +TableReadState * +CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, + List *projectedColumnList, List *whereClauseList) +{ + TableReadState *readState = NULL; + TableFooter *tableFooter = NULL; + FILE *tableFile = NULL; + MemoryContext stripeReadContext = NULL; + uint32 columnCount = 0; + bool *projectedColumnMask = NULL; + ColumnBlockData **blockDataArray = NULL; + + StringInfo tableFooterFilename = makeStringInfo(); + appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); + + tableFooter = CStoreReadFooter(tableFooterFilename); + + pfree(tableFooterFilename->data); + pfree(tableFooterFilename); + + tableFile = AllocateFile(filename, PG_BINARY_R); + if (tableFile == NULL) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for reading: %m", + filename))); + } + + /* + * We allocate all stripe specific data in the stripeReadContext, and reset + * this memory context before loading a new stripe. This is to avoid memory + * leaks. 
+ */ + stripeReadContext = AllocSetContextCreate(CurrentMemoryContext, + "Stripe Read Memory Context", + ALLOCSET_DEFAULT_SIZES); + + columnCount = tupleDescriptor->natts; + projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); + blockDataArray = CreateEmptyBlockDataArray(columnCount, projectedColumnMask, + tableFooter->blockRowCount); + + readState = palloc0(sizeof(TableReadState)); + readState->tableFile = tableFile; + readState->tableFooter = tableFooter; + readState->projectedColumnList = projectedColumnList; + readState->whereClauseList = whereClauseList; + readState->stripeBuffers = NULL; + readState->readStripeCount = 0; + readState->stripeReadRowCount = 0; + readState->tupleDescriptor = tupleDescriptor; + readState->stripeReadContext = stripeReadContext; + readState->blockDataArray = blockDataArray; + readState->deserializedBlockIndex = -1; + + return readState; +} + + +/* + * CStoreReadFooter reads the cstore file footer from the given file. First, the + * function reads the last byte of the file as the postscript size. Then, the + * function reads the postscript. Last, the function reads and deserializes the + * footer. 
 */
TableFooter *
CStoreReadFooter(StringInfo tableFooterFilename)
{
	TableFooter *tableFooter = NULL;
	FILE *tableFooterFile = NULL;
	uint64 footerOffset = 0;
	uint64 footerLength = 0;
	StringInfo postscriptBuffer = NULL;
	StringInfo postscriptSizeBuffer = NULL;
	uint64 postscriptSizeOffset = 0;
	uint8 postscriptSize = 0;
	uint64 footerFileSize = 0;
	uint64 postscriptOffset = 0;
	StringInfo footerBuffer = NULL;
	int freeResult = 0;

	tableFooterFile = AllocateFile(tableFooterFilename->data, PG_BINARY_R);
	if (tableFooterFile == NULL)
	{
		ereport(ERROR, (errcode_for_file_access(),
						errmsg("could not open file \"%s\" for reading: %m",
							   tableFooterFilename->data),
						errhint("Try copying in data to the table.")));
	}

	/* the file must at least contain the trailing postscript size field */
	footerFileSize = FILESize(tableFooterFile);
	if (footerFileSize < CSTORE_POSTSCRIPT_SIZE_LENGTH)
	{
		ereport(ERROR, (errmsg("invalid cstore file")));
	}

	/* the postscript size is stored at the very end of the footer file */
	postscriptSizeOffset = footerFileSize - CSTORE_POSTSCRIPT_SIZE_LENGTH;
	postscriptSizeBuffer = ReadFromFile(tableFooterFile, postscriptSizeOffset,
										CSTORE_POSTSCRIPT_SIZE_LENGTH);
	memcpy(&postscriptSize, postscriptSizeBuffer->data, CSTORE_POSTSCRIPT_SIZE_LENGTH);
	if (postscriptSize + CSTORE_POSTSCRIPT_SIZE_LENGTH > footerFileSize)
	{
		ereport(ERROR, (errmsg("invalid postscript size")));
	}

	/* the postscript sits immediately before its size field */
	postscriptOffset = footerFileSize - (CSTORE_POSTSCRIPT_SIZE_LENGTH + postscriptSize);
	postscriptBuffer = ReadFromFile(tableFooterFile, postscriptOffset, postscriptSize);

	DeserializePostScript(postscriptBuffer, &footerLength);
	if (footerLength + postscriptSize + CSTORE_POSTSCRIPT_SIZE_LENGTH > footerFileSize)
	{
		ereport(ERROR, (errmsg("invalid footer size")));
	}

	/* finally, the footer itself sits immediately before the postscript */
	footerOffset = postscriptOffset - footerLength;
	footerBuffer = ReadFromFile(tableFooterFile, footerOffset, footerLength);
	tableFooter = DeserializeTableFooter(footerBuffer);

	freeResult = FreeFile(tableFooterFile);
	if (freeResult != 0)
	{
		ereport(ERROR, (errcode_for_file_access(),
						errmsg("could not close file: %m")));
	}

	return tableFooter;
}


/*
 * CStoreReadNextRow tries to read a row from the cstore file. On success, it sets
 * column values and nulls, and returns true. If there are no more rows to read,
 * the function returns false.
 */
bool
CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNulls)
{
	uint32 blockIndex = 0;
	uint32 blockRowIndex = 0;
	TableFooter *tableFooter = readState->tableFooter;
	MemoryContext oldContext = NULL;

	/*
	 * If no stripes are loaded, load the next non-empty stripe. Note that when
	 * loading stripes, we skip over blocks whose contents can be filtered with
	 * the query's restriction qualifiers. So, even when a stripe is physically
	 * not empty, we may end up loading it as an empty stripe.
	 */
	while (readState->stripeBuffers == NULL)
	{
		StripeBuffers *stripeBuffers = NULL;
		StripeMetadata *stripeMetadata = NULL;
		List *stripeMetadataList = tableFooter->stripeMetadataList;
		uint32 stripeCount = list_length(stripeMetadataList);

		/* if we have read all stripes, return false */
		if (readState->readStripeCount == stripeCount)
		{
			return false;
		}

		/* reset per-stripe memory before loading the next stripe into it */
		oldContext = MemoryContextSwitchTo(readState->stripeReadContext);
		MemoryContextReset(readState->stripeReadContext);

		stripeMetadata = list_nth(stripeMetadataList, readState->readStripeCount);
		stripeBuffers = LoadFilteredStripeBuffers(readState->tableFile, stripeMetadata,
												  readState->tupleDescriptor,
												  readState->projectedColumnList,
												  readState->whereClauseList);
		readState->readStripeCount++;

		MemoryContextSwitchTo(oldContext);

		if (stripeBuffers->rowCount != 0)
		{
			readState->stripeBuffers = stripeBuffers;
			readState->stripeReadRowCount = 0;
			readState->deserializedBlockIndex = -1;
			ResetUncompressedBlockData(readState->blockDataArray,
									   stripeBuffers->columnCount);
			break;
		}
	}

	/* locate the block, and the row within the block, for the next row */
	blockIndex = readState->stripeReadRowCount / tableFooter->blockRowCount;
	blockRowIndex = readState->stripeReadRowCount % tableFooter->blockRowCount;

	/* deserialize block data lazily, only when crossing into a new block */
	if (blockIndex != readState->deserializedBlockIndex)
	{
		uint32 lastBlockIndex = 0;
		uint32 blockRowCount = 0;
		uint32 stripeRowCount = 0;

		/* the last block of a stripe may hold fewer than blockRowCount rows */
		stripeRowCount = readState->stripeBuffers->rowCount;
		lastBlockIndex = stripeRowCount / tableFooter->blockRowCount;
		if (blockIndex == lastBlockIndex)
		{
			blockRowCount = stripeRowCount % tableFooter->blockRowCount;
		}
		else
		{
			blockRowCount = tableFooter->blockRowCount;
		}

		oldContext = MemoryContextSwitchTo(readState->stripeReadContext);

		DeserializeBlockData(readState->stripeBuffers, blockIndex,
							 blockRowCount, readState->blockDataArray,
							 readState->tupleDescriptor);

		MemoryContextSwitchTo(oldContext);

		readState->deserializedBlockIndex = blockIndex;
	}

	ReadStripeNextRow(readState->stripeBuffers, readState->projectedColumnList,
					  blockIndex, blockRowIndex, readState->blockDataArray,
					  columnValues, columnNulls);

	/*
	 * If we finished reading the current stripe, set stripe data to NULL. That
	 * way, we will load a new stripe the next time this function gets called.
	 */
	readState->stripeReadRowCount++;
	if (readState->stripeReadRowCount == readState->stripeBuffers->rowCount)
	{
		readState->stripeBuffers = NULL;
	}

	return true;
}


/* Finishes a cstore read operation. */
void
CStoreEndRead(TableReadState *readState)
{
	int columnCount = readState->tupleDescriptor->natts;

	/* release per-stripe memory, the data file, and the read state itself */
	MemoryContextDelete(readState->stripeReadContext);
	FreeFile(readState->tableFile);
	list_free_deep(readState->tableFooter->stripeMetadataList);
	FreeColumnBlockDataArray(readState->blockDataArray, columnCount);
	pfree(readState->tableFooter);
	pfree(readState);
}


/*
 * CreateEmptyBlockDataArray creates data buffers to keep deserialized exist and
 * value arrays for requested columns in columnMask.
+ */ +ColumnBlockData ** +CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, uint32 blockRowCount) +{ + uint32 columnIndex = 0; + ColumnBlockData **blockDataArray = palloc0(columnCount * sizeof(ColumnBlockData*)); + + /* allocate block memory for deserialized data */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + if (columnMask[columnIndex]) + { + ColumnBlockData *blockData = palloc0(sizeof(ColumnBlockData)); + + blockData->existsArray = palloc0(blockRowCount * sizeof(bool)); + blockData->valueArray = palloc0(blockRowCount * sizeof(Datum)); + blockData->valueBuffer = NULL; + blockDataArray[columnIndex] = blockData; + } + } + + return blockDataArray; +} + + +/* + * FreeColumnBlockDataArray deallocates data buffers to keep deserialized exist and + * value arrays for requested columns in columnMask. + * ColumnBlockData->serializedValueBuffer lives in memory read/write context + * so it is deallocated automatically when the context is deleted. + */ +void +FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount) +{ + uint32 columnIndex = 0; + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockData *blockData = blockDataArray[columnIndex]; + if (blockData != NULL) + { + pfree(blockData->existsArray); + pfree(blockData->valueArray); + pfree(blockData); + } + } + + pfree(blockDataArray); +} + + +/* CStoreTableRowCount returns the exact row count of a table using skiplists */ +uint64 +CStoreTableRowCount(const char *filename) +{ + TableFooter *tableFooter = NULL; + FILE *tableFile; + ListCell *stripeMetadataCell = NULL; + uint64 totalRowCount = 0; + + StringInfo tableFooterFilename = makeStringInfo(); + + appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); + + tableFooter = CStoreReadFooter(tableFooterFilename); + + pfree(tableFooterFilename->data); + pfree(tableFooterFilename); + + tableFile = AllocateFile(filename, PG_BINARY_R); + if (tableFile == 
NULL) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for reading: %m", filename))); + } + + foreach(stripeMetadataCell, tableFooter->stripeMetadataList) + { + StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); + totalRowCount += StripeRowCount(tableFile, stripeMetadata); + } + + FreeFile(tableFile); + + return totalRowCount; +} + + +/* + * StripeRowCount reads serialized stripe footer, the first column's + * skip list, and returns number of rows for given stripe. + */ +static uint64 +StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata) +{ + uint64 rowCount = 0; + StripeFooter *stripeFooter = NULL; + StringInfo footerBuffer = NULL; + StringInfo firstColumnSkipListBuffer = NULL; + uint64 footerOffset = 0; + + footerOffset += stripeMetadata->fileOffset; + footerOffset += stripeMetadata->skipListLength; + footerOffset += stripeMetadata->dataLength; + + footerBuffer = ReadFromFile(tableFile, footerOffset, stripeMetadata->footerLength); + stripeFooter = DeserializeStripeFooter(footerBuffer); + + firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, + stripeFooter->skipListSizeArray[0]); + rowCount = DeserializeRowCount(firstColumnSkipListBuffer); + + return rowCount; +} + + +/* + * LoadFilteredStripeBuffers reads serialized stripe data from the given file. + * The function skips over blocks whose rows are refuted by restriction qualifiers, + * and only loads columns that are projected in the query. 
 */
static StripeBuffers *
LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata,
						  TupleDesc tupleDescriptor, List *projectedColumnList,
						  List *whereClauseList)
{
	StripeBuffers *stripeBuffers = NULL;
	ColumnBuffers **columnBuffersArray = NULL;
	uint64 currentColumnFileOffset = 0;
	uint32 columnIndex = 0;
	uint32 columnCount = tupleDescriptor->natts;

	StripeFooter *stripeFooter = LoadStripeFooter(tableFile, stripeMetadata,
												  columnCount);
	bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList);

	StripeSkipList *stripeSkipList = LoadStripeSkipList(tableFile, stripeMetadata,
														stripeFooter, columnCount,
														projectedColumnMask,
														tupleDescriptor);

	/* decide, from min/max metadata alone, which blocks can be skipped */
	bool *selectedBlockMask = SelectedBlockMask(stripeSkipList, projectedColumnList,
												whereClauseList);

	StripeSkipList *selectedBlockSkipList =
		SelectedBlockSkipList(stripeSkipList, projectedColumnMask,
							  selectedBlockMask);

	/* load column data for projected columns */
	columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *));
	currentColumnFileOffset = stripeMetadata->fileOffset + stripeMetadata->skipListLength;

	/*
	 * Iterate over the columns the stripe actually contains; this can be
	 * fewer than columnCount when columns were added to the table after the
	 * stripe was written.
	 */
	for (columnIndex = 0; columnIndex < stripeFooter->columnCount; columnIndex++)
	{
		uint64 existsSize = stripeFooter->existsSizeArray[columnIndex];
		uint64 valueSize = stripeFooter->valueSizeArray[columnIndex];

		/* each column stores its "exists" section first, then its values */
		uint64 existsFileOffset = currentColumnFileOffset;
		uint64 valueFileOffset = currentColumnFileOffset + existsSize;

		if (projectedColumnMask[columnIndex])
		{
			ColumnBlockSkipNode *blockSkipNode =
				selectedBlockSkipList->blockSkipNodeArray[columnIndex];
			Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex);
			uint32 blockCount = selectedBlockSkipList->blockCount;

			ColumnBuffers *columnBuffers = LoadColumnBuffers(tableFile, blockSkipNode,
															 blockCount,
															 existsFileOffset,
															 valueFileOffset,
															 attributeForm);

			columnBuffersArray[columnIndex] = columnBuffers;
		}

		/* advance past this column's data even when it is not projected */
		currentColumnFileOffset += existsSize;
		currentColumnFileOffset += valueSize;
	}

	stripeBuffers = palloc0(sizeof(StripeBuffers));
	stripeBuffers->columnCount = columnCount;
	stripeBuffers->rowCount = StripeSkipListRowCount(selectedBlockSkipList);
	stripeBuffers->columnBuffersArray = columnBuffersArray;

	return stripeBuffers;
}


/*
 * ReadStripeNextRow reads the next row from the given stripe, finds the projected
 * column values within this row, and accordingly sets the column values and nulls.
 * Note that this function sets the values for all non-projected columns to null.
 */
static void
ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList,
				  uint64 blockIndex, uint64 blockRowIndex,
				  ColumnBlockData **blockDataArray, Datum *columnValues,
				  bool *columnNulls)
{
	ListCell *projectedColumnCell = NULL;

	/* set all columns to null by default */
	memset(columnNulls, 1, stripeBuffers->columnCount * sizeof(bool));

	/*
	 * NOTE: blockIndex is unused here; callers deserialize the relevant
	 * block into blockDataArray before calling this function.
	 */
	foreach(projectedColumnCell, projectedColumnList)
	{
		Var *projectedColumn = lfirst(projectedColumnCell);
		uint32 projectedColumnIndex = projectedColumn->varattno - 1;	/* attnums are 1-based */
		ColumnBlockData *blockData = blockDataArray[projectedColumnIndex];

		if (blockData->existsArray[blockRowIndex])
		{
			columnValues[projectedColumnIndex] = blockData->valueArray[blockRowIndex];
			columnNulls[projectedColumnIndex] = false;
		}
	}
}


/*
 * LoadColumnBuffers reads serialized column data from the given file. These
 * column data are laid out as sequential blocks in the file; and block positions
 * and lengths are retrieved from the column block skip node array.
+ */ +static ColumnBuffers * +LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, + uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, + Form_pg_attribute attributeForm) +{ + ColumnBuffers *columnBuffers = NULL; + uint32 blockIndex = 0; + ColumnBlockBuffers **blockBuffersArray = + palloc0(blockCount * sizeof(ColumnBlockBuffers *)); + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + blockBuffersArray[blockIndex] = palloc0(sizeof(ColumnBlockBuffers)); + } + + /* + * We first read the "exists" blocks. We don't read "values" array here, + * because "exists" blocks are stored sequentially on disk, and we want to + * minimize disk seeks. + */ + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + uint64 existsOffset = existsFileOffset + blockSkipNode->existsBlockOffset; + StringInfo rawExistsBuffer = ReadFromFile(tableFile, existsOffset, + blockSkipNode->existsLength); + + blockBuffersArray[blockIndex]->existsBuffer = rawExistsBuffer; + } + + /* then read "values" blocks, which are also stored sequentially on disk */ + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + CompressionType compressionType = blockSkipNode->valueCompressionType; + uint64 valueOffset = valueFileOffset + blockSkipNode->valueBlockOffset; + StringInfo rawValueBuffer = ReadFromFile(tableFile, valueOffset, + blockSkipNode->valueLength); + + blockBuffersArray[blockIndex]->valueBuffer = rawValueBuffer; + blockBuffersArray[blockIndex]->valueCompressionType = compressionType; + } + + columnBuffers = palloc0(sizeof(ColumnBuffers)); + columnBuffers->blockBuffersArray = blockBuffersArray; + + return columnBuffers; +} + + +/* Reads and returns the given stripe's footer. 
 */
static StripeFooter *
LoadStripeFooter(FILE *tableFile, StripeMetadata *stripeMetadata,
				 uint32 columnCount)
{
	StripeFooter *stripeFooter = NULL;
	StringInfo footerBuffer = NULL;
	uint64 footerOffset = 0;

	/* the stripe footer follows the stripe's skip list and data sections */
	footerOffset += stripeMetadata->fileOffset;
	footerOffset += stripeMetadata->skipListLength;
	footerOffset += stripeMetadata->dataLength;

	footerBuffer = ReadFromFile(tableFile, footerOffset, stripeMetadata->footerLength);
	stripeFooter = DeserializeStripeFooter(footerBuffer);

	/*
	 * A stripe may legitimately contain fewer columns than the table when
	 * columns were added after the stripe was written; more columns than
	 * the table indicates corruption.
	 */
	if (stripeFooter->columnCount > columnCount)
	{
		ereport(ERROR, (errmsg("stripe footer column count and table column count "
							   "don't match")));
	}

	return stripeFooter;
}


/* Reads the skip list for the given stripe. */
static StripeSkipList *
LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata,
				   StripeFooter *stripeFooter, uint32 columnCount,
				   bool *projectedColumnMask,
				   TupleDesc tupleDescriptor)
{
	StripeSkipList *stripeSkipList = NULL;
	ColumnBlockSkipNode **blockSkipNodeArray = NULL;
	StringInfo firstColumnSkipListBuffer = NULL;
	uint64 currentColumnSkipListFileOffset = 0;
	uint32 columnIndex = 0;
	uint32 stripeBlockCount = 0;
	uint32 stripeColumnCount = stripeFooter->columnCount;

	/* deserialize block count */
	firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset,
											 stripeFooter->skipListSizeArray[0]);
	stripeBlockCount = DeserializeBlockCount(firstColumnSkipListBuffer);

	/* deserialize column skip lists */
	blockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *));
	currentColumnSkipListFileOffset = stripeMetadata->fileOffset;

	for (columnIndex = 0; columnIndex < stripeColumnCount; columnIndex++)
	{
		uint64 columnSkipListSize = stripeFooter->skipListSizeArray[columnIndex];
		bool firstColumn = columnIndex == 0;

		/*
		 * Only selected columns' column skip lists are read. However, the first
		 * column's skip list is read regardless of being selected. It is used by
		 * StripeSkipListRowCount later.
		 */
		if (projectedColumnMask[columnIndex] || firstColumn)
		{
			Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex);

			StringInfo columnSkipListBuffer =
				ReadFromFile(tableFile, currentColumnSkipListFileOffset,
							 columnSkipListSize);
			ColumnBlockSkipNode *columnSkipList =
				DeserializeColumnSkipList(columnSkipListBuffer, attributeForm->attbyval,
										  attributeForm->attlen, stripeBlockCount);
			blockSkipNodeArray[columnIndex] = columnSkipList;
		}

		/* advance past this column's skip list even when it is skipped */
		currentColumnSkipListFileOffset += columnSkipListSize;
	}

	/* table contains additional columns added after this stripe is created */
	for (columnIndex = stripeColumnCount; columnIndex < columnCount; columnIndex++)
	{
		ColumnBlockSkipNode *columnSkipList = NULL;
		uint32 blockIndex = 0;
		bool firstColumn = columnIndex == 0;

		/* no need to create ColumnBlockSkipList if the column is not selected */
		if (!projectedColumnMask[columnIndex] && !firstColumn)
		{
			blockSkipNodeArray[columnIndex] = NULL;
			continue;
		}

		/* create empty ColumnBlockSkipNode for missing columns*/
		columnSkipList = palloc0(stripeBlockCount * sizeof(ColumnBlockSkipNode));

		/* zero row counts and absent min/max; these blocks hold no data */
		for (blockIndex = 0; blockIndex < stripeBlockCount; blockIndex++)
		{
			columnSkipList[blockIndex].rowCount = 0;
			columnSkipList[blockIndex].hasMinMax = false;
			columnSkipList[blockIndex].minimumValue = 0;
			columnSkipList[blockIndex].maximumValue = 0;
			columnSkipList[blockIndex].existsBlockOffset = 0;
			columnSkipList[blockIndex].valueBlockOffset = 0;
			columnSkipList[blockIndex].existsLength = 0;
			columnSkipList[blockIndex].valueLength = 0;
			columnSkipList[blockIndex].valueCompressionType = COMPRESSION_NONE;
		}
		blockSkipNodeArray[columnIndex] = columnSkipList;
	}

	stripeSkipList = palloc0(sizeof(StripeSkipList));
	stripeSkipList->blockSkipNodeArray = blockSkipNodeArray;
	stripeSkipList->columnCount = columnCount;
	stripeSkipList->blockCount = stripeBlockCount;

	return stripeSkipList;
}


/*
 * SelectedBlockMask walks over each column's blocks and checks if a block can
 * be filtered without reading its data. The filtering happens when all rows in
 * the block can be refuted by the given qualifier conditions.
 */
static bool *
SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList,
				  List *whereClauseList)
{
	bool *selectedBlockMask = NULL;
	ListCell *columnCell = NULL;
	uint32 blockIndex = 0;
	List *restrictInfoList = BuildRestrictInfoList(whereClauseList);

	/* start with every block selected (each byte set to 1 == true) */
	selectedBlockMask = palloc0(stripeSkipList->blockCount * sizeof(bool));
	memset(selectedBlockMask, true, stripeSkipList->blockCount * sizeof(bool));

	foreach(columnCell, projectedColumnList)
	{
		Var *column = lfirst(columnCell);
		uint32 columnIndex = column->varattno - 1;
		FmgrInfo *comparisonFunction = NULL;
		Node *baseConstraint = NULL;

		/* if this column's data type doesn't have a comparator, skip it */
		comparisonFunction = GetFunctionInfoOrNull(column->vartype, BTREE_AM_OID,
												   BTORDER_PROC);
		if (comparisonFunction == NULL)
		{
			continue;
		}

		/* reusable (var >= min AND var <= max) template for this column */
		baseConstraint = BuildBaseConstraint(column);
		for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++)
		{
			bool predicateRefuted = false;
			List *constraintList = NIL;
			ColumnBlockSkipNode *blockSkipNodeArray =
				stripeSkipList->blockSkipNodeArray[columnIndex];
			ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex];

			/*
			 * A column block with comparable data type can miss min/max values
			 * if all values in the block are NULL.
			 */
			if (!blockSkipNode->hasMinMax)
			{
				continue;
			}

			UpdateConstraint(baseConstraint, blockSkipNode->minimumValue,
							 blockSkipNode->maximumValue);

			constraintList = list_make1(baseConstraint);

			/* PostgreSQL 10 added the weak-refutation parameter */
#if (PG_VERSION_NUM >= 100000)
			predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList, false);
#else
			predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList);
#endif
			if (predicateRefuted)
			{
				selectedBlockMask[blockIndex] = false;
			}
		}
	}

	return selectedBlockMask;
}


/*
 * GetFunctionInfoOrNull first resolves the operator for the given data type,
 * access method, and support procedure. The function then uses the resolved
 * operator's identifier to fill in a function manager object, and returns
 * this object. Returns NULL when any lookup step fails to resolve. This
 * function is based on a similar function from CitusDB's code.
 */
FmgrInfo *
GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, int16 procedureId)
{
	FmgrInfo *functionInfo = NULL;
	Oid operatorClassId = InvalidOid;
	Oid operatorFamilyId = InvalidOid;
	Oid operatorId = InvalidOid;

	/* get default operator class from pg_opclass for datum type */
	operatorClassId = GetDefaultOpClass(typeId, accessMethodId);
	if (operatorClassId == InvalidOid)
	{
		return NULL;
	}

	operatorFamilyId = get_opclass_family(operatorClassId);
	if (operatorFamilyId == InvalidOid)
	{
		return NULL;
	}

	operatorId = get_opfamily_proc(operatorFamilyId, typeId, typeId, procedureId);
	if (operatorId != InvalidOid)
	{
		functionInfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));

		/* fill in the FmgrInfo struct using the operatorId */
		fmgr_info(operatorId, functionInfo);
	}

	return functionInfo;
}


/*
 * BuildRestrictInfoList builds restrict info list using the selection criteria,
 * and then return this list. The function is copied from CitusDB's shard pruning
 * logic.
+ */ +static List * +BuildRestrictInfoList(List *whereClauseList) +{ + List *restrictInfoList = NIL; + + ListCell *qualCell = NULL; + foreach(qualCell, whereClauseList) + { + RestrictInfo *restrictInfo = NULL; + Node *qualNode = (Node *) lfirst(qualCell); + + restrictInfo = make_simple_restrictinfo((Expr *) qualNode); + restrictInfoList = lappend(restrictInfoList, restrictInfo); + } + + return restrictInfoList; +} + + +/* + * BuildBaseConstraint builds and returns a base constraint. This constraint + * implements an expression in the form of (var <= max && var >= min), where + * min and max values represent a block's min and max values. These block + * values are filled in after the constraint is built. This function is based + * on a similar function from CitusDB's shard pruning logic. + */ +static Node * +BuildBaseConstraint(Var *variable) +{ + Node *baseConstraint = NULL; + OpExpr *lessThanExpr = NULL; + OpExpr *greaterThanExpr = NULL; + + lessThanExpr = MakeOpExpression(variable, BTLessEqualStrategyNumber); + greaterThanExpr = MakeOpExpression(variable, BTGreaterEqualStrategyNumber); + + baseConstraint = make_and_qual((Node *) lessThanExpr, (Node *) greaterThanExpr); + + return baseConstraint; +} + + +/* + * MakeOpExpression builds an operator expression node. This operator expression + * implements the operator clause as defined by the variable and the strategy + * number. The function is copied from CitusDB's shard pruning logic. 
+ */ +static OpExpr * +MakeOpExpression(Var *variable, int16 strategyNumber) +{ + Oid typeId = variable->vartype; + Oid typeModId = variable->vartypmod; + Oid collationId = variable->varcollid; + + Oid accessMethodId = BTREE_AM_OID; + Oid operatorId = InvalidOid; + Const *constantValue = NULL; + OpExpr *expression = NULL; + + /* Load the operator from system catalogs */ + operatorId = GetOperatorByType(typeId, accessMethodId, strategyNumber); + + constantValue = makeNullConst(typeId, typeModId, collationId); + + /* Now make the expression with the given variable and a null constant */ + expression = (OpExpr *) make_opclause(operatorId, + InvalidOid, /* no result type yet */ + false, /* no return set */ + (Expr *) variable, + (Expr *) constantValue, + InvalidOid, collationId); + + /* Set implementing function id and result type */ + expression->opfuncid = get_opcode(operatorId); + expression->opresulttype = get_func_rettype(expression->opfuncid); + + return expression; +} + + +/* + * GetOperatorByType returns operator Oid for the given type, access method, + * and strategy number. Note that this function incorrectly errors out when + * the given type doesn't have its own operator but can use another compatible + * type's default operator. The function is copied from CitusDB's shard pruning + * logic. + */ +static Oid +GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber) +{ + /* Get default operator class from pg_opclass */ + Oid operatorClassId = GetDefaultOpClass(typeId, accessMethodId); + + Oid operatorFamily = get_opclass_family(operatorClassId); + + Oid operatorId = get_opfamily_member(operatorFamily, typeId, typeId, strategyNumber); + + return operatorId; +} + + +/* + * UpdateConstraint updates the base constraint with the given min/max values. + * The function is copied from CitusDB's shard pruning logic. 
+ */ +static void +UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue) +{ + BoolExpr *andExpr = (BoolExpr *) baseConstraint; + Node *lessThanExpr = (Node *) linitial(andExpr->args); + Node *greaterThanExpr = (Node *) lsecond(andExpr->args); + + Node *minNode = get_rightop((Expr *) greaterThanExpr); + Node *maxNode = get_rightop((Expr *) lessThanExpr); + Const *minConstant = NULL; + Const *maxConstant = NULL; + + Assert(IsA(minNode, Const)); + Assert(IsA(maxNode, Const)); + + minConstant = (Const *) minNode; + maxConstant = (Const *) maxNode; + + minConstant->constvalue = minValue; + maxConstant->constvalue = maxValue; + + minConstant->constisnull = false; + maxConstant->constisnull = false; + + minConstant->constbyval = true; + maxConstant->constbyval = true; +} + + +/* + * SelectedBlockSkipList constructs a new StripeSkipList in which the + * non-selected blocks are removed from the given stripeSkipList. + */ +static StripeSkipList * +SelectedBlockSkipList(StripeSkipList *stripeSkipList, bool *projectedColumnMask, + bool *selectedBlockMask) +{ + StripeSkipList *SelectedBlockSkipList = NULL; + ColumnBlockSkipNode **selectedBlockSkipNodeArray = NULL; + uint32 selectedBlockCount = 0; + uint32 blockIndex = 0; + uint32 columnIndex = 0; + uint32 columnCount = stripeSkipList->columnCount; + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + if (selectedBlockMask[blockIndex]) + { + selectedBlockCount++; + } + } + + selectedBlockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + uint32 selectedBlockIndex = 0; + bool firstColumn = columnIndex == 0; + + /* first column's block skip node is always read */ + if (!projectedColumnMask[columnIndex] && !firstColumn) + { + selectedBlockSkipNodeArray[columnIndex] = NULL; + continue; + } + + Assert(stripeSkipList->blockSkipNodeArray[columnIndex] != NULL); + + 
selectedBlockSkipNodeArray[columnIndex] = palloc0(selectedBlockCount * + sizeof(ColumnBlockSkipNode)); + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + if (selectedBlockMask[blockIndex]) + { + selectedBlockSkipNodeArray[columnIndex][selectedBlockIndex] = + stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex]; + selectedBlockIndex++; + } + } + } + + SelectedBlockSkipList = palloc0(sizeof(StripeSkipList)); + SelectedBlockSkipList->blockSkipNodeArray = selectedBlockSkipNodeArray; + SelectedBlockSkipList->blockCount = selectedBlockCount; + SelectedBlockSkipList->columnCount = stripeSkipList->columnCount; + + return SelectedBlockSkipList; +} + + +/* + * StripeSkipListRowCount counts the number of rows in the given stripeSkipList. + * To do this, the function finds the first column, and sums up row counts across + * all blocks for that column. + */ +static uint32 +StripeSkipListRowCount(StripeSkipList *stripeSkipList) +{ + uint32 stripeSkipListRowCount = 0; + uint32 blockIndex = 0; + ColumnBlockSkipNode *firstColumnSkipNodeArray = + stripeSkipList->blockSkipNodeArray[0]; + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + uint32 blockRowCount = firstColumnSkipNodeArray[blockIndex].rowCount; + stripeSkipListRowCount += blockRowCount; + } + + return stripeSkipListRowCount; +} + + +/* + * ProjectedColumnMask returns a boolean array in which the projected columns + * from the projected column list are marked as true. 
+ */ +static bool * +ProjectedColumnMask(uint32 columnCount, List *projectedColumnList) +{ + bool *projectedColumnMask = palloc0(columnCount * sizeof(bool)); + ListCell *columnCell = NULL; + + foreach(columnCell, projectedColumnList) + { + Var *column = (Var *) lfirst(columnCell); + uint32 columnIndex = column->varattno - 1; + projectedColumnMask[columnIndex] = true; + } + + return projectedColumnMask; +} + + +/* + * DeserializeBoolArray reads an array of bits from the given buffer and stores + * it in provided bool array. + */ +static void +DeserializeBoolArray(StringInfo boolArrayBuffer, bool *boolArray, + uint32 boolArrayLength) +{ + uint32 boolArrayIndex = 0; + + uint32 maximumBoolCount = boolArrayBuffer->len * 8; + if (boolArrayLength > maximumBoolCount) + { + ereport(ERROR, (errmsg("insufficient data for reading boolean array"))); + } + + for (boolArrayIndex = 0; boolArrayIndex < boolArrayLength; boolArrayIndex++) + { + uint32 byteIndex = boolArrayIndex / 8; + uint32 bitIndex = boolArrayIndex % 8; + uint8 bitmask = (1 << bitIndex); + + uint8 shiftedBit = (boolArrayBuffer->data[byteIndex] & bitmask); + if (shiftedBit == 0) + { + boolArray[boolArrayIndex] = false; + } + else + { + boolArray[boolArrayIndex] = true; + } + } +} + + +/* + * DeserializeDatumArray reads an array of datums from the given buffer and stores + * them in provided datumArray. If a value is marked as false in the exists array, + * the function assumes that the datum isn't in the buffer, and simply skips it. 
+ */ +static void +DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, uint32 datumCount, + bool datumTypeByValue, int datumTypeLength, + char datumTypeAlign, Datum *datumArray) +{ + uint32 datumIndex = 0; + uint32 currentDatumDataOffset = 0; + + for (datumIndex = 0; datumIndex < datumCount; datumIndex++) + { + char *currentDatumDataPointer = NULL; + + if (!existsArray[datumIndex]) + { + continue; + } + + currentDatumDataPointer = datumBuffer->data + currentDatumDataOffset; + + datumArray[datumIndex] = fetch_att(currentDatumDataPointer, datumTypeByValue, + datumTypeLength); + currentDatumDataOffset = att_addlength_datum(currentDatumDataOffset, + datumTypeLength, + currentDatumDataPointer); + currentDatumDataOffset = att_align_nominal(currentDatumDataOffset, + datumTypeAlign); + + if (currentDatumDataOffset > datumBuffer->len) + { + ereport(ERROR, (errmsg("insufficient data left in datum buffer"))); + } + } +} + + +/* + * DeserializeBlockData deserializes requested data block for all columns and + * stores in blockDataArray. It uncompresses serialized data if necessary. The + * function also deallocates data buffers used for previous block, and compressed + * data buffers for the current block which will not be needed again. If a column + * data is not present serialized buffer, then default value (or null) is used + * to fill value array. 
+ */ +static void +DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, + uint32 rowCount, + ColumnBlockData **blockDataArray, TupleDesc tupleDescriptor) +{ + int columnIndex = 0; + for (columnIndex = 0; columnIndex < stripeBuffers->columnCount; columnIndex++) + { + ColumnBlockData *blockData = blockDataArray[columnIndex]; + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + bool columnAdded = false; + + if ((columnBuffers == NULL) && (blockData != NULL)) + { + columnAdded = true; + } + + if (columnBuffers != NULL) + { + ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; + StringInfo valueBuffer = NULL; + + /* free previous block's data buffers */ + pfree(blockData->valueBuffer->data); + pfree(blockData->valueBuffer); + + /* decompress and deserialize current block's data */ + valueBuffer = DecompressBuffer(blockBuffers->valueBuffer, + blockBuffers->valueCompressionType); + + if (blockBuffers->valueCompressionType != COMPRESSION_NONE) + { + /* compressed data is not needed anymore */ + pfree(blockBuffers->valueBuffer->data); + pfree(blockBuffers->valueBuffer); + } + + DeserializeBoolArray(blockBuffers->existsBuffer, blockData->existsArray, + rowCount); + DeserializeDatumArray(valueBuffer, blockData->existsArray, + rowCount, attributeForm->attbyval, + attributeForm->attlen, attributeForm->attalign, + blockData->valueArray); + + /* store current block's data buffer to be freed at next block read */ + blockData->valueBuffer = valueBuffer; + } + else if (columnAdded) + { + /* + * This is a column that was added after creation of this stripe. + * So we use either the default value or NULL. 
+ */ + if (attributeForm->atthasdef) + { + int rowIndex = 0; + + Datum defaultValue = ColumnDefaultValue(tupleDescriptor->constr, + attributeForm); + + for (rowIndex = 0; rowIndex < rowCount; rowIndex++) + { + blockData->existsArray[rowIndex] = true; + blockData->valueArray[rowIndex] = defaultValue; + } + } + else + { + memset(blockData->existsArray, false, rowCount); + } + + } + } +} + + +/* + * ColumnDefaultValue returns default value for given column. Only const values + * are supported. The function errors on any other default value expressions. + */ +static Datum +ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm) +{ + Datum defaultValue = 0; + Node *defaultValueNode = NULL; + int defValIndex = 0; + + for (defValIndex = 0; defValIndex < tupleConstraints->num_defval; defValIndex++) + { + AttrDefault defaultValue = tupleConstraints->defval[defValIndex]; + if (defaultValue.adnum == attributeForm->attnum) + { + defaultValueNode = stringToNode(defaultValue.adbin); + break; + } + } + + Assert(defaultValueNode != NULL); + + /* try reducing the default value node to a const node */ + defaultValueNode = eval_const_expressions(NULL, defaultValueNode); + if (IsA(defaultValueNode, Const)) + { + Const *constNode = (Const *) defaultValueNode; + defaultValue = constNode->constvalue; + } + else + { + const char *columnName = NameStr(attributeForm->attname); + ereport(ERROR, (errmsg("unsupported default value for column \"%s\"", columnName), + errhint("Expression is either mutable or " + "does not evaluate to constant value"))); + } + + return defaultValue; +} + + +/* Returns the size of the given file handle. 
*/ +static int64 +FILESize(FILE *file) +{ + int64 fileSize = 0; + int fseekResult = 0; + + errno = 0; + fseekResult = fseeko(file, 0, SEEK_END); + if (fseekResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not seek in file: %m"))); + } + + fileSize = ftello(file); + if (fileSize == -1) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not get position in file: %m"))); + } + + return fileSize; +} + + +/* Reads the given segment from the given file. */ +static StringInfo +ReadFromFile(FILE *file, uint64 offset, uint32 size) +{ + int fseekResult = 0; + int freadResult = 0; + int fileError = 0; + + StringInfo resultBuffer = makeStringInfo(); + enlargeStringInfo(resultBuffer, size); + resultBuffer->len = size; + + if (size == 0) + { + return resultBuffer; + } + + errno = 0; + fseekResult = fseeko(file, offset, SEEK_SET); + if (fseekResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not seek in file: %m"))); + } + + freadResult = fread(resultBuffer->data, size, 1, file); + if (freadResult != 1) + { + ereport(ERROR, (errmsg("could not read enough data from file"))); + } + + fileError = ferror(file); + if (fileError != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not read file: %m"))); + } + + return resultBuffer; +} + + + + +/* + * ResetUncompressedBlockData iterates over deserialized column block data + * and sets valueBuffer field to empty buffer. This field is allocated in stripe + * memory context and becomes invalid once memory context is reset. 
+ */ +static void +ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount) +{ + uint32 columnIndex = 0; + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockData *blockData = blockDataArray[columnIndex]; + if (blockData != NULL) + { + blockData->valueBuffer = makeStringInfo(); + } + } +} diff --git a/cstore_version_compat.h b/cstore_version_compat.h new file mode 100644 index 000000000..a7f961fcd --- /dev/null +++ b/cstore_version_compat.h @@ -0,0 +1,58 @@ +/*------------------------------------------------------------------------- + * + * cstore_version_compat.h + * + * Compatibility macros for writing code agnostic to PostgreSQL versions + * + * Copyright (c) 2018, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_COMPAT_H +#define CSTORE_COMPAT_H + +#if PG_VERSION_NUM < 100000 + +/* Accessor for the i'th attribute of tupdesc. */ +#define TupleDescAttr(tupdesc, i) ((tupdesc)->attrs[(i)]) + +#endif + +#if PG_VERSION_NUM < 110000 +#define ALLOCSET_DEFAULT_SIZES ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE +#define ACLCHECK_OBJECT_TABLE ACL_KIND_CLASS +#else +#define ACLCHECK_OBJECT_TABLE OBJECT_TABLE + +#define ExplainPropertyLong(qlabel, value, es) \ + ExplainPropertyInteger(qlabel, NULL, value, es) +#endif + +#define PREVIOUS_UTILITY (PreviousProcessUtilityHook != NULL \ + ? 
PreviousProcessUtilityHook : standard_ProcessUtility) +#if PG_VERSION_NUM >= 100000 +#define CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, \ + destReceiver, completionTag) \ + PREVIOUS_UTILITY(plannedStatement, queryString, context, paramListInfo, \ + queryEnvironment, destReceiver, completionTag) +#else +#define CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, \ + destReceiver, completionTag) \ + PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, \ + completionTag) +#endif + +#if PG_VERSION_NUM < 120000 +#define TTS_EMPTY(slot) ((slot)->tts_isempty) +#define ExecForceStoreHeapTuple(tuple, slot, shouldFree) \ + ExecStoreTuple(newTuple, tupleSlot, InvalidBuffer, shouldFree); +#define HeapScanDesc TableScanDesc +#define table_beginscan heap_beginscan +#define table_endscan heap_endscan + +#endif + +#endif /* CSTORE_COMPAT_H */ diff --git a/cstore_writer.c b/cstore_writer.c new file mode 100644 index 000000000..b69064215 --- /dev/null +++ b/cstore_writer.c @@ -0,0 +1,1017 @@ +/*------------------------------------------------------------------------- + * + * cstore_writer.c + * + * This file contains function definitions for writing cstore files. This + * includes the logic for writing file level metadata, writing row stripes, + * and calculating block skip nodes. + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" +#include "cstore_fdw.h" +#include "cstore_metadata_serialization.h" +#include "cstore_version_compat.h" + +#include +#include "access/nbtree.h" +#include "catalog/pg_collation.h" +#include "commands/defrem.h" +#if PG_VERSION_NUM >= 120000 +#include "optimizer/optimizer.h" +#else +#include "optimizer/var.h" +#endif +#include "port.h" +#include "storage/fd.h" +#include "utils/memutils.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + + +static void CStoreWriteFooter(StringInfo footerFileName, TableFooter *tableFooter); +static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, + uint32 blockRowCount, + uint32 columnCount); +static StripeSkipList * CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, + uint32 blockRowCount, + uint32 columnCount); +static StripeMetadata FlushStripe(TableWriteState *writeState); +static StringInfo * CreateSkipListBufferArray(StripeSkipList *stripeSkipList, + TupleDesc tupleDescriptor); +static StripeFooter * CreateStripeFooter(StripeSkipList *stripeSkipList, + StringInfo *skipListBufferArray); +static StringInfo SerializeBoolArray(bool *boolArray, uint32 boolArrayLength); +static void SerializeSingleDatum(StringInfo datumBuffer, Datum datum, + bool datumTypeByValue, int datumTypeLength, + char datumTypeAlign); +static void SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, + uint32 rowCount); +static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, + Datum columnValue, bool columnTypeByValue, + int columnTypeLength, Oid columnCollation, + FmgrInfo *comparisonFunction); +static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); +static void AppendStripeMetadata(TableFooter *tableFooter, + StripeMetadata stripeMetadata); +static void WriteToFile(FILE *file, void *data, uint32 dataLength); +static void SyncAndCloseFile(FILE *file); 
+static StringInfo CopyStringInfo(StringInfo sourceString); + + +/* + * CStoreBeginWrite initializes a cstore data load operation and returns a table + * handle. This handle should be used for adding the row values and finishing the + * data load operation. If the cstore footer file already exists, we read the + * footer and then seek to right after the last stripe where the new stripes + * will be added. + */ +TableWriteState * +CStoreBeginWrite(const char *filename, CompressionType compressionType, + uint64 stripeMaxRowCount, uint32 blockRowCount, + TupleDesc tupleDescriptor) +{ + TableWriteState *writeState = NULL; + FILE *tableFile = NULL; + StringInfo tableFooterFilename = NULL; + TableFooter *tableFooter = NULL; + FmgrInfo **comparisonFunctionArray = NULL; + MemoryContext stripeWriteContext = NULL; + uint64 currentFileOffset = 0; + uint32 columnCount = 0; + uint32 columnIndex = 0; + struct stat statBuffer; + int statResult = 0; + bool *columnMaskArray = NULL; + ColumnBlockData **blockData = NULL; + + tableFooterFilename = makeStringInfo(); + appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); + + statResult = stat(tableFooterFilename->data, &statBuffer); + if (statResult < 0) + { + tableFile = AllocateFile(filename, "w"); + if (tableFile == NULL) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + filename))); + } + + tableFooter = palloc0(sizeof(TableFooter)); + tableFooter->blockRowCount = blockRowCount; + tableFooter->stripeMetadataList = NIL; + } + else + { + tableFile = AllocateFile(filename, "r+"); + if (tableFile == NULL) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + filename))); + } + + tableFooter = CStoreReadFooter(tableFooterFilename); + } + + /* + * If stripeMetadataList is not empty, jump to the position right after + * the last position. 
+ */ + if (tableFooter->stripeMetadataList != NIL) + { + StripeMetadata *lastStripe = NULL; + uint64 lastStripeSize = 0; + int fseekResult = 0; + + lastStripe = llast(tableFooter->stripeMetadataList); + lastStripeSize += lastStripe->skipListLength; + lastStripeSize += lastStripe->dataLength; + lastStripeSize += lastStripe->footerLength; + + currentFileOffset = lastStripe->fileOffset + lastStripeSize; + + errno = 0; + fseekResult = fseeko(tableFile, currentFileOffset, SEEK_SET); + if (fseekResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not seek in file \"%s\": %m", filename))); + } + } + + /* get comparison function pointers for each of the columns */ + columnCount = tupleDescriptor->natts; + comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + FmgrInfo *comparisonFunction = NULL; + FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + + if (!attributeForm->attisdropped) + { + Oid typeId = attributeForm->atttypid; + + comparisonFunction = GetFunctionInfoOrNull(typeId, BTREE_AM_OID, BTORDER_PROC); + } + + comparisonFunctionArray[columnIndex] = comparisonFunction; + } + + /* + * We allocate all stripe specific data in the stripeWriteContext, and + * reset this memory context once we have flushed the stripe to the file. + * This is to avoid memory leaks. 
+ */ + stripeWriteContext = AllocSetContextCreate(CurrentMemoryContext, + "Stripe Write Memory Context", + ALLOCSET_DEFAULT_SIZES); + + columnMaskArray = palloc(columnCount * sizeof(bool)); + memset(columnMaskArray, true, columnCount); + + blockData = CreateEmptyBlockDataArray(columnCount, columnMaskArray, blockRowCount); + + writeState = palloc0(sizeof(TableWriteState)); + writeState->tableFile = tableFile; + writeState->tableFooterFilename = tableFooterFilename; + writeState->tableFooter = tableFooter; + writeState->compressionType = compressionType; + writeState->stripeMaxRowCount = stripeMaxRowCount; + writeState->tupleDescriptor = tupleDescriptor; + writeState->currentFileOffset = currentFileOffset; + writeState->comparisonFunctionArray = comparisonFunctionArray; + writeState->stripeBuffers = NULL; + writeState->stripeSkipList = NULL; + writeState->stripeWriteContext = stripeWriteContext; + writeState->blockDataArray = blockData; + writeState->compressionBuffer = NULL; + + return writeState; +} + + +/* + * CStoreWriteRow adds a row to the cstore file. If the stripe is not initialized, + * we create structures to hold stripe data and skip list. Then, we serialize and + * append data to serialized value buffer for each of the columns and update + * corresponding skip nodes. Then, whole block data is compressed at every + * rowBlockCount insertion. Then, if row count exceeds stripeMaxRowCount, we flush + * the stripe, and add its metadata to the table footer. 
+ */ +void +CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNulls) +{ + uint32 columnIndex = 0; + uint32 blockIndex = 0; + uint32 blockRowIndex = 0; + StripeBuffers *stripeBuffers = writeState->stripeBuffers; + StripeSkipList *stripeSkipList = writeState->stripeSkipList; + uint32 columnCount = writeState->tupleDescriptor->natts; + TableFooter *tableFooter = writeState->tableFooter; + const uint32 blockRowCount = tableFooter->blockRowCount; + ColumnBlockData **blockDataArray = writeState->blockDataArray; + MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); + + if (stripeBuffers == NULL) + { + stripeBuffers = CreateEmptyStripeBuffers(writeState->stripeMaxRowCount, + blockRowCount, columnCount); + stripeSkipList = CreateEmptyStripeSkipList(writeState->stripeMaxRowCount, + blockRowCount, columnCount); + writeState->stripeBuffers = stripeBuffers; + writeState->stripeSkipList = stripeSkipList; + writeState->compressionBuffer = makeStringInfo(); + + /* + * serializedValueBuffer lives in stripe write memory context so it needs to be + * initialized when the stripe is created. 
+ */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockData *blockData = blockDataArray[columnIndex]; + blockData->valueBuffer = makeStringInfo(); + } + } + + blockIndex = stripeBuffers->rowCount / blockRowCount; + blockRowIndex = stripeBuffers->rowCount % blockRowCount; + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockData *blockData = blockDataArray[columnIndex]; + ColumnBlockSkipNode **blockSkipNodeArray = stripeSkipList->blockSkipNodeArray; + ColumnBlockSkipNode *blockSkipNode = + &blockSkipNodeArray[columnIndex][blockIndex]; + + if (columnNulls[columnIndex]) + { + blockData->existsArray[blockRowIndex] = false; + } + else + { + FmgrInfo *comparisonFunction = + writeState->comparisonFunctionArray[columnIndex]; + Form_pg_attribute attributeForm = + TupleDescAttr(writeState->tupleDescriptor, columnIndex); + bool columnTypeByValue = attributeForm->attbyval; + int columnTypeLength = attributeForm->attlen; + Oid columnCollation = attributeForm->attcollation; + char columnTypeAlign = attributeForm->attalign; + + blockData->existsArray[blockRowIndex] = true; + + SerializeSingleDatum(blockData->valueBuffer, columnValues[columnIndex], + columnTypeByValue, columnTypeLength, columnTypeAlign); + + UpdateBlockSkipNodeMinMax(blockSkipNode, columnValues[columnIndex], + columnTypeByValue, columnTypeLength, + columnCollation, comparisonFunction); + } + + blockSkipNode->rowCount++; + } + + stripeSkipList->blockCount = blockIndex + 1; + + /* last row of the block is inserted serialize the block */ + if (blockRowIndex == blockRowCount - 1) + { + SerializeBlockData(writeState, blockIndex, blockRowCount); + } + + stripeBuffers->rowCount++; + if (stripeBuffers->rowCount >= writeState->stripeMaxRowCount) + { + StripeMetadata stripeMetadata = FlushStripe(writeState); + MemoryContextReset(writeState->stripeWriteContext); + + /* set stripe data and skip list to NULL so they are recreated next time */ + 
writeState->stripeBuffers = NULL; + writeState->stripeSkipList = NULL; + + /* + * Append stripeMetadata in old context so next MemoryContextReset + * doesn't free it. + */ + MemoryContextSwitchTo(oldContext); + AppendStripeMetadata(tableFooter, stripeMetadata); + } + else + { + MemoryContextSwitchTo(oldContext); + } +} + + +/* + * CStoreEndWrite finishes a cstore data load operation. If we have an unflushed + * stripe, we flush it. Then, we sync and close the cstore data file. Last, we + * flush the footer to a temporary file, and atomically rename this temporary + * file to the original footer file. + */ +void +CStoreEndWrite(TableWriteState *writeState) +{ + StringInfo tableFooterFilename = NULL; + StringInfo tempTableFooterFileName = NULL; + int renameResult = 0; + int columnCount = writeState->tupleDescriptor->natts; + StripeBuffers *stripeBuffers = writeState->stripeBuffers; + + if (stripeBuffers != NULL) + { + MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); + + StripeMetadata stripeMetadata = FlushStripe(writeState); + MemoryContextReset(writeState->stripeWriteContext); + + MemoryContextSwitchTo(oldContext); + AppendStripeMetadata(writeState->tableFooter, stripeMetadata); + } + + SyncAndCloseFile(writeState->tableFile); + + tableFooterFilename = writeState->tableFooterFilename; + tempTableFooterFileName = makeStringInfo(); + appendStringInfo(tempTableFooterFileName, "%s%s", tableFooterFilename->data, + CSTORE_TEMP_FILE_SUFFIX); + + CStoreWriteFooter(tempTableFooterFileName, writeState->tableFooter); + + renameResult = rename(tempTableFooterFileName->data, tableFooterFilename->data); + if (renameResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not rename file \"%s\" to \"%s\": %m", + tempTableFooterFileName->data, + tableFooterFilename->data))); + } + + pfree(tempTableFooterFileName->data); + pfree(tempTableFooterFileName); + + MemoryContextDelete(writeState->stripeWriteContext); + 
list_free_deep(writeState->tableFooter->stripeMetadataList); + pfree(writeState->tableFooter); + pfree(writeState->tableFooterFilename->data); + pfree(writeState->tableFooterFilename); + pfree(writeState->comparisonFunctionArray); + FreeColumnBlockDataArray(writeState->blockDataArray, columnCount); + pfree(writeState); +} + + +/* + * CStoreWriteFooter writes the given footer to given file. First, the function + * serializes and writes the footer to the file. Then, the function serializes + * and writes the postscript. Then, the function writes the postscript size as + * the last byte of the file. Last, the function syncs and closes the footer file. + */ +static void +CStoreWriteFooter(StringInfo tableFooterFilename, TableFooter *tableFooter) +{ + FILE *tableFooterFile = NULL; + StringInfo tableFooterBuffer = NULL; + StringInfo postscriptBuffer = NULL; + uint8 postscriptSize = 0; + + tableFooterFile = AllocateFile(tableFooterFilename->data, PG_BINARY_W); + if (tableFooterFile == NULL) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + tableFooterFilename->data))); + } + + /* write the footer */ + tableFooterBuffer = SerializeTableFooter(tableFooter); + WriteToFile(tableFooterFile, tableFooterBuffer->data, tableFooterBuffer->len); + + /* write the postscript */ + postscriptBuffer = SerializePostScript(tableFooterBuffer->len); + WriteToFile(tableFooterFile, postscriptBuffer->data, postscriptBuffer->len); + + /* write the 1-byte postscript size */ + Assert(postscriptBuffer->len < CSTORE_POSTSCRIPT_SIZE_MAX); + postscriptSize = postscriptBuffer->len; + WriteToFile(tableFooterFile, &postscriptSize, CSTORE_POSTSCRIPT_SIZE_LENGTH); + + SyncAndCloseFile(tableFooterFile); + + pfree(tableFooterBuffer->data); + pfree(tableFooterBuffer); + pfree(postscriptBuffer->data); + pfree(postscriptBuffer); +} + + +/* + * CreateEmptyStripeBuffers allocates an empty StripeBuffers structure with the given + * column count. 
+ */ +static StripeBuffers * +CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, uint32 blockRowCount, + uint32 columnCount) +{ + StripeBuffers *stripeBuffers = NULL; + uint32 columnIndex = 0; + uint32 maxBlockCount = (stripeMaxRowCount / blockRowCount) + 1; + ColumnBuffers **columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + uint32 blockIndex = 0; + ColumnBlockBuffers **blockBuffersArray = + palloc0(maxBlockCount * sizeof(ColumnBlockBuffers *)); + + for (blockIndex = 0; blockIndex < maxBlockCount; blockIndex++) + { + blockBuffersArray[blockIndex] = palloc0(sizeof(ColumnBlockBuffers)); + blockBuffersArray[blockIndex]->existsBuffer = NULL; + blockBuffersArray[blockIndex]->valueBuffer = NULL; + blockBuffersArray[blockIndex]->valueCompressionType = COMPRESSION_NONE; + } + + columnBuffersArray[columnIndex] = palloc0(sizeof(ColumnBuffers)); + columnBuffersArray[columnIndex]->blockBuffersArray = blockBuffersArray; + } + + stripeBuffers = palloc0(sizeof(StripeBuffers)); + stripeBuffers->columnBuffersArray = columnBuffersArray; + stripeBuffers->columnCount = columnCount; + stripeBuffers->rowCount = 0; + + return stripeBuffers; +} + + +/* + * CreateEmptyStripeSkipList allocates an empty StripeSkipList structure with + * the given column count. This structure has enough blocks to hold statistics + * for stripeMaxRowCount rows. 
+ */ +static StripeSkipList * +CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, + uint32 columnCount) +{ + StripeSkipList *stripeSkipList = NULL; + uint32 columnIndex = 0; + uint32 maxBlockCount = (stripeMaxRowCount / blockRowCount) + 1; + + ColumnBlockSkipNode **blockSkipNodeArray = + palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + blockSkipNodeArray[columnIndex] = + palloc0(maxBlockCount * sizeof(ColumnBlockSkipNode)); + } + + stripeSkipList = palloc0(sizeof(StripeSkipList)); + stripeSkipList->columnCount = columnCount; + stripeSkipList->blockCount = 0; + stripeSkipList->blockSkipNodeArray = blockSkipNodeArray; + + return stripeSkipList; +} + + +/* + * FlushStripe flushes current stripe data into the file. The function first ensures + * the last data block for each column is properly serialized and compressed. Then, + * the function creates the skip list and footer buffers. Finally, the function + * flushes the skip list, data, and footer buffers to the file. 
+ */
+static StripeMetadata
+FlushStripe(TableWriteState *writeState)
+{
+	StripeMetadata stripeMetadata = {0, 0, 0, 0};
+	uint64 skipListLength = 0;
+	uint64 dataLength = 0;
+	StringInfo *skipListBufferArray = NULL;
+	StripeFooter *stripeFooter = NULL;
+	StringInfo stripeFooterBuffer = NULL;
+	uint32 columnIndex = 0;
+	uint32 blockIndex = 0;
+	TableFooter *tableFooter = writeState->tableFooter;
+	FILE *tableFile = writeState->tableFile;
+	StripeBuffers *stripeBuffers = writeState->stripeBuffers;
+	StripeSkipList *stripeSkipList = writeState->stripeSkipList;
+	ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray;
+	TupleDesc tupleDescriptor = writeState->tupleDescriptor;
+	uint32 columnCount = tupleDescriptor->natts;
+	uint32 blockCount = stripeSkipList->blockCount;
+	uint32 blockRowCount = tableFooter->blockRowCount;
+	uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount;
+	uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount;
+
+	/*
+	 * check if the last block needs serialization; the last block was not serialized
+	 * if it was not full yet, e.g.
(rowCount > 0) + */ + if (lastBlockRowCount > 0) + { + SerializeBlockData(writeState, lastBlockIndex, lastBlockRowCount); + } + + /* update buffer sizes and positions in stripe skip list */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; + uint64 currentExistsBlockOffset = 0; + uint64 currentValueBlockOffset = 0; + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + uint64 existsBufferSize = blockBuffers->existsBuffer->len; + uint64 valueBufferSize = blockBuffers->valueBuffer->len; + CompressionType valueCompressionType = blockBuffers->valueCompressionType; + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + + blockSkipNode->existsBlockOffset = currentExistsBlockOffset; + blockSkipNode->existsLength = existsBufferSize; + blockSkipNode->valueBlockOffset = currentValueBlockOffset; + blockSkipNode->valueLength = valueBufferSize; + blockSkipNode->valueCompressionType = valueCompressionType; + + currentExistsBlockOffset += existsBufferSize; + currentValueBlockOffset += valueBufferSize; + } + } + + /* create skip list and footer buffers */ + skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor); + stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray); + stripeFooterBuffer = SerializeStripeFooter(stripeFooter); + + /* + * Each stripe has three sections: + * (1) Skip list, which contains statistics for each column block, and can + * be used to skip reading row blocks that are refuted by WHERE clause list, + * (2) Data section, in which we store data for each column continuously. + * We store data for each for each column in blocks. For each block, we + * store two buffers: "exists" buffer, and "value" buffer. 
"exists" buffer + * tells which values are not NULL. "value" buffer contains values for + * present values. For each column, we first store all "exists" buffers, + * and then all "value" buffers. + * (3) Stripe footer, which contains the skip list buffer size, exists buffer + * size, and value buffer size for each of the columns. + * + * We start by flushing the skip list buffers. + */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + StringInfo skipListBuffer = skipListBufferArray[columnIndex]; + WriteToFile(tableFile, skipListBuffer->data, skipListBuffer->len); + } + + /* then, we flush the data buffers */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + uint32 blockIndex = 0; + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + StringInfo existsBuffer = blockBuffers->existsBuffer; + + WriteToFile(tableFile, existsBuffer->data, existsBuffer->len); + } + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; + StringInfo valueBuffer = blockBuffers->valueBuffer; + + WriteToFile(tableFile, valueBuffer->data, valueBuffer->len); + } + } + + /* finally, we flush the footer buffer */ + WriteToFile(tableFile, stripeFooterBuffer->data, stripeFooterBuffer->len); + + /* set stripe metadata */ + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + skipListLength += stripeFooter->skipListSizeArray[columnIndex]; + dataLength += stripeFooter->existsSizeArray[columnIndex]; + dataLength += stripeFooter->valueSizeArray[columnIndex]; + } + + stripeMetadata.fileOffset = writeState->currentFileOffset; + stripeMetadata.skipListLength = skipListLength; + stripeMetadata.dataLength = dataLength; + 
stripeMetadata.footerLength = stripeFooterBuffer->len; + + /* advance current file offset */ + writeState->currentFileOffset += skipListLength; + writeState->currentFileOffset += dataLength; + writeState->currentFileOffset += stripeFooterBuffer->len; + + return stripeMetadata; +} + + +/* + * CreateSkipListBufferArray serializes the skip list for each column of the + * given stripe and returns the result as an array. + */ +static StringInfo * +CreateSkipListBufferArray(StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor) +{ + StringInfo *skipListBufferArray = NULL; + uint32 columnIndex = 0; + uint32 columnCount = stripeSkipList->columnCount; + + skipListBufferArray = palloc0(columnCount * sizeof(StringInfo)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + StringInfo skipListBuffer = NULL; + ColumnBlockSkipNode *blockSkipNodeArray = + stripeSkipList->blockSkipNodeArray[columnIndex]; + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + + skipListBuffer = SerializeColumnSkipList(blockSkipNodeArray, + stripeSkipList->blockCount, + attributeForm->attbyval, + attributeForm->attlen); + + skipListBufferArray[columnIndex] = skipListBuffer; + } + + return skipListBufferArray; +} + + +/* Creates and returns the footer for given stripe. 
*/ +static StripeFooter * +CreateStripeFooter(StripeSkipList *stripeSkipList, StringInfo *skipListBufferArray) +{ + StripeFooter *stripeFooter = NULL; + uint32 columnIndex = 0; + uint32 columnCount = stripeSkipList->columnCount; + uint64 *skipListSizeArray = palloc0(columnCount * sizeof(uint64)); + uint64 *existsSizeArray = palloc0(columnCount * sizeof(uint64)); + uint64 *valueSizeArray = palloc0(columnCount * sizeof(uint64)); + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockSkipNode *blockSkipNodeArray = + stripeSkipList->blockSkipNodeArray[columnIndex]; + uint32 blockIndex = 0; + + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + existsSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].existsLength; + valueSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].valueLength; + } + skipListSizeArray[columnIndex] = skipListBufferArray[columnIndex]->len; + } + + stripeFooter = palloc0(sizeof(StripeFooter)); + stripeFooter->columnCount = columnCount; + stripeFooter->skipListSizeArray = skipListSizeArray; + stripeFooter->existsSizeArray = existsSizeArray; + stripeFooter->valueSizeArray = valueSizeArray; + + return stripeFooter; +} + + +/* + * SerializeBoolArray serializes the given boolean array and returns the result + * as a StringInfo. This function packs every 8 boolean values into one byte. 
+ */ +static StringInfo +SerializeBoolArray(bool *boolArray, uint32 boolArrayLength) +{ + StringInfo boolArrayBuffer = NULL; + uint32 boolArrayIndex = 0; + uint32 byteCount = (boolArrayLength + 7) / 8; + + boolArrayBuffer = makeStringInfo(); + enlargeStringInfo(boolArrayBuffer, byteCount); + boolArrayBuffer->len = byteCount; + memset(boolArrayBuffer->data, 0, byteCount); + + for (boolArrayIndex = 0; boolArrayIndex < boolArrayLength; boolArrayIndex++) + { + if (boolArray[boolArrayIndex]) + { + uint32 byteIndex = boolArrayIndex / 8; + uint32 bitIndex = boolArrayIndex % 8; + boolArrayBuffer->data[byteIndex] |= (1 << bitIndex); + } + } + + return boolArrayBuffer; +} + + +/* + * SerializeSingleDatum serializes the given datum value and appends it to the + * provided string info buffer. + */ +static void +SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue, + int datumTypeLength, char datumTypeAlign) +{ + uint32 datumLength = att_addlength_datum(0, datumTypeLength, datum); + uint32 datumLengthAligned = att_align_nominal(datumLength, datumTypeAlign); + char *currentDatumDataPointer = NULL; + + enlargeStringInfo(datumBuffer, datumLengthAligned); + + currentDatumDataPointer = datumBuffer->data + datumBuffer->len; + memset(currentDatumDataPointer, 0, datumLengthAligned); + + if (datumTypeLength > 0) + { + if (datumTypeByValue) + { + store_att_byval(currentDatumDataPointer, datum, datumTypeLength); + } + else + { + memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumTypeLength); + } + } + else + { + Assert(!datumTypeByValue); + memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumLength); + } + + datumBuffer->len += datumLengthAligned; +} + + +/* + * SerializeBlockData serializes and compresses block data at given block index with given + * compression type for every column. 
+ */
+static void
+SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCount)
+{
+	uint32 columnIndex = 0;
+	StripeBuffers *stripeBuffers = writeState->stripeBuffers;
+	ColumnBlockData **blockDataArray = writeState->blockDataArray;
+	CompressionType requestedCompressionType = writeState->compressionType;
+	const uint32 columnCount = stripeBuffers->columnCount;
+	StringInfo compressionBuffer = writeState->compressionBuffer;
+
+	/* serialize exist values, data values are already serialized */
+	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
+	{
+		ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex];
+		ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex];
+		ColumnBlockData *blockData = blockDataArray[columnIndex];
+
+		blockBuffers->existsBuffer = SerializeBoolArray(blockData->existsArray, rowCount);
+	}
+
+	/*
+	 * check and compress value buffers, if a value buffer is not compressible
+	 * then keep it as uncompressed, store compression information.
+	 */
+	for (columnIndex = 0; columnIndex < columnCount; columnIndex++)
+	{
+		ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex];
+		ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex];
+		ColumnBlockData *blockData = blockDataArray[columnIndex];
+		StringInfo serializedValueBuffer = NULL;
+		CompressionType actualCompressionType = COMPRESSION_NONE;
+		bool compressed = false;
+
+		serializedValueBuffer = blockData->valueBuffer;
+
+		/* the only other supported compression type is pg_lz for now */
+		Assert(requestedCompressionType == COMPRESSION_NONE ||
+			   requestedCompressionType == COMPRESSION_PG_LZ);
+
+		/*
+		 * if serializedValueBuffer can be compressed, update serializedValueBuffer
+		 * with compressed data and store compression type.
+ */ + compressed = CompressBuffer(serializedValueBuffer, compressionBuffer, + requestedCompressionType); + if (compressed) + { + serializedValueBuffer = compressionBuffer; + actualCompressionType = COMPRESSION_PG_LZ; + } + + /* store (compressed) value buffer */ + blockBuffers->valueCompressionType = actualCompressionType; + blockBuffers->valueBuffer = CopyStringInfo(serializedValueBuffer); + + /* valueBuffer needs to be reset for next block's data */ + resetStringInfo(blockData->valueBuffer); + } +} + + +/* + * UpdateBlockSkipNodeMinMax takes the given column value, and checks if this + * value falls outside the range of minimum/maximum values of the given column + * block skip node. If it does, the function updates the column block skip node + * accordingly. + */ +static void +UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, Datum columnValue, + bool columnTypeByValue, int columnTypeLength, + Oid columnCollation, FmgrInfo *comparisonFunction) +{ + bool hasMinMax = blockSkipNode->hasMinMax; + Datum previousMinimum = blockSkipNode->minimumValue; + Datum previousMaximum = blockSkipNode->maximumValue; + Datum currentMinimum = 0; + Datum currentMaximum = 0; + + /* if type doesn't have a comparison function, skip min/max values */ + if (comparisonFunction == NULL) + { + return; + } + + if (!hasMinMax) + { + currentMinimum = DatumCopy(columnValue, columnTypeByValue, columnTypeLength); + currentMaximum = DatumCopy(columnValue, columnTypeByValue, columnTypeLength); + } + else + { + Datum minimumComparisonDatum = FunctionCall2Coll(comparisonFunction, + columnCollation, columnValue, + previousMinimum); + Datum maximumComparisonDatum = FunctionCall2Coll(comparisonFunction, + columnCollation, columnValue, + previousMaximum); + int minimumComparison = DatumGetInt32(minimumComparisonDatum); + int maximumComparison = DatumGetInt32(maximumComparisonDatum); + + if (minimumComparison < 0) + { + currentMinimum = DatumCopy(columnValue, columnTypeByValue, 
columnTypeLength); + } + else + { + currentMinimum = previousMinimum; + } + + if (maximumComparison > 0) + { + currentMaximum = DatumCopy(columnValue, columnTypeByValue, columnTypeLength); + } + else + { + currentMaximum = previousMaximum; + } + } + + blockSkipNode->hasMinMax = true; + blockSkipNode->minimumValue = currentMinimum; + blockSkipNode->maximumValue = currentMaximum; +} + + +/* Creates a copy of the given datum. */ +static Datum +DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength) +{ + Datum datumCopy = 0; + + if (datumTypeByValue) + { + datumCopy = datum; + } + else + { + uint32 datumLength = att_addlength_datum(0, datumTypeLength, datum); + char *datumData = palloc0(datumLength); + memcpy(datumData, DatumGetPointer(datum), datumLength); + + datumCopy = PointerGetDatum(datumData); + } + + return datumCopy; +} + + +/* + * AppendStripeMetadata adds a copy of given stripeMetadata to the given + * table footer's stripeMetadataList. + */ +static void +AppendStripeMetadata(TableFooter *tableFooter, StripeMetadata stripeMetadata) +{ + StripeMetadata *stripeMetadataCopy = palloc0(sizeof(StripeMetadata)); + memcpy(stripeMetadataCopy, &stripeMetadata, sizeof(StripeMetadata)); + + tableFooter->stripeMetadataList = lappend(tableFooter->stripeMetadataList, + stripeMetadataCopy); +} + + +/* Writes the given data to the given file pointer and checks for errors. */ +static void +WriteToFile(FILE *file, void *data, uint32 dataLength) +{ + int writeResult = 0; + int errorResult = 0; + + if (dataLength == 0) + { + return; + } + + errno = 0; + writeResult = fwrite(data, dataLength, 1, file); + if (writeResult != 1) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not write file: %m"))); + } + + errorResult = ferror(file); + if (errorResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("error in file: %m"))); + } +} + + +/* Flushes, syncs, and closes the given file pointer and checks for errors. 
*/ +static void +SyncAndCloseFile(FILE *file) +{ + int flushResult = 0; + int syncResult = 0; + int errorResult = 0; + int freeResult = 0; + + errno = 0; + flushResult = fflush(file); + if (flushResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not flush file: %m"))); + } + + syncResult = pg_fsync(fileno(file)); + if (syncResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not sync file: %m"))); + } + + errorResult = ferror(file); + if (errorResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("error in file: %m"))); + } + + freeResult = FreeFile(file); + if (freeResult != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not close file: %m"))); + } +} + + +/* + * CopyStringInfo creates a deep copy of given source string allocating only needed + * amount of memory. + */ +static StringInfo +CopyStringInfo(StringInfo sourceString) +{ + StringInfo targetString = palloc0(sizeof(StringInfoData)); + + if (sourceString->len > 0) + { + targetString->data = palloc0(sourceString->len); + targetString->len = sourceString->len; + targetString->maxlen = sourceString->len; + memcpy(targetString->data, sourceString->data, sourceString->len); + } + + return targetString; +} diff --git a/data/array_types.csv b/data/array_types.csv new file mode 100644 index 000000000..f20e2d2d0 --- /dev/null +++ b/data/array_types.csv @@ -0,0 +1,3 @@ +"{1,2,3}","{1,2,3}","{a,b,c}" +{},{},{} +"{-2147483648,2147483647}","{-9223372036854775808,9223372036854775807}","{""""}" diff --git a/data/block_filtering.csv b/data/block_filtering.csv new file mode 100644 index 000000000..9812045fd --- /dev/null +++ b/data/block_filtering.csv @@ -0,0 +1,10000 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 
+70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 +99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 
+476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 +698 +699 +700 +701 +702 +703 +704 +705 +706 +707 +708 +709 +710 +711 +712 +713 +714 +715 +716 +717 +718 +719 +720 +721 +722 +723 +724 +725 +726 +727 +728 +729 +730 +731 +732 +733 +734 +735 +736 +737 +738 +739 +740 +741 +742 +743 +744 +745 +746 +747 +748 +749 +750 +751 +752 +753 +754 +755 +756 +757 +758 +759 +760 +761 +762 +763 +764 +765 +766 +767 +768 +769 +770 +771 +772 +773 +774 +775 +776 +777 +778 +779 +780 +781 +782 +783 +784 +785 +786 +787 +788 +789 +790 +791 +792 +793 +794 +795 +796 +797 +798 +799 +800 +801 +802 +803 +804 +805 +806 +807 +808 +809 +810 +811 +812 +813 +814 +815 +816 +817 +818 +819 +820 +821 +822 +823 +824 +825 +826 +827 +828 +829 +830 +831 +832 +833 +834 +835 +836 +837 +838 +839 +840 +841 +842 +843 +844 +845 +846 +847 +848 +849 +850 +851 +852 +853 +854 +855 +856 +857 +858 +859 +860 +861 +862 +863 +864 +865 +866 +867 +868 +869 +870 +871 +872 +873 +874 +875 
+876 +877 +878 +879 +880 +881 +882 +883 +884 +885 +886 +887 +888 +889 +890 +891 +892 +893 +894 +895 +896 +897 +898 +899 +900 +901 +902 +903 +904 +905 +906 +907 +908 +909 +910 +911 +912 +913 +914 +915 +916 +917 +918 +919 +920 +921 +922 +923 +924 +925 +926 +927 +928 +929 +930 +931 +932 +933 +934 +935 +936 +937 +938 +939 +940 +941 +942 +943 +944 +945 +946 +947 +948 +949 +950 +951 +952 +953 +954 +955 +956 +957 +958 +959 +960 +961 +962 +963 +964 +965 +966 +967 +968 +969 +970 +971 +972 +973 +974 +975 +976 +977 +978 +979 +980 +981 +982 +983 +984 +985 +986 +987 +988 +989 +990 +991 +992 +993 +994 +995 +996 +997 +998 +999 +1000 +1001 +1002 +1003 +1004 +1005 +1006 +1007 +1008 +1009 +1010 +1011 +1012 +1013 +1014 +1015 +1016 +1017 +1018 +1019 +1020 +1021 +1022 +1023 +1024 +1025 +1026 +1027 +1028 +1029 +1030 +1031 +1032 +1033 +1034 +1035 +1036 +1037 +1038 +1039 +1040 +1041 +1042 +1043 +1044 +1045 +1046 +1047 +1048 +1049 +1050 +1051 +1052 +1053 +1054 +1055 +1056 +1057 +1058 +1059 +1060 +1061 +1062 +1063 +1064 +1065 +1066 +1067 +1068 +1069 +1070 +1071 +1072 +1073 +1074 +1075 +1076 +1077 +1078 +1079 +1080 +1081 +1082 +1083 +1084 +1085 +1086 +1087 +1088 +1089 +1090 +1091 +1092 +1093 +1094 +1095 +1096 +1097 +1098 +1099 +1100 +1101 +1102 +1103 +1104 +1105 +1106 +1107 +1108 +1109 +1110 +1111 +1112 +1113 +1114 +1115 +1116 +1117 +1118 +1119 +1120 +1121 +1122 +1123 +1124 +1125 +1126 +1127 +1128 +1129 +1130 +1131 +1132 +1133 +1134 +1135 +1136 +1137 +1138 +1139 +1140 +1141 +1142 +1143 +1144 +1145 +1146 +1147 +1148 +1149 +1150 +1151 +1152 +1153 +1154 +1155 +1156 +1157 +1158 +1159 +1160 +1161 +1162 +1163 +1164 +1165 +1166 +1167 +1168 +1169 +1170 +1171 +1172 +1173 +1174 +1175 +1176 +1177 +1178 +1179 +1180 +1181 +1182 +1183 +1184 +1185 +1186 +1187 +1188 +1189 +1190 +1191 +1192 +1193 +1194 +1195 +1196 +1197 +1198 +1199 +1200 +1201 +1202 +1203 +1204 +1205 +1206 +1207 +1208 +1209 +1210 +1211 +1212 +1213 +1214 +1215 +1216 +1217 +1218 +1219 +1220 +1221 +1222 +1223 +1224 +1225 +1226 +1227 +1228 +1229 
+1230 +1231 +1232 +1233 +1234 +1235 +1236 +1237 +1238 +1239 +1240 +1241 +1242 +1243 +1244 +1245 +1246 +1247 +1248 +1249 +1250 +1251 +1252 +1253 +1254 +1255 +1256 +1257 +1258 +1259 +1260 +1261 +1262 +1263 +1264 +1265 +1266 +1267 +1268 +1269 +1270 +1271 +1272 +1273 +1274 +1275 +1276 +1277 +1278 +1279 +1280 +1281 +1282 +1283 +1284 +1285 +1286 +1287 +1288 +1289 +1290 +1291 +1292 +1293 +1294 +1295 +1296 +1297 +1298 +1299 +1300 +1301 +1302 +1303 +1304 +1305 +1306 +1307 +1308 +1309 +1310 +1311 +1312 +1313 +1314 +1315 +1316 +1317 +1318 +1319 +1320 +1321 +1322 +1323 +1324 +1325 +1326 +1327 +1328 +1329 +1330 +1331 +1332 +1333 +1334 +1335 +1336 +1337 +1338 +1339 +1340 +1341 +1342 +1343 +1344 +1345 +1346 +1347 +1348 +1349 +1350 +1351 +1352 +1353 +1354 +1355 +1356 +1357 +1358 +1359 +1360 +1361 +1362 +1363 +1364 +1365 +1366 +1367 +1368 +1369 +1370 +1371 +1372 +1373 +1374 +1375 +1376 +1377 +1378 +1379 +1380 +1381 +1382 +1383 +1384 +1385 +1386 +1387 +1388 +1389 +1390 +1391 +1392 +1393 +1394 +1395 +1396 +1397 +1398 +1399 +1400 +1401 +1402 +1403 +1404 +1405 +1406 +1407 +1408 +1409 +1410 +1411 +1412 +1413 +1414 +1415 +1416 +1417 +1418 +1419 +1420 +1421 +1422 +1423 +1424 +1425 +1426 +1427 +1428 +1429 +1430 +1431 +1432 +1433 +1434 +1435 +1436 +1437 +1438 +1439 +1440 +1441 +1442 +1443 +1444 +1445 +1446 +1447 +1448 +1449 +1450 +1451 +1452 +1453 +1454 +1455 +1456 +1457 +1458 +1459 +1460 +1461 +1462 +1463 +1464 +1465 +1466 +1467 +1468 +1469 +1470 +1471 +1472 +1473 +1474 +1475 +1476 +1477 +1478 +1479 +1480 +1481 +1482 +1483 +1484 +1485 +1486 +1487 +1488 +1489 +1490 +1491 +1492 +1493 +1494 +1495 +1496 +1497 +1498 +1499 +1500 +1501 +1502 +1503 +1504 +1505 +1506 +1507 +1508 +1509 +1510 +1511 +1512 +1513 +1514 +1515 +1516 +1517 +1518 +1519 +1520 +1521 +1522 +1523 +1524 +1525 +1526 +1527 +1528 +1529 +1530 +1531 +1532 +1533 +1534 +1535 +1536 +1537 +1538 +1539 +1540 +1541 +1542 +1543 +1544 +1545 +1546 +1547 +1548 +1549 +1550 +1551 +1552 +1553 +1554 +1555 +1556 +1557 +1558 +1559 +1560 +1561 +1562 
+1563 +1564 +1565 +1566 +1567 +1568 +1569 +1570 +1571 +1572 +1573 +1574 +1575 +1576 +1577 +1578 +1579 +1580 +1581 +1582 +1583 +1584 +1585 +1586 +1587 +1588 +1589 +1590 +1591 +1592 +1593 +1594 +1595 +1596 +1597 +1598 +1599 +1600 +1601 +1602 +1603 +1604 +1605 +1606 +1607 +1608 +1609 +1610 +1611 +1612 +1613 +1614 +1615 +1616 +1617 +1618 +1619 +1620 +1621 +1622 +1623 +1624 +1625 +1626 +1627 +1628 +1629 +1630 +1631 +1632 +1633 +1634 +1635 +1636 +1637 +1638 +1639 +1640 +1641 +1642 +1643 +1644 +1645 +1646 +1647 +1648 +1649 +1650 +1651 +1652 +1653 +1654 +1655 +1656 +1657 +1658 +1659 +1660 +1661 +1662 +1663 +1664 +1665 +1666 +1667 +1668 +1669 +1670 +1671 +1672 +1673 +1674 +1675 +1676 +1677 +1678 +1679 +1680 +1681 +1682 +1683 +1684 +1685 +1686 +1687 +1688 +1689 +1690 +1691 +1692 +1693 +1694 +1695 +1696 +1697 +1698 +1699 +1700 +1701 +1702 +1703 +1704 +1705 +1706 +1707 +1708 +1709 +1710 +1711 +1712 +1713 +1714 +1715 +1716 +1717 +1718 +1719 +1720 +1721 +1722 +1723 +1724 +1725 +1726 +1727 +1728 +1729 +1730 +1731 +1732 +1733 +1734 +1735 +1736 +1737 +1738 +1739 +1740 +1741 +1742 +1743 +1744 +1745 +1746 +1747 +1748 +1749 +1750 +1751 +1752 +1753 +1754 +1755 +1756 +1757 +1758 +1759 +1760 +1761 +1762 +1763 +1764 +1765 +1766 +1767 +1768 +1769 +1770 +1771 +1772 +1773 +1774 +1775 +1776 +1777 +1778 +1779 +1780 +1781 +1782 +1783 +1784 +1785 +1786 +1787 +1788 +1789 +1790 +1791 +1792 +1793 +1794 +1795 +1796 +1797 +1798 +1799 +1800 +1801 +1802 +1803 +1804 +1805 +1806 +1807 +1808 +1809 +1810 +1811 +1812 +1813 +1814 +1815 +1816 +1817 +1818 +1819 +1820 +1821 +1822 +1823 +1824 +1825 +1826 +1827 +1828 +1829 +1830 +1831 +1832 +1833 +1834 +1835 +1836 +1837 +1838 +1839 +1840 +1841 +1842 +1843 +1844 +1845 +1846 +1847 +1848 +1849 +1850 +1851 +1852 +1853 +1854 +1855 +1856 +1857 +1858 +1859 +1860 +1861 +1862 +1863 +1864 +1865 +1866 +1867 +1868 +1869 +1870 +1871 +1872 +1873 +1874 +1875 +1876 +1877 +1878 +1879 +1880 +1881 +1882 +1883 +1884 +1885 +1886 +1887 +1888 +1889 +1890 +1891 +1892 +1893 +1894 +1895 
+1896 +1897 +1898 +1899 +1900 +1901 +1902 +1903 +1904 +1905 +1906 +1907 +1908 +1909 +1910 +1911 +1912 +1913 +1914 +1915 +1916 +1917 +1918 +1919 +1920 +1921 +1922 +1923 +1924 +1925 +1926 +1927 +1928 +1929 +1930 +1931 +1932 +1933 +1934 +1935 +1936 +1937 +1938 +1939 +1940 +1941 +1942 +1943 +1944 +1945 +1946 +1947 +1948 +1949 +1950 +1951 +1952 +1953 +1954 +1955 +1956 +1957 +1958 +1959 +1960 +1961 +1962 +1963 +1964 +1965 +1966 +1967 +1968 +1969 +1970 +1971 +1972 +1973 +1974 +1975 +1976 +1977 +1978 +1979 +1980 +1981 +1982 +1983 +1984 +1985 +1986 +1987 +1988 +1989 +1990 +1991 +1992 +1993 +1994 +1995 +1996 +1997 +1998 +1999 +2000 +2001 +2002 +2003 +2004 +2005 +2006 +2007 +2008 +2009 +2010 +2011 +2012 +2013 +2014 +2015 +2016 +2017 +2018 +2019 +2020 +2021 +2022 +2023 +2024 +2025 +2026 +2027 +2028 +2029 +2030 +2031 +2032 +2033 +2034 +2035 +2036 +2037 +2038 +2039 +2040 +2041 +2042 +2043 +2044 +2045 +2046 +2047 +2048 +2049 +2050 +2051 +2052 +2053 +2054 +2055 +2056 +2057 +2058 +2059 +2060 +2061 +2062 +2063 +2064 +2065 +2066 +2067 +2068 +2069 +2070 +2071 +2072 +2073 +2074 +2075 +2076 +2077 +2078 +2079 +2080 +2081 +2082 +2083 +2084 +2085 +2086 +2087 +2088 +2089 +2090 +2091 +2092 +2093 +2094 +2095 +2096 +2097 +2098 +2099 +2100 +2101 +2102 +2103 +2104 +2105 +2106 +2107 +2108 +2109 +2110 +2111 +2112 +2113 +2114 +2115 +2116 +2117 +2118 +2119 +2120 +2121 +2122 +2123 +2124 +2125 +2126 +2127 +2128 +2129 +2130 +2131 +2132 +2133 +2134 +2135 +2136 +2137 +2138 +2139 +2140 +2141 +2142 +2143 +2144 +2145 +2146 +2147 +2148 +2149 +2150 +2151 +2152 +2153 +2154 +2155 +2156 +2157 +2158 +2159 +2160 +2161 +2162 +2163 +2164 +2165 +2166 +2167 +2168 +2169 +2170 +2171 +2172 +2173 +2174 +2175 +2176 +2177 +2178 +2179 +2180 +2181 +2182 +2183 +2184 +2185 +2186 +2187 +2188 +2189 +2190 +2191 +2192 +2193 +2194 +2195 +2196 +2197 +2198 +2199 +2200 +2201 +2202 +2203 +2204 +2205 +2206 +2207 +2208 +2209 +2210 +2211 +2212 +2213 +2214 +2215 +2216 +2217 +2218 +2219 +2220 +2221 +2222 +2223 +2224 +2225 +2226 +2227 +2228 
+2229 +2230 +2231 +2232 +2233 +2234 +2235 +2236 +2237 +2238 +2239 +2240 +2241 +2242 +2243 +2244 +2245 +2246 +2247 +2248 +2249 +2250 +2251 +2252 +2253 +2254 +2255 +2256 +2257 +2258 +2259 +2260 +2261 +2262 +2263 +2264 +2265 +2266 +2267 +2268 +2269 +2270 +2271 +2272 +2273 +2274 +2275 +2276 +2277 +2278 +2279 +2280 +2281 +2282 +2283 +2284 +2285 +2286 +2287 +2288 +2289 +2290 +2291 +2292 +2293 +2294 +2295 +2296 +2297 +2298 +2299 +2300 +2301 +2302 +2303 +2304 +2305 +2306 +2307 +2308 +2309 +2310 +2311 +2312 +2313 +2314 +2315 +2316 +2317 +2318 +2319 +2320 +2321 +2322 +2323 +2324 +2325 +2326 +2327 +2328 +2329 +2330 +2331 +2332 +2333 +2334 +2335 +2336 +2337 +2338 +2339 +2340 +2341 +2342 +2343 +2344 +2345 +2346 +2347 +2348 +2349 +2350 +2351 +2352 +2353 +2354 +2355 +2356 +2357 +2358 +2359 +2360 +2361 +2362 +2363 +2364 +2365 +2366 +2367 +2368 +2369 +2370 +2371 +2372 +2373 +2374 +2375 +2376 +2377 +2378 +2379 +2380 +2381 +2382 +2383 +2384 +2385 +2386 +2387 +2388 +2389 +2390 +2391 +2392 +2393 +2394 +2395 +2396 +2397 +2398 +2399 +2400 +2401 +2402 +2403 +2404 +2405 +2406 +2407 +2408 +2409 +2410 +2411 +2412 +2413 +2414 +2415 +2416 +2417 +2418 +2419 +2420 +2421 +2422 +2423 +2424 +2425 +2426 +2427 +2428 +2429 +2430 +2431 +2432 +2433 +2434 +2435 +2436 +2437 +2438 +2439 +2440 +2441 +2442 +2443 +2444 +2445 +2446 +2447 +2448 +2449 +2450 +2451 +2452 +2453 +2454 +2455 +2456 +2457 +2458 +2459 +2460 +2461 +2462 +2463 +2464 +2465 +2466 +2467 +2468 +2469 +2470 +2471 +2472 +2473 +2474 +2475 +2476 +2477 +2478 +2479 +2480 +2481 +2482 +2483 +2484 +2485 +2486 +2487 +2488 +2489 +2490 +2491 +2492 +2493 +2494 +2495 +2496 +2497 +2498 +2499 +2500 +2501 +2502 +2503 +2504 +2505 +2506 +2507 +2508 +2509 +2510 +2511 +2512 +2513 +2514 +2515 +2516 +2517 +2518 +2519 +2520 +2521 +2522 +2523 +2524 +2525 +2526 +2527 +2528 +2529 +2530 +2531 +2532 +2533 +2534 +2535 +2536 +2537 +2538 +2539 +2540 +2541 +2542 +2543 +2544 +2545 +2546 +2547 +2548 +2549 +2550 +2551 +2552 +2553 +2554 +2555 +2556 +2557 +2558 +2559 +2560 +2561 
+2562 +2563 +2564 +2565 +2566 +2567 +2568 +2569 +2570 +2571 +2572 +2573 +2574 +2575 +2576 +2577 +2578 +2579 +2580 +2581 +2582 +2583 +2584 +2585 +2586 +2587 +2588 +2589 +2590 +2591 +2592 +2593 +2594 +2595 +2596 +2597 +2598 +2599 +2600 +2601 +2602 +2603 +2604 +2605 +2606 +2607 +2608 +2609 +2610 +2611 +2612 +2613 +2614 +2615 +2616 +2617 +2618 +2619 +2620 +2621 +2622 +2623 +2624 +2625 +2626 +2627 +2628 +2629 +2630 +2631 +2632 +2633 +2634 +2635 +2636 +2637 +2638 +2639 +2640 +2641 +2642 +2643 +2644 +2645 +2646 +2647 +2648 +2649 +2650 +2651 +2652 +2653 +2654 +2655 +2656 +2657 +2658 +2659 +2660 +2661 +2662 +2663 +2664 +2665 +2666 +2667 +2668 +2669 +2670 +2671 +2672 +2673 +2674 +2675 +2676 +2677 +2678 +2679 +2680 +2681 +2682 +2683 +2684 +2685 +2686 +2687 +2688 +2689 +2690 +2691 +2692 +2693 +2694 +2695 +2696 +2697 +2698 +2699 +2700 +2701 +2702 +2703 +2704 +2705 +2706 +2707 +2708 +2709 +2710 +2711 +2712 +2713 +2714 +2715 +2716 +2717 +2718 +2719 +2720 +2721 +2722 +2723 +2724 +2725 +2726 +2727 +2728 +2729 +2730 +2731 +2732 +2733 +2734 +2735 +2736 +2737 +2738 +2739 +2740 +2741 +2742 +2743 +2744 +2745 +2746 +2747 +2748 +2749 +2750 +2751 +2752 +2753 +2754 +2755 +2756 +2757 +2758 +2759 +2760 +2761 +2762 +2763 +2764 +2765 +2766 +2767 +2768 +2769 +2770 +2771 +2772 +2773 +2774 +2775 +2776 +2777 +2778 +2779 +2780 +2781 +2782 +2783 +2784 +2785 +2786 +2787 +2788 +2789 +2790 +2791 +2792 +2793 +2794 +2795 +2796 +2797 +2798 +2799 +2800 +2801 +2802 +2803 +2804 +2805 +2806 +2807 +2808 +2809 +2810 +2811 +2812 +2813 +2814 +2815 +2816 +2817 +2818 +2819 +2820 +2821 +2822 +2823 +2824 +2825 +2826 +2827 +2828 +2829 +2830 +2831 +2832 +2833 +2834 +2835 +2836 +2837 +2838 +2839 +2840 +2841 +2842 +2843 +2844 +2845 +2846 +2847 +2848 +2849 +2850 +2851 +2852 +2853 +2854 +2855 +2856 +2857 +2858 +2859 +2860 +2861 +2862 +2863 +2864 +2865 +2866 +2867 +2868 +2869 +2870 +2871 +2872 +2873 +2874 +2875 +2876 +2877 +2878 +2879 +2880 +2881 +2882 +2883 +2884 +2885 +2886 +2887 +2888 +2889 +2890 +2891 +2892 +2893 +2894 
+2895 +2896 +2897 +2898 +2899 +2900 +2901 +2902 +2903 +2904 +2905 +2906 +2907 +2908 +2909 +2910 +2911 +2912 +2913 +2914 +2915 +2916 +2917 +2918 +2919 +2920 +2921 +2922 +2923 +2924 +2925 +2926 +2927 +2928 +2929 +2930 +2931 +2932 +2933 +2934 +2935 +2936 +2937 +2938 +2939 +2940 +2941 +2942 +2943 +2944 +2945 +2946 +2947 +2948 +2949 +2950 +2951 +2952 +2953 +2954 +2955 +2956 +2957 +2958 +2959 +2960 +2961 +2962 +2963 +2964 +2965 +2966 +2967 +2968 +2969 +2970 +2971 +2972 +2973 +2974 +2975 +2976 +2977 +2978 +2979 +2980 +2981 +2982 +2983 +2984 +2985 +2986 +2987 +2988 +2989 +2990 +2991 +2992 +2993 +2994 +2995 +2996 +2997 +2998 +2999 +3000 +3001 +3002 +3003 +3004 +3005 +3006 +3007 +3008 +3009 +3010 +3011 +3012 +3013 +3014 +3015 +3016 +3017 +3018 +3019 +3020 +3021 +3022 +3023 +3024 +3025 +3026 +3027 +3028 +3029 +3030 +3031 +3032 +3033 +3034 +3035 +3036 +3037 +3038 +3039 +3040 +3041 +3042 +3043 +3044 +3045 +3046 +3047 +3048 +3049 +3050 +3051 +3052 +3053 +3054 +3055 +3056 +3057 +3058 +3059 +3060 +3061 +3062 +3063 +3064 +3065 +3066 +3067 +3068 +3069 +3070 +3071 +3072 +3073 +3074 +3075 +3076 +3077 +3078 +3079 +3080 +3081 +3082 +3083 +3084 +3085 +3086 +3087 +3088 +3089 +3090 +3091 +3092 +3093 +3094 +3095 +3096 +3097 +3098 +3099 +3100 +3101 +3102 +3103 +3104 +3105 +3106 +3107 +3108 +3109 +3110 +3111 +3112 +3113 +3114 +3115 +3116 +3117 +3118 +3119 +3120 +3121 +3122 +3123 +3124 +3125 +3126 +3127 +3128 +3129 +3130 +3131 +3132 +3133 +3134 +3135 +3136 +3137 +3138 +3139 +3140 +3141 +3142 +3143 +3144 +3145 +3146 +3147 +3148 +3149 +3150 +3151 +3152 +3153 +3154 +3155 +3156 +3157 +3158 +3159 +3160 +3161 +3162 +3163 +3164 +3165 +3166 +3167 +3168 +3169 +3170 +3171 +3172 +3173 +3174 +3175 +3176 +3177 +3178 +3179 +3180 +3181 +3182 +3183 +3184 +3185 +3186 +3187 +3188 +3189 +3190 +3191 +3192 +3193 +3194 +3195 +3196 +3197 +3198 +3199 +3200 +3201 +3202 +3203 +3204 +3205 +3206 +3207 +3208 +3209 +3210 +3211 +3212 +3213 +3214 +3215 +3216 +3217 +3218 +3219 +3220 +3221 +3222 +3223 +3224 +3225 +3226 +3227 
+3228 +3229 +3230 +3231 +3232 +3233 +3234 +3235 +3236 +3237 +3238 +3239 +3240 +3241 +3242 +3243 +3244 +3245 +3246 +3247 +3248 +3249 +3250 +3251 +3252 +3253 +3254 +3255 +3256 +3257 +3258 +3259 +3260 +3261 +3262 +3263 +3264 +3265 +3266 +3267 +3268 +3269 +3270 +3271 +3272 +3273 +3274 +3275 +3276 +3277 +3278 +3279 +3280 +3281 +3282 +3283 +3284 +3285 +3286 +3287 +3288 +3289 +3290 +3291 +3292 +3293 +3294 +3295 +3296 +3297 +3298 +3299 +3300 +3301 +3302 +3303 +3304 +3305 +3306 +3307 +3308 +3309 +3310 +3311 +3312 +3313 +3314 +3315 +3316 +3317 +3318 +3319 +3320 +3321 +3322 +3323 +3324 +3325 +3326 +3327 +3328 +3329 +3330 +3331 +3332 +3333 +3334 +3335 +3336 +3337 +3338 +3339 +3340 +3341 +3342 +3343 +3344 +3345 +3346 +3347 +3348 +3349 +3350 +3351 +3352 +3353 +3354 +3355 +3356 +3357 +3358 +3359 +3360 +3361 +3362 +3363 +3364 +3365 +3366 +3367 +3368 +3369 +3370 +3371 +3372 +3373 +3374 +3375 +3376 +3377 +3378 +3379 +3380 +3381 +3382 +3383 +3384 +3385 +3386 +3387 +3388 +3389 +3390 +3391 +3392 +3393 +3394 +3395 +3396 +3397 +3398 +3399 +3400 +3401 +3402 +3403 +3404 +3405 +3406 +3407 +3408 +3409 +3410 +3411 +3412 +3413 +3414 +3415 +3416 +3417 +3418 +3419 +3420 +3421 +3422 +3423 +3424 +3425 +3426 +3427 +3428 +3429 +3430 +3431 +3432 +3433 +3434 +3435 +3436 +3437 +3438 +3439 +3440 +3441 +3442 +3443 +3444 +3445 +3446 +3447 +3448 +3449 +3450 +3451 +3452 +3453 +3454 +3455 +3456 +3457 +3458 +3459 +3460 +3461 +3462 +3463 +3464 +3465 +3466 +3467 +3468 +3469 +3470 +3471 +3472 +3473 +3474 +3475 +3476 +3477 +3478 +3479 +3480 +3481 +3482 +3483 +3484 +3485 +3486 +3487 +3488 +3489 +3490 +3491 +3492 +3493 +3494 +3495 +3496 +3497 +3498 +3499 +3500 +3501 +3502 +3503 +3504 +3505 +3506 +3507 +3508 +3509 +3510 +3511 +3512 +3513 +3514 +3515 +3516 +3517 +3518 +3519 +3520 +3521 +3522 +3523 +3524 +3525 +3526 +3527 +3528 +3529 +3530 +3531 +3532 +3533 +3534 +3535 +3536 +3537 +3538 +3539 +3540 +3541 +3542 +3543 +3544 +3545 +3546 +3547 +3548 +3549 +3550 +3551 +3552 +3553 +3554 +3555 +3556 +3557 +3558 +3559 +3560 
+3561 +3562 +3563 +3564 +3565 +3566 +3567 +3568 +3569 +3570 +3571 +3572 +3573 +3574 +3575 +3576 +3577 +3578 +3579 +3580 +3581 +3582 +3583 +3584 +3585 +3586 +3587 +3588 +3589 +3590 +3591 +3592 +3593 +3594 +3595 +3596 +3597 +3598 +3599 +3600 +3601 +3602 +3603 +3604 +3605 +3606 +3607 +3608 +3609 +3610 +3611 +3612 +3613 +3614 +3615 +3616 +3617 +3618 +3619 +3620 +3621 +3622 +3623 +3624 +3625 +3626 +3627 +3628 +3629 +3630 +3631 +3632 +3633 +3634 +3635 +3636 +3637 +3638 +3639 +3640 +3641 +3642 +3643 +3644 +3645 +3646 +3647 +3648 +3649 +3650 +3651 +3652 +3653 +3654 +3655 +3656 +3657 +3658 +3659 +3660 +3661 +3662 +3663 +3664 +3665 +3666 +3667 +3668 +3669 +3670 +3671 +3672 +3673 +3674 +3675 +3676 +3677 +3678 +3679 +3680 +3681 +3682 +3683 +3684 +3685 +3686 +3687 +3688 +3689 +3690 +3691 +3692 +3693 +3694 +3695 +3696 +3697 +3698 +3699 +3700 +3701 +3702 +3703 +3704 +3705 +3706 +3707 +3708 +3709 +3710 +3711 +3712 +3713 +3714 +3715 +3716 +3717 +3718 +3719 +3720 +3721 +3722 +3723 +3724 +3725 +3726 +3727 +3728 +3729 +3730 +3731 +3732 +3733 +3734 +3735 +3736 +3737 +3738 +3739 +3740 +3741 +3742 +3743 +3744 +3745 +3746 +3747 +3748 +3749 +3750 +3751 +3752 +3753 +3754 +3755 +3756 +3757 +3758 +3759 +3760 +3761 +3762 +3763 +3764 +3765 +3766 +3767 +3768 +3769 +3770 +3771 +3772 +3773 +3774 +3775 +3776 +3777 +3778 +3779 +3780 +3781 +3782 +3783 +3784 +3785 +3786 +3787 +3788 +3789 +3790 +3791 +3792 +3793 +3794 +3795 +3796 +3797 +3798 +3799 +3800 +3801 +3802 +3803 +3804 +3805 +3806 +3807 +3808 +3809 +3810 +3811 +3812 +3813 +3814 +3815 +3816 +3817 +3818 +3819 +3820 +3821 +3822 +3823 +3824 +3825 +3826 +3827 +3828 +3829 +3830 +3831 +3832 +3833 +3834 +3835 +3836 +3837 +3838 +3839 +3840 +3841 +3842 +3843 +3844 +3845 +3846 +3847 +3848 +3849 +3850 +3851 +3852 +3853 +3854 +3855 +3856 +3857 +3858 +3859 +3860 +3861 +3862 +3863 +3864 +3865 +3866 +3867 +3868 +3869 +3870 +3871 +3872 +3873 +3874 +3875 +3876 +3877 +3878 +3879 +3880 +3881 +3882 +3883 +3884 +3885 +3886 +3887 +3888 +3889 +3890 +3891 +3892 +3893 
+3894 +3895 +3896 +3897 +3898 +3899 +3900 +3901 +3902 +3903 +3904 +3905 +3906 +3907 +3908 +3909 +3910 +3911 +3912 +3913 +3914 +3915 +3916 +3917 +3918 +3919 +3920 +3921 +3922 +3923 +3924 +3925 +3926 +3927 +3928 +3929 +3930 +3931 +3932 +3933 +3934 +3935 +3936 +3937 +3938 +3939 +3940 +3941 +3942 +3943 +3944 +3945 +3946 +3947 +3948 +3949 +3950 +3951 +3952 +3953 +3954 +3955 +3956 +3957 +3958 +3959 +3960 +3961 +3962 +3963 +3964 +3965 +3966 +3967 +3968 +3969 +3970 +3971 +3972 +3973 +3974 +3975 +3976 +3977 +3978 +3979 +3980 +3981 +3982 +3983 +3984 +3985 +3986 +3987 +3988 +3989 +3990 +3991 +3992 +3993 +3994 +3995 +3996 +3997 +3998 +3999 +4000 +4001 +4002 +4003 +4004 +4005 +4006 +4007 +4008 +4009 +4010 +4011 +4012 +4013 +4014 +4015 +4016 +4017 +4018 +4019 +4020 +4021 +4022 +4023 +4024 +4025 +4026 +4027 +4028 +4029 +4030 +4031 +4032 +4033 +4034 +4035 +4036 +4037 +4038 +4039 +4040 +4041 +4042 +4043 +4044 +4045 +4046 +4047 +4048 +4049 +4050 +4051 +4052 +4053 +4054 +4055 +4056 +4057 +4058 +4059 +4060 +4061 +4062 +4063 +4064 +4065 +4066 +4067 +4068 +4069 +4070 +4071 +4072 +4073 +4074 +4075 +4076 +4077 +4078 +4079 +4080 +4081 +4082 +4083 +4084 +4085 +4086 +4087 +4088 +4089 +4090 +4091 +4092 +4093 +4094 +4095 +4096 +4097 +4098 +4099 +4100 +4101 +4102 +4103 +4104 +4105 +4106 +4107 +4108 +4109 +4110 +4111 +4112 +4113 +4114 +4115 +4116 +4117 +4118 +4119 +4120 +4121 +4122 +4123 +4124 +4125 +4126 +4127 +4128 +4129 +4130 +4131 +4132 +4133 +4134 +4135 +4136 +4137 +4138 +4139 +4140 +4141 +4142 +4143 +4144 +4145 +4146 +4147 +4148 +4149 +4150 +4151 +4152 +4153 +4154 +4155 +4156 +4157 +4158 +4159 +4160 +4161 +4162 +4163 +4164 +4165 +4166 +4167 +4168 +4169 +4170 +4171 +4172 +4173 +4174 +4175 +4176 +4177 +4178 +4179 +4180 +4181 +4182 +4183 +4184 +4185 +4186 +4187 +4188 +4189 +4190 +4191 +4192 +4193 +4194 +4195 +4196 +4197 +4198 +4199 +4200 +4201 +4202 +4203 +4204 +4205 +4206 +4207 +4208 +4209 +4210 +4211 +4212 +4213 +4214 +4215 +4216 +4217 +4218 +4219 +4220 +4221 +4222 +4223 +4224 +4225 +4226 
+4227 +4228 +4229 +4230 +4231 +4232 +4233 +4234 +4235 +4236 +4237 +4238 +4239 +4240 +4241 +4242 +4243 +4244 +4245 +4246 +4247 +4248 +4249 +4250 +4251 +4252 +4253 +4254 +4255 +4256 +4257 +4258 +4259 +4260 +4261 +4262 +4263 +4264 +4265 +4266 +4267 +4268 +4269 +4270 +4271 +4272 +4273 +4274 +4275 +4276 +4277 +4278 +4279 +4280 +4281 +4282 +4283 +4284 +4285 +4286 +4287 +4288 +4289 +4290 +4291 +4292 +4293 +4294 +4295 +4296 +4297 +4298 +4299 +4300 +4301 +4302 +4303 +4304 +4305 +4306 +4307 +4308 +4309 +4310 +4311 +4312 +4313 +4314 +4315 +4316 +4317 +4318 +4319 +4320 +4321 +4322 +4323 +4324 +4325 +4326 +4327 +4328 +4329 +4330 +4331 +4332 +4333 +4334 +4335 +4336 +4337 +4338 +4339 +4340 +4341 +4342 +4343 +4344 +4345 +4346 +4347 +4348 +4349 +4350 +4351 +4352 +4353 +4354 +4355 +4356 +4357 +4358 +4359 +4360 +4361 +4362 +4363 +4364 +4365 +4366 +4367 +4368 +4369 +4370 +4371 +4372 +4373 +4374 +4375 +4376 +4377 +4378 +4379 +4380 +4381 +4382 +4383 +4384 +4385 +4386 +4387 +4388 +4389 +4390 +4391 +4392 +4393 +4394 +4395 +4396 +4397 +4398 +4399 +4400 +4401 +4402 +4403 +4404 +4405 +4406 +4407 +4408 +4409 +4410 +4411 +4412 +4413 +4414 +4415 +4416 +4417 +4418 +4419 +4420 +4421 +4422 +4423 +4424 +4425 +4426 +4427 +4428 +4429 +4430 +4431 +4432 +4433 +4434 +4435 +4436 +4437 +4438 +4439 +4440 +4441 +4442 +4443 +4444 +4445 +4446 +4447 +4448 +4449 +4450 +4451 +4452 +4453 +4454 +4455 +4456 +4457 +4458 +4459 +4460 +4461 +4462 +4463 +4464 +4465 +4466 +4467 +4468 +4469 +4470 +4471 +4472 +4473 +4474 +4475 +4476 +4477 +4478 +4479 +4480 +4481 +4482 +4483 +4484 +4485 +4486 +4487 +4488 +4489 +4490 +4491 +4492 +4493 +4494 +4495 +4496 +4497 +4498 +4499 +4500 +4501 +4502 +4503 +4504 +4505 +4506 +4507 +4508 +4509 +4510 +4511 +4512 +4513 +4514 +4515 +4516 +4517 +4518 +4519 +4520 +4521 +4522 +4523 +4524 +4525 +4526 +4527 +4528 +4529 +4530 +4531 +4532 +4533 +4534 +4535 +4536 +4537 +4538 +4539 +4540 +4541 +4542 +4543 +4544 +4545 +4546 +4547 +4548 +4549 +4550 +4551 +4552 +4553 +4554 +4555 +4556 +4557 +4558 +4559 
+4560 +4561 +4562 +4563 +4564 +4565 +4566 +4567 +4568 +4569 +4570 +4571 +4572 +4573 +4574 +4575 +4576 +4577 +4578 +4579 +4580 +4581 +4582 +4583 +4584 +4585 +4586 +4587 +4588 +4589 +4590 +4591 +4592 +4593 +4594 +4595 +4596 +4597 +4598 +4599 +4600 +4601 +4602 +4603 +4604 +4605 +4606 +4607 +4608 +4609 +4610 +4611 +4612 +4613 +4614 +4615 +4616 +4617 +4618 +4619 +4620 +4621 +4622 +4623 +4624 +4625 +4626 +4627 +4628 +4629 +4630 +4631 +4632 +4633 +4634 +4635 +4636 +4637 +4638 +4639 +4640 +4641 +4642 +4643 +4644 +4645 +4646 +4647 +4648 +4649 +4650 +4651 +4652 +4653 +4654 +4655 +4656 +4657 +4658 +4659 +4660 +4661 +4662 +4663 +4664 +4665 +4666 +4667 +4668 +4669 +4670 +4671 +4672 +4673 +4674 +4675 +4676 +4677 +4678 +4679 +4680 +4681 +4682 +4683 +4684 +4685 +4686 +4687 +4688 +4689 +4690 +4691 +4692 +4693 +4694 +4695 +4696 +4697 +4698 +4699 +4700 +4701 +4702 +4703 +4704 +4705 +4706 +4707 +4708 +4709 +4710 +4711 +4712 +4713 +4714 +4715 +4716 +4717 +4718 +4719 +4720 +4721 +4722 +4723 +4724 +4725 +4726 +4727 +4728 +4729 +4730 +4731 +4732 +4733 +4734 +4735 +4736 +4737 +4738 +4739 +4740 +4741 +4742 +4743 +4744 +4745 +4746 +4747 +4748 +4749 +4750 +4751 +4752 +4753 +4754 +4755 +4756 +4757 +4758 +4759 +4760 +4761 +4762 +4763 +4764 +4765 +4766 +4767 +4768 +4769 +4770 +4771 +4772 +4773 +4774 +4775 +4776 +4777 +4778 +4779 +4780 +4781 +4782 +4783 +4784 +4785 +4786 +4787 +4788 +4789 +4790 +4791 +4792 +4793 +4794 +4795 +4796 +4797 +4798 +4799 +4800 +4801 +4802 +4803 +4804 +4805 +4806 +4807 +4808 +4809 +4810 +4811 +4812 +4813 +4814 +4815 +4816 +4817 +4818 +4819 +4820 +4821 +4822 +4823 +4824 +4825 +4826 +4827 +4828 +4829 +4830 +4831 +4832 +4833 +4834 +4835 +4836 +4837 +4838 +4839 +4840 +4841 +4842 +4843 +4844 +4845 +4846 +4847 +4848 +4849 +4850 +4851 +4852 +4853 +4854 +4855 +4856 +4857 +4858 +4859 +4860 +4861 +4862 +4863 +4864 +4865 +4866 +4867 +4868 +4869 +4870 +4871 +4872 +4873 +4874 +4875 +4876 +4877 +4878 +4879 +4880 +4881 +4882 +4883 +4884 +4885 +4886 +4887 +4888 +4889 +4890 +4891 +4892 
+4893 +4894 +4895 +4896 +4897 +4898 +4899 +4900 +4901 +4902 +4903 +4904 +4905 +4906 +4907 +4908 +4909 +4910 +4911 +4912 +4913 +4914 +4915 +4916 +4917 +4918 +4919 +4920 +4921 +4922 +4923 +4924 +4925 +4926 +4927 +4928 +4929 +4930 +4931 +4932 +4933 +4934 +4935 +4936 +4937 +4938 +4939 +4940 +4941 +4942 +4943 +4944 +4945 +4946 +4947 +4948 +4949 +4950 +4951 +4952 +4953 +4954 +4955 +4956 +4957 +4958 +4959 +4960 +4961 +4962 +4963 +4964 +4965 +4966 +4967 +4968 +4969 +4970 +4971 +4972 +4973 +4974 +4975 +4976 +4977 +4978 +4979 +4980 +4981 +4982 +4983 +4984 +4985 +4986 +4987 +4988 +4989 +4990 +4991 +4992 +4993 +4994 +4995 +4996 +4997 +4998 +4999 +5000 +5001 +5002 +5003 +5004 +5005 +5006 +5007 +5008 +5009 +5010 +5011 +5012 +5013 +5014 +5015 +5016 +5017 +5018 +5019 +5020 +5021 +5022 +5023 +5024 +5025 +5026 +5027 +5028 +5029 +5030 +5031 +5032 +5033 +5034 +5035 +5036 +5037 +5038 +5039 +5040 +5041 +5042 +5043 +5044 +5045 +5046 +5047 +5048 +5049 +5050 +5051 +5052 +5053 +5054 +5055 +5056 +5057 +5058 +5059 +5060 +5061 +5062 +5063 +5064 +5065 +5066 +5067 +5068 +5069 +5070 +5071 +5072 +5073 +5074 +5075 +5076 +5077 +5078 +5079 +5080 +5081 +5082 +5083 +5084 +5085 +5086 +5087 +5088 +5089 +5090 +5091 +5092 +5093 +5094 +5095 +5096 +5097 +5098 +5099 +5100 +5101 +5102 +5103 +5104 +5105 +5106 +5107 +5108 +5109 +5110 +5111 +5112 +5113 +5114 +5115 +5116 +5117 +5118 +5119 +5120 +5121 +5122 +5123 +5124 +5125 +5126 +5127 +5128 +5129 +5130 +5131 +5132 +5133 +5134 +5135 +5136 +5137 +5138 +5139 +5140 +5141 +5142 +5143 +5144 +5145 +5146 +5147 +5148 +5149 +5150 +5151 +5152 +5153 +5154 +5155 +5156 +5157 +5158 +5159 +5160 +5161 +5162 +5163 +5164 +5165 +5166 +5167 +5168 +5169 +5170 +5171 +5172 +5173 +5174 +5175 +5176 +5177 +5178 +5179 +5180 +5181 +5182 +5183 +5184 +5185 +5186 +5187 +5188 +5189 +5190 +5191 +5192 +5193 +5194 +5195 +5196 +5197 +5198 +5199 +5200 +5201 +5202 +5203 +5204 +5205 +5206 +5207 +5208 +5209 +5210 +5211 +5212 +5213 +5214 +5215 +5216 +5217 +5218 +5219 +5220 +5221 +5222 +5223 +5224 +5225 
+5226 +5227 +5228 +5229 +5230 +5231 +5232 +5233 +5234 +5235 +5236 +5237 +5238 +5239 +5240 +5241 +5242 +5243 +5244 +5245 +5246 +5247 +5248 +5249 +5250 +5251 +5252 +5253 +5254 +5255 +5256 +5257 +5258 +5259 +5260 +5261 +5262 +5263 +5264 +5265 +5266 +5267 +5268 +5269 +5270 +5271 +5272 +5273 +5274 +5275 +5276 +5277 +5278 +5279 +5280 +5281 +5282 +5283 +5284 +5285 +5286 +5287 +5288 +5289 +5290 +5291 +5292 +5293 +5294 +5295 +5296 +5297 +5298 +5299 +5300 +5301 +5302 +5303 +5304 +5305 +5306 +5307 +5308 +5309 +5310 +5311 +5312 +5313 +5314 +5315 +5316 +5317 +5318 +5319 +5320 +5321 +5322 +5323 +5324 +5325 +5326 +5327 +5328 +5329 +5330 +5331 +5332 +5333 +5334 +5335 +5336 +5337 +5338 +5339 +5340 +5341 +5342 +5343 +5344 +5345 +5346 +5347 +5348 +5349 +5350 +5351 +5352 +5353 +5354 +5355 +5356 +5357 +5358 +5359 +5360 +5361 +5362 +5363 +5364 +5365 +5366 +5367 +5368 +5369 +5370 +5371 +5372 +5373 +5374 +5375 +5376 +5377 +5378 +5379 +5380 +5381 +5382 +5383 +5384 +5385 +5386 +5387 +5388 +5389 +5390 +5391 +5392 +5393 +5394 +5395 +5396 +5397 +5398 +5399 +5400 +5401 +5402 +5403 +5404 +5405 +5406 +5407 +5408 +5409 +5410 +5411 +5412 +5413 +5414 +5415 +5416 +5417 +5418 +5419 +5420 +5421 +5422 +5423 +5424 +5425 +5426 +5427 +5428 +5429 +5430 +5431 +5432 +5433 +5434 +5435 +5436 +5437 +5438 +5439 +5440 +5441 +5442 +5443 +5444 +5445 +5446 +5447 +5448 +5449 +5450 +5451 +5452 +5453 +5454 +5455 +5456 +5457 +5458 +5459 +5460 +5461 +5462 +5463 +5464 +5465 +5466 +5467 +5468 +5469 +5470 +5471 +5472 +5473 +5474 +5475 +5476 +5477 +5478 +5479 +5480 +5481 +5482 +5483 +5484 +5485 +5486 +5487 +5488 +5489 +5490 +5491 +5492 +5493 +5494 +5495 +5496 +5497 +5498 +5499 +5500 +5501 +5502 +5503 +5504 +5505 +5506 +5507 +5508 +5509 +5510 +5511 +5512 +5513 +5514 +5515 +5516 +5517 +5518 +5519 +5520 +5521 +5522 +5523 +5524 +5525 +5526 +5527 +5528 +5529 +5530 +5531 +5532 +5533 +5534 +5535 +5536 +5537 +5538 +5539 +5540 +5541 +5542 +5543 +5544 +5545 +5546 +5547 +5548 +5549 +5550 +5551 +5552 +5553 +5554 +5555 +5556 +5557 +5558 
+5559 +5560 +5561 +5562 +5563 +5564 +5565 +5566 +5567 +5568 +5569 +5570 +5571 +5572 +5573 +5574 +5575 +5576 +5577 +5578 +5579 +5580 +5581 +5582 +5583 +5584 +5585 +5586 +5587 +5588 +5589 +5590 +5591 +5592 +5593 +5594 +5595 +5596 +5597 +5598 +5599 +5600 +5601 +5602 +5603 +5604 +5605 +5606 +5607 +5608 +5609 +5610 +5611 +5612 +5613 +5614 +5615 +5616 +5617 +5618 +5619 +5620 +5621 +5622 +5623 +5624 +5625 +5626 +5627 +5628 +5629 +5630 +5631 +5632 +5633 +5634 +5635 +5636 +5637 +5638 +5639 +5640 +5641 +5642 +5643 +5644 +5645 +5646 +5647 +5648 +5649 +5650 +5651 +5652 +5653 +5654 +5655 +5656 +5657 +5658 +5659 +5660 +5661 +5662 +5663 +5664 +5665 +5666 +5667 +5668 +5669 +5670 +5671 +5672 +5673 +5674 +5675 +5676 +5677 +5678 +5679 +5680 +5681 +5682 +5683 +5684 +5685 +5686 +5687 +5688 +5689 +5690 +5691 +5692 +5693 +5694 +5695 +5696 +5697 +5698 +5699 +5700 +5701 +5702 +5703 +5704 +5705 +5706 +5707 +5708 +5709 +5710 +5711 +5712 +5713 +5714 +5715 +5716 +5717 +5718 +5719 +5720 +5721 +5722 +5723 +5724 +5725 +5726 +5727 +5728 +5729 +5730 +5731 +5732 +5733 +5734 +5735 +5736 +5737 +5738 +5739 +5740 +5741 +5742 +5743 +5744 +5745 +5746 +5747 +5748 +5749 +5750 +5751 +5752 +5753 +5754 +5755 +5756 +5757 +5758 +5759 +5760 +5761 +5762 +5763 +5764 +5765 +5766 +5767 +5768 +5769 +5770 +5771 +5772 +5773 +5774 +5775 +5776 +5777 +5778 +5779 +5780 +5781 +5782 +5783 +5784 +5785 +5786 +5787 +5788 +5789 +5790 +5791 +5792 +5793 +5794 +5795 +5796 +5797 +5798 +5799 +5800 +5801 +5802 +5803 +5804 +5805 +5806 +5807 +5808 +5809 +5810 +5811 +5812 +5813 +5814 +5815 +5816 +5817 +5818 +5819 +5820 +5821 +5822 +5823 +5824 +5825 +5826 +5827 +5828 +5829 +5830 +5831 +5832 +5833 +5834 +5835 +5836 +5837 +5838 +5839 +5840 +5841 +5842 +5843 +5844 +5845 +5846 +5847 +5848 +5849 +5850 +5851 +5852 +5853 +5854 +5855 +5856 +5857 +5858 +5859 +5860 +5861 +5862 +5863 +5864 +5865 +5866 +5867 +5868 +5869 +5870 +5871 +5872 +5873 +5874 +5875 +5876 +5877 +5878 +5879 +5880 +5881 +5882 +5883 +5884 +5885 +5886 +5887 +5888 +5889 +5890 +5891 
+5892 +5893 +5894 +5895 +5896 +5897 +5898 +5899 +5900 +5901 +5902 +5903 +5904 +5905 +5906 +5907 +5908 +5909 +5910 +5911 +5912 +5913 +5914 +5915 +5916 +5917 +5918 +5919 +5920 +5921 +5922 +5923 +5924 +5925 +5926 +5927 +5928 +5929 +5930 +5931 +5932 +5933 +5934 +5935 +5936 +5937 +5938 +5939 +5940 +5941 +5942 +5943 +5944 +5945 +5946 +5947 +5948 +5949 +5950 +5951 +5952 +5953 +5954 +5955 +5956 +5957 +5958 +5959 +5960 +5961 +5962 +5963 +5964 +5965 +5966 +5967 +5968 +5969 +5970 +5971 +5972 +5973 +5974 +5975 +5976 +5977 +5978 +5979 +5980 +5981 +5982 +5983 +5984 +5985 +5986 +5987 +5988 +5989 +5990 +5991 +5992 +5993 +5994 +5995 +5996 +5997 +5998 +5999 +6000 +6001 +6002 +6003 +6004 +6005 +6006 +6007 +6008 +6009 +6010 +6011 +6012 +6013 +6014 +6015 +6016 +6017 +6018 +6019 +6020 +6021 +6022 +6023 +6024 +6025 +6026 +6027 +6028 +6029 +6030 +6031 +6032 +6033 +6034 +6035 +6036 +6037 +6038 +6039 +6040 +6041 +6042 +6043 +6044 +6045 +6046 +6047 +6048 +6049 +6050 +6051 +6052 +6053 +6054 +6055 +6056 +6057 +6058 +6059 +6060 +6061 +6062 +6063 +6064 +6065 +6066 +6067 +6068 +6069 +6070 +6071 +6072 +6073 +6074 +6075 +6076 +6077 +6078 +6079 +6080 +6081 +6082 +6083 +6084 +6085 +6086 +6087 +6088 +6089 +6090 +6091 +6092 +6093 +6094 +6095 +6096 +6097 +6098 +6099 +6100 +6101 +6102 +6103 +6104 +6105 +6106 +6107 +6108 +6109 +6110 +6111 +6112 +6113 +6114 +6115 +6116 +6117 +6118 +6119 +6120 +6121 +6122 +6123 +6124 +6125 +6126 +6127 +6128 +6129 +6130 +6131 +6132 +6133 +6134 +6135 +6136 +6137 +6138 +6139 +6140 +6141 +6142 +6143 +6144 +6145 +6146 +6147 +6148 +6149 +6150 +6151 +6152 +6153 +6154 +6155 +6156 +6157 +6158 +6159 +6160 +6161 +6162 +6163 +6164 +6165 +6166 +6167 +6168 +6169 +6170 +6171 +6172 +6173 +6174 +6175 +6176 +6177 +6178 +6179 +6180 +6181 +6182 +6183 +6184 +6185 +6186 +6187 +6188 +6189 +6190 +6191 +6192 +6193 +6194 +6195 +6196 +6197 +6198 +6199 +6200 +6201 +6202 +6203 +6204 +6205 +6206 +6207 +6208 +6209 +6210 +6211 +6212 +6213 +6214 +6215 +6216 +6217 +6218 +6219 +6220 +6221 +6222 +6223 +6224 
+6225 +6226 +6227 +6228 +6229 +6230 +6231 +6232 +6233 +6234 +6235 +6236 +6237 +6238 +6239 +6240 +6241 +6242 +6243 +6244 +6245 +6246 +6247 +6248 +6249 +6250 +6251 +6252 +6253 +6254 +6255 +6256 +6257 +6258 +6259 +6260 +6261 +6262 +6263 +6264 +6265 +6266 +6267 +6268 +6269 +6270 +6271 +6272 +6273 +6274 +6275 +6276 +6277 +6278 +6279 +6280 +6281 +6282 +6283 +6284 +6285 +6286 +6287 +6288 +6289 +6290 +6291 +6292 +6293 +6294 +6295 +6296 +6297 +6298 +6299 +6300 +6301 +6302 +6303 +6304 +6305 +6306 +6307 +6308 +6309 +6310 +6311 +6312 +6313 +6314 +6315 +6316 +6317 +6318 +6319 +6320 +6321 +6322 +6323 +6324 +6325 +6326 +6327 +6328 +6329 +6330 +6331 +6332 +6333 +6334 +6335 +6336 +6337 +6338 +6339 +6340 +6341 +6342 +6343 +6344 +6345 +6346 +6347 +6348 +6349 +6350 +6351 +6352 +6353 +6354 +6355 +6356 +6357 +6358 +6359 +6360 +6361 +6362 +6363 +6364 +6365 +6366 +6367 +6368 +6369 +6370 +6371 +6372 +6373 +6374 +6375 +6376 +6377 +6378 +6379 +6380 +6381 +6382 +6383 +6384 +6385 +6386 +6387 +6388 +6389 +6390 +6391 +6392 +6393 +6394 +6395 +6396 +6397 +6398 +6399 +6400 +6401 +6402 +6403 +6404 +6405 +6406 +6407 +6408 +6409 +6410 +6411 +6412 +6413 +6414 +6415 +6416 +6417 +6418 +6419 +6420 +6421 +6422 +6423 +6424 +6425 +6426 +6427 +6428 +6429 +6430 +6431 +6432 +6433 +6434 +6435 +6436 +6437 +6438 +6439 +6440 +6441 +6442 +6443 +6444 +6445 +6446 +6447 +6448 +6449 +6450 +6451 +6452 +6453 +6454 +6455 +6456 +6457 +6458 +6459 +6460 +6461 +6462 +6463 +6464 +6465 +6466 +6467 +6468 +6469 +6470 +6471 +6472 +6473 +6474 +6475 +6476 +6477 +6478 +6479 +6480 +6481 +6482 +6483 +6484 +6485 +6486 +6487 +6488 +6489 +6490 +6491 +6492 +6493 +6494 +6495 +6496 +6497 +6498 +6499 +6500 +6501 +6502 +6503 +6504 +6505 +6506 +6507 +6508 +6509 +6510 +6511 +6512 +6513 +6514 +6515 +6516 +6517 +6518 +6519 +6520 +6521 +6522 +6523 +6524 +6525 +6526 +6527 +6528 +6529 +6530 +6531 +6532 +6533 +6534 +6535 +6536 +6537 +6538 +6539 +6540 +6541 +6542 +6543 +6544 +6545 +6546 +6547 +6548 +6549 +6550 +6551 +6552 +6553 +6554 +6555 +6556 +6557 
+6558 +6559 +6560 +6561 +6562 +6563 +6564 +6565 +6566 +6567 +6568 +6569 +6570 +6571 +6572 +6573 +6574 +6575 +6576 +6577 +6578 +6579 +6580 +6581 +6582 +6583 +6584 +6585 +6586 +6587 +6588 +6589 +6590 +6591 +6592 +6593 +6594 +6595 +6596 +6597 +6598 +6599 +6600 +6601 +6602 +6603 +6604 +6605 +6606 +6607 +6608 +6609 +6610 +6611 +6612 +6613 +6614 +6615 +6616 +6617 +6618 +6619 +6620 +6621 +6622 +6623 +6624 +6625 +6626 +6627 +6628 +6629 +6630 +6631 +6632 +6633 +6634 +6635 +6636 +6637 +6638 +6639 +6640 +6641 +6642 +6643 +6644 +6645 +6646 +6647 +6648 +6649 +6650 +6651 +6652 +6653 +6654 +6655 +6656 +6657 +6658 +6659 +6660 +6661 +6662 +6663 +6664 +6665 +6666 +6667 +6668 +6669 +6670 +6671 +6672 +6673 +6674 +6675 +6676 +6677 +6678 +6679 +6680 +6681 +6682 +6683 +6684 +6685 +6686 +6687 +6688 +6689 +6690 +6691 +6692 +6693 +6694 +6695 +6696 +6697 +6698 +6699 +6700 +6701 +6702 +6703 +6704 +6705 +6706 +6707 +6708 +6709 +6710 +6711 +6712 +6713 +6714 +6715 +6716 +6717 +6718 +6719 +6720 +6721 +6722 +6723 +6724 +6725 +6726 +6727 +6728 +6729 +6730 +6731 +6732 +6733 +6734 +6735 +6736 +6737 +6738 +6739 +6740 +6741 +6742 +6743 +6744 +6745 +6746 +6747 +6748 +6749 +6750 +6751 +6752 +6753 +6754 +6755 +6756 +6757 +6758 +6759 +6760 +6761 +6762 +6763 +6764 +6765 +6766 +6767 +6768 +6769 +6770 +6771 +6772 +6773 +6774 +6775 +6776 +6777 +6778 +6779 +6780 +6781 +6782 +6783 +6784 +6785 +6786 +6787 +6788 +6789 +6790 +6791 +6792 +6793 +6794 +6795 +6796 +6797 +6798 +6799 +6800 +6801 +6802 +6803 +6804 +6805 +6806 +6807 +6808 +6809 +6810 +6811 +6812 +6813 +6814 +6815 +6816 +6817 +6818 +6819 +6820 +6821 +6822 +6823 +6824 +6825 +6826 +6827 +6828 +6829 +6830 +6831 +6832 +6833 +6834 +6835 +6836 +6837 +6838 +6839 +6840 +6841 +6842 +6843 +6844 +6845 +6846 +6847 +6848 +6849 +6850 +6851 +6852 +6853 +6854 +6855 +6856 +6857 +6858 +6859 +6860 +6861 +6862 +6863 +6864 +6865 +6866 +6867 +6868 +6869 +6870 +6871 +6872 +6873 +6874 +6875 +6876 +6877 +6878 +6879 +6880 +6881 +6882 +6883 +6884 +6885 +6886 +6887 +6888 +6889 +6890 
+6891 +6892 +6893 +6894 +6895 +6896 +6897 +6898 +6899 +6900 +6901 +6902 +6903 +6904 +6905 +6906 +6907 +6908 +6909 +6910 +6911 +6912 +6913 +6914 +6915 +6916 +6917 +6918 +6919 +6920 +6921 +6922 +6923 +6924 +6925 +6926 +6927 +6928 +6929 +6930 +6931 +6932 +6933 +6934 +6935 +6936 +6937 +6938 +6939 +6940 +6941 +6942 +6943 +6944 +6945 +6946 +6947 +6948 +6949 +6950 +6951 +6952 +6953 +6954 +6955 +6956 +6957 +6958 +6959 +6960 +6961 +6962 +6963 +6964 +6965 +6966 +6967 +6968 +6969 +6970 +6971 +6972 +6973 +6974 +6975 +6976 +6977 +6978 +6979 +6980 +6981 +6982 +6983 +6984 +6985 +6986 +6987 +6988 +6989 +6990 +6991 +6992 +6993 +6994 +6995 +6996 +6997 +6998 +6999 +7000 +7001 +7002 +7003 +7004 +7005 +7006 +7007 +7008 +7009 +7010 +7011 +7012 +7013 +7014 +7015 +7016 +7017 +7018 +7019 +7020 +7021 +7022 +7023 +7024 +7025 +7026 +7027 +7028 +7029 +7030 +7031 +7032 +7033 +7034 +7035 +7036 +7037 +7038 +7039 +7040 +7041 +7042 +7043 +7044 +7045 +7046 +7047 +7048 +7049 +7050 +7051 +7052 +7053 +7054 +7055 +7056 +7057 +7058 +7059 +7060 +7061 +7062 +7063 +7064 +7065 +7066 +7067 +7068 +7069 +7070 +7071 +7072 +7073 +7074 +7075 +7076 +7077 +7078 +7079 +7080 +7081 +7082 +7083 +7084 +7085 +7086 +7087 +7088 +7089 +7090 +7091 +7092 +7093 +7094 +7095 +7096 +7097 +7098 +7099 +7100 +7101 +7102 +7103 +7104 +7105 +7106 +7107 +7108 +7109 +7110 +7111 +7112 +7113 +7114 +7115 +7116 +7117 +7118 +7119 +7120 +7121 +7122 +7123 +7124 +7125 +7126 +7127 +7128 +7129 +7130 +7131 +7132 +7133 +7134 +7135 +7136 +7137 +7138 +7139 +7140 +7141 +7142 +7143 +7144 +7145 +7146 +7147 +7148 +7149 +7150 +7151 +7152 +7153 +7154 +7155 +7156 +7157 +7158 +7159 +7160 +7161 +7162 +7163 +7164 +7165 +7166 +7167 +7168 +7169 +7170 +7171 +7172 +7173 +7174 +7175 +7176 +7177 +7178 +7179 +7180 +7181 +7182 +7183 +7184 +7185 +7186 +7187 +7188 +7189 +7190 +7191 +7192 +7193 +7194 +7195 +7196 +7197 +7198 +7199 +7200 +7201 +7202 +7203 +7204 +7205 +7206 +7207 +7208 +7209 +7210 +7211 +7212 +7213 +7214 +7215 +7216 +7217 +7218 +7219 +7220 +7221 +7222 +7223 
+7224 +7225 +7226 +7227 +7228 +7229 +7230 +7231 +7232 +7233 +7234 +7235 +7236 +7237 +7238 +7239 +7240 +7241 +7242 +7243 +7244 +7245 +7246 +7247 +7248 +7249 +7250 +7251 +7252 +7253 +7254 +7255 +7256 +7257 +7258 +7259 +7260 +7261 +7262 +7263 +7264 +7265 +7266 +7267 +7268 +7269 +7270 +7271 +7272 +7273 +7274 +7275 +7276 +7277 +7278 +7279 +7280 +7281 +7282 +7283 +7284 +7285 +7286 +7287 +7288 +7289 +7290 +7291 +7292 +7293 +7294 +7295 +7296 +7297 +7298 +7299 +7300 +7301 +7302 +7303 +7304 +7305 +7306 +7307 +7308 +7309 +7310 +7311 +7312 +7313 +7314 +7315 +7316 +7317 +7318 +7319 +7320 +7321 +7322 +7323 +7324 +7325 +7326 +7327 +7328 +7329 +7330 +7331 +7332 +7333 +7334 +7335 +7336 +7337 +7338 +7339 +7340 +7341 +7342 +7343 +7344 +7345 +7346 +7347 +7348 +7349 +7350 +7351 +7352 +7353 +7354 +7355 +7356 +7357 +7358 +7359 +7360 +7361 +7362 +7363 +7364 +7365 +7366 +7367 +7368 +7369 +7370 +7371 +7372 +7373 +7374 +7375 +7376 +7377 +7378 +7379 +7380 +7381 +7382 +7383 +7384 +7385 +7386 +7387 +7388 +7389 +7390 +7391 +7392 +7393 +7394 +7395 +7396 +7397 +7398 +7399 +7400 +7401 +7402 +7403 +7404 +7405 +7406 +7407 +7408 +7409 +7410 +7411 +7412 +7413 +7414 +7415 +7416 +7417 +7418 +7419 +7420 +7421 +7422 +7423 +7424 +7425 +7426 +7427 +7428 +7429 +7430 +7431 +7432 +7433 +7434 +7435 +7436 +7437 +7438 +7439 +7440 +7441 +7442 +7443 +7444 +7445 +7446 +7447 +7448 +7449 +7450 +7451 +7452 +7453 +7454 +7455 +7456 +7457 +7458 +7459 +7460 +7461 +7462 +7463 +7464 +7465 +7466 +7467 +7468 +7469 +7470 +7471 +7472 +7473 +7474 +7475 +7476 +7477 +7478 +7479 +7480 +7481 +7482 +7483 +7484 +7485 +7486 +7487 +7488 +7489 +7490 +7491 +7492 +7493 +7494 +7495 +7496 +7497 +7498 +7499 +7500 +7501 +7502 +7503 +7504 +7505 +7506 +7507 +7508 +7509 +7510 +7511 +7512 +7513 +7514 +7515 +7516 +7517 +7518 +7519 +7520 +7521 +7522 +7523 +7524 +7525 +7526 +7527 +7528 +7529 +7530 +7531 +7532 +7533 +7534 +7535 +7536 +7537 +7538 +7539 +7540 +7541 +7542 +7543 +7544 +7545 +7546 +7547 +7548 +7549 +7550 +7551 +7552 +7553 +7554 +7555 +7556 
+7557 +7558 +7559 +7560 +7561 +7562 +7563 +7564 +7565 +7566 +7567 +7568 +7569 +7570 +7571 +7572 +7573 +7574 +7575 +7576 +7577 +7578 +7579 +7580 +7581 +7582 +7583 +7584 +7585 +7586 +7587 +7588 +7589 +7590 +7591 +7592 +7593 +7594 +7595 +7596 +7597 +7598 +7599 +7600 +7601 +7602 +7603 +7604 +7605 +7606 +7607 +7608 +7609 +7610 +7611 +7612 +7613 +7614 +7615 +7616 +7617 +7618 +7619 +7620 +7621 +7622 +7623 +7624 +7625 +7626 +7627 +7628 +7629 +7630 +7631 +7632 +7633 +7634 +7635 +7636 +7637 +7638 +7639 +7640 +7641 +7642 +7643 +7644 +7645 +7646 +7647 +7648 +7649 +7650 +7651 +7652 +7653 +7654 +7655 +7656 +7657 +7658 +7659 +7660 +7661 +7662 +7663 +7664 +7665 +7666 +7667 +7668 +7669 +7670 +7671 +7672 +7673 +7674 +7675 +7676 +7677 +7678 +7679 +7680 +7681 +7682 +7683 +7684 +7685 +7686 +7687 +7688 +7689 +7690 +7691 +7692 +7693 +7694 +7695 +7696 +7697 +7698 +7699 +7700 +7701 +7702 +7703 +7704 +7705 +7706 +7707 +7708 +7709 +7710 +7711 +7712 +7713 +7714 +7715 +7716 +7717 +7718 +7719 +7720 +7721 +7722 +7723 +7724 +7725 +7726 +7727 +7728 +7729 +7730 +7731 +7732 +7733 +7734 +7735 +7736 +7737 +7738 +7739 +7740 +7741 +7742 +7743 +7744 +7745 +7746 +7747 +7748 +7749 +7750 +7751 +7752 +7753 +7754 +7755 +7756 +7757 +7758 +7759 +7760 +7761 +7762 +7763 +7764 +7765 +7766 +7767 +7768 +7769 +7770 +7771 +7772 +7773 +7774 +7775 +7776 +7777 +7778 +7779 +7780 +7781 +7782 +7783 +7784 +7785 +7786 +7787 +7788 +7789 +7790 +7791 +7792 +7793 +7794 +7795 +7796 +7797 +7798 +7799 +7800 +7801 +7802 +7803 +7804 +7805 +7806 +7807 +7808 +7809 +7810 +7811 +7812 +7813 +7814 +7815 +7816 +7817 +7818 +7819 +7820 +7821 +7822 +7823 +7824 +7825 +7826 +7827 +7828 +7829 +7830 +7831 +7832 +7833 +7834 +7835 +7836 +7837 +7838 +7839 +7840 +7841 +7842 +7843 +7844 +7845 +7846 +7847 +7848 +7849 +7850 +7851 +7852 +7853 +7854 +7855 +7856 +7857 +7858 +7859 +7860 +7861 +7862 +7863 +7864 +7865 +7866 +7867 +7868 +7869 +7870 +7871 +7872 +7873 +7874 +7875 +7876 +7877 +7878 +7879 +7880 +7881 +7882 +7883 +7884 +7885 +7886 +7887 +7888 +7889 
+7890 +7891 +7892 +7893 +7894 +7895 +7896 +7897 +7898 +7899 +7900 +7901 +7902 +7903 +7904 +7905 +7906 +7907 +7908 +7909 +7910 +7911 +7912 +7913 +7914 +7915 +7916 +7917 +7918 +7919 +7920 +7921 +7922 +7923 +7924 +7925 +7926 +7927 +7928 +7929 +7930 +7931 +7932 +7933 +7934 +7935 +7936 +7937 +7938 +7939 +7940 +7941 +7942 +7943 +7944 +7945 +7946 +7947 +7948 +7949 +7950 +7951 +7952 +7953 +7954 +7955 +7956 +7957 +7958 +7959 +7960 +7961 +7962 +7963 +7964 +7965 +7966 +7967 +7968 +7969 +7970 +7971 +7972 +7973 +7974 +7975 +7976 +7977 +7978 +7979 +7980 +7981 +7982 +7983 +7984 +7985 +7986 +7987 +7988 +7989 +7990 +7991 +7992 +7993 +7994 +7995 +7996 +7997 +7998 +7999 +8000 +8001 +8002 +8003 +8004 +8005 +8006 +8007 +8008 +8009 +8010 +8011 +8012 +8013 +8014 +8015 +8016 +8017 +8018 +8019 +8020 +8021 +8022 +8023 +8024 +8025 +8026 +8027 +8028 +8029 +8030 +8031 +8032 +8033 +8034 +8035 +8036 +8037 +8038 +8039 +8040 +8041 +8042 +8043 +8044 +8045 +8046 +8047 +8048 +8049 +8050 +8051 +8052 +8053 +8054 +8055 +8056 +8057 +8058 +8059 +8060 +8061 +8062 +8063 +8064 +8065 +8066 +8067 +8068 +8069 +8070 +8071 +8072 +8073 +8074 +8075 +8076 +8077 +8078 +8079 +8080 +8081 +8082 +8083 +8084 +8085 +8086 +8087 +8088 +8089 +8090 +8091 +8092 +8093 +8094 +8095 +8096 +8097 +8098 +8099 +8100 +8101 +8102 +8103 +8104 +8105 +8106 +8107 +8108 +8109 +8110 +8111 +8112 +8113 +8114 +8115 +8116 +8117 +8118 +8119 +8120 +8121 +8122 +8123 +8124 +8125 +8126 +8127 +8128 +8129 +8130 +8131 +8132 +8133 +8134 +8135 +8136 +8137 +8138 +8139 +8140 +8141 +8142 +8143 +8144 +8145 +8146 +8147 +8148 +8149 +8150 +8151 +8152 +8153 +8154 +8155 +8156 +8157 +8158 +8159 +8160 +8161 +8162 +8163 +8164 +8165 +8166 +8167 +8168 +8169 +8170 +8171 +8172 +8173 +8174 +8175 +8176 +8177 +8178 +8179 +8180 +8181 +8182 +8183 +8184 +8185 +8186 +8187 +8188 +8189 +8190 +8191 +8192 +8193 +8194 +8195 +8196 +8197 +8198 +8199 +8200 +8201 +8202 +8203 +8204 +8205 +8206 +8207 +8208 +8209 +8210 +8211 +8212 +8213 +8214 +8215 +8216 +8217 +8218 +8219 +8220 +8221 +8222 
+8223 +8224 +8225 +8226 +8227 +8228 +8229 +8230 +8231 +8232 +8233 +8234 +8235 +8236 +8237 +8238 +8239 +8240 +8241 +8242 +8243 +8244 +8245 +8246 +8247 +8248 +8249 +8250 +8251 +8252 +8253 +8254 +8255 +8256 +8257 +8258 +8259 +8260 +8261 +8262 +8263 +8264 +8265 +8266 +8267 +8268 +8269 +8270 +8271 +8272 +8273 +8274 +8275 +8276 +8277 +8278 +8279 +8280 +8281 +8282 +8283 +8284 +8285 +8286 +8287 +8288 +8289 +8290 +8291 +8292 +8293 +8294 +8295 +8296 +8297 +8298 +8299 +8300 +8301 +8302 +8303 +8304 +8305 +8306 +8307 +8308 +8309 +8310 +8311 +8312 +8313 +8314 +8315 +8316 +8317 +8318 +8319 +8320 +8321 +8322 +8323 +8324 +8325 +8326 +8327 +8328 +8329 +8330 +8331 +8332 +8333 +8334 +8335 +8336 +8337 +8338 +8339 +8340 +8341 +8342 +8343 +8344 +8345 +8346 +8347 +8348 +8349 +8350 +8351 +8352 +8353 +8354 +8355 +8356 +8357 +8358 +8359 +8360 +8361 +8362 +8363 +8364 +8365 +8366 +8367 +8368 +8369 +8370 +8371 +8372 +8373 +8374 +8375 +8376 +8377 +8378 +8379 +8380 +8381 +8382 +8383 +8384 +8385 +8386 +8387 +8388 +8389 +8390 +8391 +8392 +8393 +8394 +8395 +8396 +8397 +8398 +8399 +8400 +8401 +8402 +8403 +8404 +8405 +8406 +8407 +8408 +8409 +8410 +8411 +8412 +8413 +8414 +8415 +8416 +8417 +8418 +8419 +8420 +8421 +8422 +8423 +8424 +8425 +8426 +8427 +8428 +8429 +8430 +8431 +8432 +8433 +8434 +8435 +8436 +8437 +8438 +8439 +8440 +8441 +8442 +8443 +8444 +8445 +8446 +8447 +8448 +8449 +8450 +8451 +8452 +8453 +8454 +8455 +8456 +8457 +8458 +8459 +8460 +8461 +8462 +8463 +8464 +8465 +8466 +8467 +8468 +8469 +8470 +8471 +8472 +8473 +8474 +8475 +8476 +8477 +8478 +8479 +8480 +8481 +8482 +8483 +8484 +8485 +8486 +8487 +8488 +8489 +8490 +8491 +8492 +8493 +8494 +8495 +8496 +8497 +8498 +8499 +8500 +8501 +8502 +8503 +8504 +8505 +8506 +8507 +8508 +8509 +8510 +8511 +8512 +8513 +8514 +8515 +8516 +8517 +8518 +8519 +8520 +8521 +8522 +8523 +8524 +8525 +8526 +8527 +8528 +8529 +8530 +8531 +8532 +8533 +8534 +8535 +8536 +8537 +8538 +8539 +8540 +8541 +8542 +8543 +8544 +8545 +8546 +8547 +8548 +8549 +8550 +8551 +8552 +8553 +8554 +8555 
+8556 +8557 +8558 +8559 +8560 +8561 +8562 +8563 +8564 +8565 +8566 +8567 +8568 +8569 +8570 +8571 +8572 +8573 +8574 +8575 +8576 +8577 +8578 +8579 +8580 +8581 +8582 +8583 +8584 +8585 +8586 +8587 +8588 +8589 +8590 +8591 +8592 +8593 +8594 +8595 +8596 +8597 +8598 +8599 +8600 +8601 +8602 +8603 +8604 +8605 +8606 +8607 +8608 +8609 +8610 +8611 +8612 +8613 +8614 +8615 +8616 +8617 +8618 +8619 +8620 +8621 +8622 +8623 +8624 +8625 +8626 +8627 +8628 +8629 +8630 +8631 +8632 +8633 +8634 +8635 +8636 +8637 +8638 +8639 +8640 +8641 +8642 +8643 +8644 +8645 +8646 +8647 +8648 +8649 +8650 +8651 +8652 +8653 +8654 +8655 +8656 +8657 +8658 +8659 +8660 +8661 +8662 +8663 +8664 +8665 +8666 +8667 +8668 +8669 +8670 +8671 +8672 +8673 +8674 +8675 +8676 +8677 +8678 +8679 +8680 +8681 +8682 +8683 +8684 +8685 +8686 +8687 +8688 +8689 +8690 +8691 +8692 +8693 +8694 +8695 +8696 +8697 +8698 +8699 +8700 +8701 +8702 +8703 +8704 +8705 +8706 +8707 +8708 +8709 +8710 +8711 +8712 +8713 +8714 +8715 +8716 +8717 +8718 +8719 +8720 +8721 +8722 +8723 +8724 +8725 +8726 +8727 +8728 +8729 +8730 +8731 +8732 +8733 +8734 +8735 +8736 +8737 +8738 +8739 +8740 +8741 +8742 +8743 +8744 +8745 +8746 +8747 +8748 +8749 +8750 +8751 +8752 +8753 +8754 +8755 +8756 +8757 +8758 +8759 +8760 +8761 +8762 +8763 +8764 +8765 +8766 +8767 +8768 +8769 +8770 +8771 +8772 +8773 +8774 +8775 +8776 +8777 +8778 +8779 +8780 +8781 +8782 +8783 +8784 +8785 +8786 +8787 +8788 +8789 +8790 +8791 +8792 +8793 +8794 +8795 +8796 +8797 +8798 +8799 +8800 +8801 +8802 +8803 +8804 +8805 +8806 +8807 +8808 +8809 +8810 +8811 +8812 +8813 +8814 +8815 +8816 +8817 +8818 +8819 +8820 +8821 +8822 +8823 +8824 +8825 +8826 +8827 +8828 +8829 +8830 +8831 +8832 +8833 +8834 +8835 +8836 +8837 +8838 +8839 +8840 +8841 +8842 +8843 +8844 +8845 +8846 +8847 +8848 +8849 +8850 +8851 +8852 +8853 +8854 +8855 +8856 +8857 +8858 +8859 +8860 +8861 +8862 +8863 +8864 +8865 +8866 +8867 +8868 +8869 +8870 +8871 +8872 +8873 +8874 +8875 +8876 +8877 +8878 +8879 +8880 +8881 +8882 +8883 +8884 +8885 +8886 +8887 +8888 
+8889 +8890 +8891 +8892 +8893 +8894 +8895 +8896 +8897 +8898 +8899 +8900 +8901 +8902 +8903 +8904 +8905 +8906 +8907 +8908 +8909 +8910 +8911 +8912 +8913 +8914 +8915 +8916 +8917 +8918 +8919 +8920 +8921 +8922 +8923 +8924 +8925 +8926 +8927 +8928 +8929 +8930 +8931 +8932 +8933 +8934 +8935 +8936 +8937 +8938 +8939 +8940 +8941 +8942 +8943 +8944 +8945 +8946 +8947 +8948 +8949 +8950 +8951 +8952 +8953 +8954 +8955 +8956 +8957 +8958 +8959 +8960 +8961 +8962 +8963 +8964 +8965 +8966 +8967 +8968 +8969 +8970 +8971 +8972 +8973 +8974 +8975 +8976 +8977 +8978 +8979 +8980 +8981 +8982 +8983 +8984 +8985 +8986 +8987 +8988 +8989 +8990 +8991 +8992 +8993 +8994 +8995 +8996 +8997 +8998 +8999 +9000 +9001 +9002 +9003 +9004 +9005 +9006 +9007 +9008 +9009 +9010 +9011 +9012 +9013 +9014 +9015 +9016 +9017 +9018 +9019 +9020 +9021 +9022 +9023 +9024 +9025 +9026 +9027 +9028 +9029 +9030 +9031 +9032 +9033 +9034 +9035 +9036 +9037 +9038 +9039 +9040 +9041 +9042 +9043 +9044 +9045 +9046 +9047 +9048 +9049 +9050 +9051 +9052 +9053 +9054 +9055 +9056 +9057 +9058 +9059 +9060 +9061 +9062 +9063 +9064 +9065 +9066 +9067 +9068 +9069 +9070 +9071 +9072 +9073 +9074 +9075 +9076 +9077 +9078 +9079 +9080 +9081 +9082 +9083 +9084 +9085 +9086 +9087 +9088 +9089 +9090 +9091 +9092 +9093 +9094 +9095 +9096 +9097 +9098 +9099 +9100 +9101 +9102 +9103 +9104 +9105 +9106 +9107 +9108 +9109 +9110 +9111 +9112 +9113 +9114 +9115 +9116 +9117 +9118 +9119 +9120 +9121 +9122 +9123 +9124 +9125 +9126 +9127 +9128 +9129 +9130 +9131 +9132 +9133 +9134 +9135 +9136 +9137 +9138 +9139 +9140 +9141 +9142 +9143 +9144 +9145 +9146 +9147 +9148 +9149 +9150 +9151 +9152 +9153 +9154 +9155 +9156 +9157 +9158 +9159 +9160 +9161 +9162 +9163 +9164 +9165 +9166 +9167 +9168 +9169 +9170 +9171 +9172 +9173 +9174 +9175 +9176 +9177 +9178 +9179 +9180 +9181 +9182 +9183 +9184 +9185 +9186 +9187 +9188 +9189 +9190 +9191 +9192 +9193 +9194 +9195 +9196 +9197 +9198 +9199 +9200 +9201 +9202 +9203 +9204 +9205 +9206 +9207 +9208 +9209 +9210 +9211 +9212 +9213 +9214 +9215 +9216 +9217 +9218 +9219 +9220 +9221 
+9222 +9223 +9224 +9225 +9226 +9227 +9228 +9229 +9230 +9231 +9232 +9233 +9234 +9235 +9236 +9237 +9238 +9239 +9240 +9241 +9242 +9243 +9244 +9245 +9246 +9247 +9248 +9249 +9250 +9251 +9252 +9253 +9254 +9255 +9256 +9257 +9258 +9259 +9260 +9261 +9262 +9263 +9264 +9265 +9266 +9267 +9268 +9269 +9270 +9271 +9272 +9273 +9274 +9275 +9276 +9277 +9278 +9279 +9280 +9281 +9282 +9283 +9284 +9285 +9286 +9287 +9288 +9289 +9290 +9291 +9292 +9293 +9294 +9295 +9296 +9297 +9298 +9299 +9300 +9301 +9302 +9303 +9304 +9305 +9306 +9307 +9308 +9309 +9310 +9311 +9312 +9313 +9314 +9315 +9316 +9317 +9318 +9319 +9320 +9321 +9322 +9323 +9324 +9325 +9326 +9327 +9328 +9329 +9330 +9331 +9332 +9333 +9334 +9335 +9336 +9337 +9338 +9339 +9340 +9341 +9342 +9343 +9344 +9345 +9346 +9347 +9348 +9349 +9350 +9351 +9352 +9353 +9354 +9355 +9356 +9357 +9358 +9359 +9360 +9361 +9362 +9363 +9364 +9365 +9366 +9367 +9368 +9369 +9370 +9371 +9372 +9373 +9374 +9375 +9376 +9377 +9378 +9379 +9380 +9381 +9382 +9383 +9384 +9385 +9386 +9387 +9388 +9389 +9390 +9391 +9392 +9393 +9394 +9395 +9396 +9397 +9398 +9399 +9400 +9401 +9402 +9403 +9404 +9405 +9406 +9407 +9408 +9409 +9410 +9411 +9412 +9413 +9414 +9415 +9416 +9417 +9418 +9419 +9420 +9421 +9422 +9423 +9424 +9425 +9426 +9427 +9428 +9429 +9430 +9431 +9432 +9433 +9434 +9435 +9436 +9437 +9438 +9439 +9440 +9441 +9442 +9443 +9444 +9445 +9446 +9447 +9448 +9449 +9450 +9451 +9452 +9453 +9454 +9455 +9456 +9457 +9458 +9459 +9460 +9461 +9462 +9463 +9464 +9465 +9466 +9467 +9468 +9469 +9470 +9471 +9472 +9473 +9474 +9475 +9476 +9477 +9478 +9479 +9480 +9481 +9482 +9483 +9484 +9485 +9486 +9487 +9488 +9489 +9490 +9491 +9492 +9493 +9494 +9495 +9496 +9497 +9498 +9499 +9500 +9501 +9502 +9503 +9504 +9505 +9506 +9507 +9508 +9509 +9510 +9511 +9512 +9513 +9514 +9515 +9516 +9517 +9518 +9519 +9520 +9521 +9522 +9523 +9524 +9525 +9526 +9527 +9528 +9529 +9530 +9531 +9532 +9533 +9534 +9535 +9536 +9537 +9538 +9539 +9540 +9541 +9542 +9543 +9544 +9545 +9546 +9547 +9548 +9549 +9550 +9551 +9552 +9553 +9554 
+9555 +9556 +9557 +9558 +9559 +9560 +9561 +9562 +9563 +9564 +9565 +9566 +9567 +9568 +9569 +9570 +9571 +9572 +9573 +9574 +9575 +9576 +9577 +9578 +9579 +9580 +9581 +9582 +9583 +9584 +9585 +9586 +9587 +9588 +9589 +9590 +9591 +9592 +9593 +9594 +9595 +9596 +9597 +9598 +9599 +9600 +9601 +9602 +9603 +9604 +9605 +9606 +9607 +9608 +9609 +9610 +9611 +9612 +9613 +9614 +9615 +9616 +9617 +9618 +9619 +9620 +9621 +9622 +9623 +9624 +9625 +9626 +9627 +9628 +9629 +9630 +9631 +9632 +9633 +9634 +9635 +9636 +9637 +9638 +9639 +9640 +9641 +9642 +9643 +9644 +9645 +9646 +9647 +9648 +9649 +9650 +9651 +9652 +9653 +9654 +9655 +9656 +9657 +9658 +9659 +9660 +9661 +9662 +9663 +9664 +9665 +9666 +9667 +9668 +9669 +9670 +9671 +9672 +9673 +9674 +9675 +9676 +9677 +9678 +9679 +9680 +9681 +9682 +9683 +9684 +9685 +9686 +9687 +9688 +9689 +9690 +9691 +9692 +9693 +9694 +9695 +9696 +9697 +9698 +9699 +9700 +9701 +9702 +9703 +9704 +9705 +9706 +9707 +9708 +9709 +9710 +9711 +9712 +9713 +9714 +9715 +9716 +9717 +9718 +9719 +9720 +9721 +9722 +9723 +9724 +9725 +9726 +9727 +9728 +9729 +9730 +9731 +9732 +9733 +9734 +9735 +9736 +9737 +9738 +9739 +9740 +9741 +9742 +9743 +9744 +9745 +9746 +9747 +9748 +9749 +9750 +9751 +9752 +9753 +9754 +9755 +9756 +9757 +9758 +9759 +9760 +9761 +9762 +9763 +9764 +9765 +9766 +9767 +9768 +9769 +9770 +9771 +9772 +9773 +9774 +9775 +9776 +9777 +9778 +9779 +9780 +9781 +9782 +9783 +9784 +9785 +9786 +9787 +9788 +9789 +9790 +9791 +9792 +9793 +9794 +9795 +9796 +9797 +9798 +9799 +9800 +9801 +9802 +9803 +9804 +9805 +9806 +9807 +9808 +9809 +9810 +9811 +9812 +9813 +9814 +9815 +9816 +9817 +9818 +9819 +9820 +9821 +9822 +9823 +9824 +9825 +9826 +9827 +9828 +9829 +9830 +9831 +9832 +9833 +9834 +9835 +9836 +9837 +9838 +9839 +9840 +9841 +9842 +9843 +9844 +9845 +9846 +9847 +9848 +9849 +9850 +9851 +9852 +9853 +9854 +9855 +9856 +9857 +9858 +9859 +9860 +9861 +9862 +9863 +9864 +9865 +9866 +9867 +9868 +9869 +9870 +9871 +9872 +9873 +9874 +9875 +9876 +9877 +9878 +9879 +9880 +9881 +9882 +9883 +9884 +9885 +9886 +9887 
+9888 +9889 +9890 +9891 +9892 +9893 +9894 +9895 +9896 +9897 +9898 +9899 +9900 +9901 +9902 +9903 +9904 +9905 +9906 +9907 +9908 +9909 +9910 +9911 +9912 +9913 +9914 +9915 +9916 +9917 +9918 +9919 +9920 +9921 +9922 +9923 +9924 +9925 +9926 +9927 +9928 +9929 +9930 +9931 +9932 +9933 +9934 +9935 +9936 +9937 +9938 +9939 +9940 +9941 +9942 +9943 +9944 +9945 +9946 +9947 +9948 +9949 +9950 +9951 +9952 +9953 +9954 +9955 +9956 +9957 +9958 +9959 +9960 +9961 +9962 +9963 +9964 +9965 +9966 +9967 +9968 +9969 +9970 +9971 +9972 +9973 +9974 +9975 +9976 +9977 +9978 +9979 +9980 +9981 +9982 +9983 +9984 +9985 +9986 +9987 +9988 +9989 +9990 +9991 +9992 +9993 +9994 +9995 +9996 +9997 +9998 +9999 +10000 diff --git a/data/contestants.1.csv b/data/contestants.1.csv new file mode 100644 index 000000000..bdd28c4d4 --- /dev/null +++ b/data/contestants.1.csv @@ -0,0 +1,5 @@ +a,1990-01-10,2090,97.1,XA ,{a} +b,1990-11-01,2203,98.1,XA ,"{a,b}" +c,1988-11-01,2907,99.4,XB ,"{w,y}" +d,1985-05-05,2314,98.3,XB ,{} +e,1995-05-05,2236,98.2,XC ,{a} diff --git a/data/contestants.2.csv b/data/contestants.2.csv new file mode 100644 index 000000000..1a4506bc3 --- /dev/null +++ b/data/contestants.2.csv @@ -0,0 +1,3 @@ +f,1983-04-02,3090,99.6,XD ,"{a,b,c,y}" +g,1991-12-13,1803,85.1,XD ,"{a,c}" +h,1987-10-26,2112,95.4,XD ,"{w,a}" diff --git a/data/datetime_types.csv b/data/datetime_types.csv new file mode 100644 index 000000000..b5c2eb099 --- /dev/null +++ b/data/datetime_types.csv @@ -0,0 +1,2 @@ +2000-01-02 04:05:06,1999-01-08 14:05:06+02,2000-01-02,04:05:06,04:00:00 +1970-01-01 00:00:00,infinity,-infinity,00:00:00,00:00:00 diff --git a/data/enum_and_composite_types.csv b/data/enum_and_composite_types.csv new file mode 100644 index 000000000..979f2ebc3 --- /dev/null +++ b/data/enum_and_composite_types.csv @@ -0,0 +1,2 @@ +a,"(2,b)" +b,"(3,c)" diff --git a/data/null_values.csv b/data/null_values.csv new file mode 100644 index 000000000..7ddd67b24 --- /dev/null +++ b/data/null_values.csv @@ -0,0 +1,2 @@ +,{NULL},"(,)" +,, 
diff --git a/data/other_types.csv b/data/other_types.csv new file mode 100644 index 000000000..487f386fb --- /dev/null +++ b/data/other_types.csv @@ -0,0 +1,2 @@ +f,\xdeadbeef,$1.00,192.168.1.2,10101,a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11,"{""key"": ""value""}" +t,\xcdb0,$1.50,127.0.0.1,"",a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11,[] diff --git a/data/range_types.csv b/data/range_types.csv new file mode 100644 index 000000000..db0ca880c --- /dev/null +++ b/data/range_types.csv @@ -0,0 +1,2 @@ +"[1,3)","[1,3)","[1,3)","[""2000-01-02 00:30:00"",""2010-02-03 12:30:00"")" +empty,"[1,)","(,)",empty diff --git a/expected/alter.out b/expected/alter.out new file mode 100644 index 000000000..659e2723e --- /dev/null +++ b/expected/alter.out @@ -0,0 +1,178 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; +-- drop a column +ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; +-- test analyze +ANALYZE test_alter_table; +-- verify select queries run as expected +SELECT * FROM test_alter_table; + b | c +---+--- + 2 | 3 + 5 | 6 + 8 | 9 +(3 rows) + +SELECT a FROM test_alter_table; +ERROR: column "a" does not exist +LINE 1: SELECT a FROM test_alter_table; + ^ +SELECT b FROM test_alter_table; + b +--- + 2 + 5 + 8 +(3 rows) + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +ERROR: INSERT has more expressions than target columns +LINE 1: INSERT INTO test_alter_table (SELECT 3, 5, 8); + ^ +INSERT INTO test_alter_table (SELECT 5, 8); +-- add a column with no defaults +ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | +(4 rows) + +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 
5 | 6 | + 8 | 9 | + 5 | 8 | + 3 | 5 | 8 +(5 rows) + +-- add a fixed-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 +(5 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 + 1 | 2 | 4 | 8 +(6 rows) + +-- add a variable-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME +(6 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME + 1 | 2 | 4 | 8 | ABCDEF +(7 rows) + +-- drop couple of columns +ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; +ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; + b | d | f +---+---+--------- + 2 | | TEXT ME + 5 | | TEXT ME + 8 | | TEXT ME + 5 | | TEXT ME + 3 | 8 | TEXT ME + 1 | 4 | TEXT ME + 1 | 4 | ABCDEF +(7 rows) + +SELECT count(*) from test_alter_table; + count +------- + 7 +(1 row) + +SELECT count(t.*) from test_alter_table t; + count +------- + 7 +(1 row) + +-- unsupported default values +ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for 
column "g" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for column "h" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + b | d | f | g | h +---+---+---------+---+--- + 2 | | TEXT ME | | + 5 | | TEXT ME | | + 8 | | TEXT ME | | + 5 | | TEXT ME | | + 3 | 8 | TEXT ME | | + 1 | 4 | TEXT ME | | + 1 | 4 | ABCDEF | | +(7 rows) + +-- unsupported type change +ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; +-- this is valid type change +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; +-- this is not valid +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; +ERROR: Column j cannot be cast automatically to type pg_catalog.int4 +-- text / varchar conversion is valid both ways +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; +DROP FOREIGN TABLE test_alter_table; diff --git a/expected/analyze.out b/expected/analyze.out new file mode 100644 index 000000000..f8c4d974a --- /dev/null +++ b/expected/analyze.out @@ -0,0 +1,19 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. 
+-- +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + count +------- + 6 +(1 row) + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; + count +------- + 6 +(1 row) + diff --git a/expected/drop.out b/expected/drop.out new file mode 100644 index 000000000..dc5678da7 --- /dev/null +++ b/expected/drop.out @@ -0,0 +1,97 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. +-- +-- DROP FOREIGN TABL +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 2 +(1 row) + +-- DROP cstore_fdw tables +DROP FOREIGN TABLE contestant; +DROP FOREIGN TABLE contestant_compressed; +-- Create a cstore_fdw table under a schema and drop it. +CREATE SCHEMA test_schema; +CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +DROP SCHEMA test_schema CASCADE; +NOTICE: drop cascades to foreign table test_schema.test_table +-- Check that the files have been deleted and the directory is empty after the +-- DROP table command. 
+SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + +SELECT current_database() datname \gset +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +-- should see 2 files, data and footer file for single table +SELECT count(*) FROM pg_ls_dir('cstore_fdw/' || :databaseoid); + count +------- + 2 +(1 row) + +-- should see 2 directories 1 for each database, excluding postgres database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + count +------- + 2 +(1 row) + +DROP EXTENSION cstore_fdw CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to server cstore_server +drop cascades to foreign table test_table +-- should only see 1 directory here +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + count +------- + 1 +(1 row) + +-- test database drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +-- should see 2 directories 1 for each database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + count +------- + 2 +(1 row) + +\c :datname +DROP DATABASE db_to_drop; +-- should only see 1 directory for the default database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + count +------- + 1 +(1 row) + diff --git a/expected/functions.out b/expected/functions.out new file mode 100644 index 000000000..117fc15f9 --- /dev/null +++ 
b/expected/functions.out @@ -0,0 +1,18 @@ +-- +-- Test utility functions for cstore_fdw tables. +-- +CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; +CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE non_cstore_table (a int); +COPY table_with_data FROM STDIN; +SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); + ?column? +---------- + t +(1 row) + +SELECT cstore_table_size('non_cstore_table'); +ERROR: relation is not a cstore table +DROP FOREIGN TABLE empty_table; +DROP FOREIGN TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/expected/insert.out b/expected/insert.out new file mode 100644 index 000000000..49d9ed132 --- /dev/null +++ b/expected/insert.out @@ -0,0 +1,88 @@ +-- +-- Testing insert on cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; +-- test single row inserts fail +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command values(1); +ERROR: operation is not supported +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command default values; +ERROR: operation is not supported +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); +select count(*) from test_insert_command_data; + count +------- + 0 +(1 row) + +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + count +------- + 1 +(1 row) + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + count +------- + 1 +(1 row) + +drop table test_insert_command_data; +drop foreign table test_insert_command; +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as 
int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, generate_series(1, 1000) b +GROUP BY a ORDER BY a; +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; +CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) +SERVER cstore_server; +-- store long text in cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; +-- drop source table to remove original text from toast +DROP TABLE test_long_text; +-- check if text data is still available in cstore table +-- by comparing previously stored hash. +SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + int_val +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +(10 rows) + +DROP TABLE test_long_text_hash; +DROP FOREIGN TABLE test_cstore_long_text; diff --git a/expected/query.out b/expected/query.out new file mode 100644 index 000000000..7ac3508a4 --- /dev/null +++ b/expected/query.out @@ -0,0 +1,105 @@ +-- +-- Test querying cstore_fdw tables. 
+-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +-- Query uncompressed data +SELECT count(*) FROM contestant; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant_compressed ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 
1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + to_json +------------------------------------------------------------------------------------------------------------------ + {"handle":"g","birthdate":"1991-12-13","rating":1803,"percentile":85.1,"country":"XD ","achievements":["a","c"]} +(1 row) + +-- Test variables used in expressions +CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + ?column? | b +----------+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 +(10 rows) + +DROP FOREIGN TABLE union_first, union_second; diff --git a/expected/truncate.out b/expected/truncate.out new file mode 100644 index 000000000..e16a6ea9f --- /dev/null +++ b/expected/truncate.out @@ -0,0 +1,262 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. +-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + t +(1 row) + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. 
+SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 26 +(1 row) + +-- make sure data files still present +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 6 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO 
cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE 
cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for table truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; +-- verify files are removed +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + 
) AS q1) AS q2; + count +------- + 0 +(1 row) + diff --git a/expected/truncate_0.out b/expected/truncate_0.out new file mode 100644 index 000000000..c8cc4ad98 --- /dev/null +++ b/expected/truncate_0.out @@ -0,0 +1,262 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. +-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + f +(1 row) + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT 
count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 26 +(1 row) + +-- make sure data files still present +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 6 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from 
generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for relation truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can 
truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; +-- verify files are removed +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + diff --git a/input/block_filtering.source b/input/block_filtering.source new file mode 100644 index 000000000..4451262d4 --- /dev/null +++ b/input/block_filtering.source @@ -0,0 +1,71 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- + + +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. 
+-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; + + +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/block_filtering.cstore', + block_row_count '1000', stripe_row_count '2000'); + +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; + + +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); 
+SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/collation_block_filtering.cstore'); +COPY collation_block_filtering_test FROM STDIN; +A +Å +B +\. + +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; diff --git a/input/copyto.source b/input/copyto.source new file mode 100644 index 000000000..96403a3f4 --- /dev/null +++ b/input/copyto.source @@ -0,0 +1,18 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/test_contestant.cstore'); + +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; + +-- export using COPY (SELECT * FROM table) TO ... +COPY (select * from test_contestant) TO STDOUT; + +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/input/create.source b/input/create.source new file mode 100644 index 000000000..fbd27dc50 --- /dev/null +++ b/input/create.source @@ -0,0 +1,49 @@ +-- +-- Test the CREATE statements related to cstore_fdw. 
+-- + + +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; + +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; + + +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(filename 'data.cstore', bad_option_name '1'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(filename 'data.cstore', stripe_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(filename 'data.cstore', block_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(filename 'data.cstore', compression 'invalid_compression'); -- ERROR + +-- Invalid file path test +CREATE FOREIGN TABLE test_invalid_file_path () + SERVER cstore_server + OPTIONS(filename 'bad_directory_path/bad_file_path'); --ERROR + +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/contestant.cstore'); + + +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); + +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; diff --git a/input/data_types.source b/input/data_types.source new file mode 100644 index 000000000..c3398c67b --- /dev/null +++ b/input/data_types.source @@ -0,0 +1,74 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. 
+-- + + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; + + +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/array_types.cstore'); + +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; + +SELECT * FROM test_array_types; + + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/datetime_types.cstore'); + +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; + +SELECT * FROM test_datetime_types; + + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); + +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/enum_and_composite_types.cstore'); + +COPY test_enum_and_composite_types FROM + '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; + +SELECT * FROM test_enum_and_composite_types; + + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/range_types.cstore'); + +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; + +SELECT * FROM test_range_types; + + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/other_types.cstore'); + +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; + 
+SELECT * FROM test_other_types; + + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/null_values.cstore'); + +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; + +SELECT * FROM test_null_values; diff --git a/input/load.source b/input/load.source new file mode 100644 index 000000000..0913acde7 --- /dev/null +++ b/input/load.source @@ -0,0 +1,44 @@ +-- +-- Test loading data into cstore_fdw tables. +-- + +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR + +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR + +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; + +-- COPY into compressed table +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; + +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +3.141,pi,1 +2.718,e,2 +0.577,gamma,3 +5.291e-11,bohr radius,4 +\. + +COPY famous_constants (name, value) FROM STDIN WITH CSV; +avagadro,6.022e23 +electron mass,9.109e-31 +proton mass,1.672e-27 +speed of light,2.997e8 +\. 
+ +SELECT * FROM famous_constants ORDER BY id, name; + +DROP FOREIGN TABLE famous_constants; diff --git a/output/block_filtering.source b/output/block_filtering.source new file mode 100644 index 000000000..21e1eb772 --- /dev/null +++ b/output/block_filtering.source @@ -0,0 +1,118 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. +-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/block_filtering.cstore', + block_row_count '1000', stripe_row_count '2000'); +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 801 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); + filtered_row_count +-------------------- + 200 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); + filtered_row_count +-------------------- + 101 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); + 
filtered_row_count +-------------------- + 900 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); + filtered_row_count +-------------------- + 990 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 1979 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 1602 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 3958 +(1 row) + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/collation_block_filtering.cstore'); +COPY collation_block_filtering_test FROM STDIN; +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; + a +--- + Å +(1 row) + diff --git a/output/copyto.source b/output/copyto.source new file mode 100644 index 000000000..6024dd205 --- /dev/null +++ b/output/copyto.source @@ -0,0 +1,24 @@ +-- +-- Test copying data from cstore_fdw tables. 
+-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/test_contestant.cstore'); +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +-- export using COPY (SELECT * FROM table) TO ... +COPY (select * from test_contestant) TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/output/create.source b/output/create.source new file mode 100644 index 000000000..937afa2a0 --- /dev/null +++ b/output/create.source @@ -0,0 +1,50 @@ +-- +-- Test the CREATE statements related to cstore_fdw. 
+-- +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(filename 'data.cstore', bad_option_name '1'); -- ERROR +ERROR: invalid option "bad_option_name" +HINT: Valid options in this context are: filename, compression, stripe_row_count, block_row_count +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(filename 'data.cstore', stripe_row_count '0'); -- ERROR +ERROR: invalid stripe row count +HINT: Stripe row count must be an integer between 1000 and 10000000 +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(filename 'data.cstore', block_row_count '0'); -- ERROR +ERROR: invalid block row count +HINT: Block row count must be an integer between 1000 and 100000 +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(filename 'data.cstore', compression 'invalid_compression'); -- ERROR +ERROR: invalid compression type +HINT: Valid options are: none, pglz +-- Invalid file path test +CREATE FOREIGN TABLE test_invalid_file_path () + SERVER cstore_server + OPTIONS(filename 'bad_directory_path/bad_file_path'); --ERROR +ERROR: could not open file "bad_directory_path/bad_file_path" for writing: No such file or directory +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/contestant.cstore'); +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); +-- Test that querying an empty table works +ANALYZE 
contestant; +SELECT count(*) FROM contestant; + count +------- + 0 +(1 row) + diff --git a/output/data_types.source b/output/data_types.source new file mode 100644 index 000000000..efa03a663 --- /dev/null +++ b/output/data_types.source @@ -0,0 +1,84 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. +-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/array_types.cstore'); +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; +SELECT * FROM test_array_types; + int_array | bigint_array | text_array +--------------------------+--------------------------------------------+------------ + {1,2,3} | {1,2,3} | {a,b,c} + {} | {} | {} + {-2147483648,2147483647} | {-9223372036854775808,9223372036854775807} | {""} +(3 rows) + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/datetime_types.cstore'); +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; +SELECT * FROM test_datetime_types; + timestamp | timestamp_with_timezone | date | time | interval +---------------------+-------------------------+------------+----------+----------- + 2000-01-02 04:05:06 | 1999-01-08 12:05:06+00 | 2000-01-02 | 04:05:06 | @ 4 hours + 1970-01-01 00:00:00 | infinity | -infinity | 00:00:00 | @ 0 +(2 rows) + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server + 
OPTIONS(filename '@abs_srcdir@/data/enum_and_composite_types.cstore'); +COPY test_enum_and_composite_types FROM + '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; +SELECT * FROM test_enum_and_composite_types; + enum | composite +------+----------- + a | (2,b) + b | (3,c) +(2 rows) + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/range_types.cstore'); +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; +SELECT * FROM test_range_types; + int4range | int8range | numrange | tsrange +-----------+-----------+----------+----------------------------------------------- + [1,3) | [1,3) | [1,3) | ["2000-01-02 00:30:00","2010-02-03 12:30:00") + empty | [1,) | (,) | empty +(2 rows) + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/other_types.cstore'); +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; +SELECT * FROM test_other_types; + bool | bytea | money | inet | bitstring | uuid | json +------+------------+-------+-------------+-----------+--------------------------------------+------------------ + f | \xdeadbeef | $1.00 | 192.168.1.2 | 10101 | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | {"key": "value"} + t | \xcdb0 | $1.50 | 127.0.0.1 | | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | [] +(2 rows) + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server + OPTIONS(filename '@abs_srcdir@/data/null_values.cstore'); +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; +SELECT * FROM test_null_values; + a | b | c +---+--------+----- + | {NULL} | (,) + | | +(2 rows) + diff --git a/output/load.source b/output/load.source new file mode 
100644 index 000000000..c76f203eb --- /dev/null +++ b/output/load.source @@ -0,0 +1,39 @@ +-- +-- Test loading data into cstore_fdw tables. +-- +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR +ERROR: missing data for column "birthdate" +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR +ERROR: program "invalid_program" failed +DETAIL: command not found +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; +-- COPY into compressed table +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +COPY famous_constants (name, value) FROM STDIN WITH CSV; +SELECT * FROM famous_constants ORDER BY id, name; + id | name | value +----+----------------+----------- + 1 | pi | 3.141 + 2 | e | 2.718 + 3 | gamma | 0.577 + 4 | bohr radius | 5.291e-11 + | avagadro | 6.022e+23 + | electron mass | 9.109e-31 + | proton mass | 1.672e-27 + | speed of light | 2.997e+08 +(8 rows) + +DROP FOREIGN TABLE famous_constants; diff --git a/sql/alter.sql b/sql/alter.sql new file mode 100644 index 000000000..5ba3beb34 --- /dev/null +++ b/sql/alter.sql @@ -0,0 +1,85 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. 
+-- + +CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; + +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; + +-- drop a column +ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; + +-- test analyze +ANALYZE test_alter_table; + +-- verify select queries run as expected +SELECT * FROM test_alter_table; +SELECT a FROM test_alter_table; +SELECT b FROM test_alter_table; + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +INSERT INTO test_alter_table (SELECT 5, 8); + + +-- add a column with no defaults +ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + + +-- add a fixed-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + + +-- add a variable-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + + +-- drop couple of columns +ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; +ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; +SELECT count(*) from test_alter_table; +SELECT count(t.*) from test_alter_table t; + + +-- unsupported default values +ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; 
+ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + +-- unsupported type change +ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; + +-- this is valid type change +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; + +-- this is not valid +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; + +-- text / varchar conversion is valid both ways +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; + +DROP FOREIGN TABLE test_alter_table; diff --git a/sql/analyze.sql b/sql/analyze.sql new file mode 100644 index 000000000..4476454a6 --- /dev/null +++ b/sql/analyze.sql @@ -0,0 +1,11 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. +-- + +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; diff --git a/sql/drop.sql b/sql/drop.sql new file mode 100644 index 000000000..a0852a279 --- /dev/null +++ b/sql/drop.sql @@ -0,0 +1,76 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. +-- +-- DROP FOREIGN TABL +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- + +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. + +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. 
+SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + +-- DROP cstore_fdw tables +DROP FOREIGN TABLE contestant; +DROP FOREIGN TABLE contestant_compressed; + +-- Create a cstore_fdw table under a schema and drop it. +CREATE SCHEMA test_schema; +CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +DROP SCHEMA test_schema CASCADE; + +-- Check that the files have been deleted and the directory is empty after the +-- DROP table command. +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + +SELECT current_database() datname \gset + +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +-- should see 2 files, data and footer file for single table +SELECT count(*) FROM pg_ls_dir('cstore_fdw/' || :databaseoid); + +-- should see 2 directories 1 for each database, excluding postgres database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + +DROP EXTENSION cstore_fdw CASCADE; + +-- should only see 1 directory here +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + +-- test database drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; + +-- should see 2 directories 1 for each database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; + +\c :datname + +DROP 
DATABASE db_to_drop; + +-- should only see 1 directory for the default database +SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; diff --git a/sql/functions.sql b/sql/functions.sql new file mode 100644 index 000000000..ed7e260b3 --- /dev/null +++ b/sql/functions.sql @@ -0,0 +1,20 @@ +-- +-- Test utility functions for cstore_fdw tables. +-- + +CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; +CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE non_cstore_table (a int); + +COPY table_with_data FROM STDIN; +1 +2 +3 +\. + +SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); +SELECT cstore_table_size('non_cstore_table'); + +DROP FOREIGN TABLE empty_table; +DROP FOREIGN TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/sql/insert.sql b/sql/insert.sql new file mode 100644 index 000000000..7a6b075ce --- /dev/null +++ b/sql/insert.sql @@ -0,0 +1,56 @@ +-- +-- Testing insert on cstore_fdw tables. 
+-- + +CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; + +-- test single row inserts fail +select count(*) from test_insert_command; +insert into test_insert_command values(1); +select count(*) from test_insert_command; + +insert into test_insert_command default values; +select count(*) from test_insert_command; + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); + +select count(*) from test_insert_command_data; +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + +drop table test_insert_command_data; +drop foreign table test_insert_command; + +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, generate_series(1, 1000) b +GROUP BY a ORDER BY a; + +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; + +CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) +SERVER cstore_server; + +-- store long text in cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; + +-- drop source table to remove original text from toast +DROP TABLE test_long_text; + +-- check if text data is still available in cstore table +-- by comparing previously stored hash. +SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + +DROP TABLE test_long_text_hash; +DROP FOREIGN TABLE test_cstore_long_text; diff --git a/sql/query.sql b/sql/query.sql new file mode 100644 index 000000000..87743e7bd --- /dev/null +++ b/sql/query.sql @@ -0,0 +1,34 @@ +-- +-- Test querying cstore_fdw tables. 
+-- + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; + +-- Query uncompressed data +SELECT count(*) FROM contestant; +SELECT avg(rating), stddev_samp(rating) FROM contestant; +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant ORDER BY handle; + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant_compressed ORDER BY handle; + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + +-- Test variables used in expressions +CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; + +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; + +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + +DROP FOREIGN TABLE union_first, union_second; diff --git a/sql/truncate.sql b/sql/truncate.sql new file mode 100644 index 000000000..0aac2bd34 --- /dev/null +++ b/sql/truncate.sql @@ -0,0 +1,135 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. +-- + +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. 
+SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; + +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; + +-- query rows +SELECT * FROM cstore_truncate_test; + +TRUNCATE TABLE cstore_truncate_test; + +SELECT * FROM cstore_truncate_test; + +SELECT COUNT(*) from cstore_truncate_test; + +SELECT count(*) FROM cstore_truncate_test_compressed; +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + +-- make sure data files still present +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; + +SELECT * from cstore_truncate_test; + +SELECT * from cstore_truncate_test_second; + +SELECT * from cstore_truncate_test_regular; + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + 
cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; + +SELECT * from cstore_truncate_test; +SELECT * from cstore_truncate_test_second; +SELECT * from cstore_truncate_test_regular; + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; + +SELECT cstore_truncate_test_regular_func(); +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); +DROP FUNCTION cstore_truncate_test_regular_func(); + +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; + +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); + +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; + +SELECT current_user \gset + +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM 
truncate_schema.truncate_tbl; + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; + +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + +\c - :current_user + +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +DROP USER truncate_user; + +-- verify files are removed +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; From ba506acd35dee6859c517fbd45669a44a782bfa8 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Mon, 31 Aug 2020 11:39:08 -0700 Subject: [PATCH 002/124] Refactor the FDW API to take code out of cstore_fdw.c. --- Makefile | 4 +- cstore.c | 170 ++++++++++++++++++++++++++++ cstore.h | 311 +++++++++++++++++++++++++++++++++++++++++++++++++++ cstore_fdw.c | 258 +++++++++--------------------------------- cstore_fdw.h | 286 +--------------------------------------------- mod.c | 30 +++++ 6 files changed, 568 insertions(+), 491 deletions(-) create mode 100644 cstore.c create mode 100644 cstore.h create mode 100644 mod.c diff --git a/Makefile b/Makefile index 72daebc55..bd3ae77ce 100644 --- a/Makefile +++ b/Makefile @@ -7,8 +7,8 @@ MODULE_big = cstore_fdw PG_CPPFLAGS = --std=c99 SHLIB_LINK = -lprotobuf-c -OBJS = cstore.pb-c.o cstore_fdw.o cstore_writer.o cstore_reader.o \ - cstore_metadata_serialization.o cstore_compression.o +OBJS = cstore.pb-c.o cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ + cstore_metadata_serialization.o cstore_compression.o mod.o EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ diff --git a/cstore.c b/cstore.c new file mode 100644 index 000000000..ccb59675f --- /dev/null 
+++ b/cstore.c @@ -0,0 +1,170 @@ +/*------------------------------------------------------------------------- + * + * cstore.c + * + * This file contains... + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "miscadmin.h" + +#include "cstore.h" + +#include + +static void CreateDirectory(StringInfo directoryName); +static bool DirectoryExists(StringInfo directoryName); + +/* ParseCompressionType converts a string to a compression type. */ +CompressionType +ParseCompressionType(const char *compressionTypeString) +{ + CompressionType compressionType = COMPRESSION_TYPE_INVALID; + Assert(compressionTypeString != NULL); + + if (strncmp(compressionTypeString, COMPRESSION_STRING_NONE, NAMEDATALEN) == 0) + { + compressionType = COMPRESSION_NONE; + } + else if (strncmp(compressionTypeString, COMPRESSION_STRING_PG_LZ, NAMEDATALEN) == 0) + { + compressionType = COMPRESSION_PG_LZ; + } + + return compressionType; +} + +/* CreateDirectory creates a new directory with the given directory name. */ +static void +CreateDirectory(StringInfo directoryName) +{ + int makeOK = mkdir(directoryName->data, S_IRWXU); + if (makeOK != 0) + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not create directory \"%s\": %m", + directoryName->data))); + } +} + +/* DirectoryExists checks if a directory exists for the given directory name. 
*/ +static bool +DirectoryExists(StringInfo directoryName) +{ + bool directoryExists = true; + struct stat directoryStat; + + int statOK = stat(directoryName->data, &directoryStat); + if (statOK == 0) + { + /* file already exists; check that it is a directory */ + if (!S_ISDIR(directoryStat.st_mode)) + { + ereport(ERROR, (errmsg("\"%s\" is not a directory", directoryName->data), + errhint("You need to remove or rename the file \"%s\".", + directoryName->data))); + } + } + else + { + if (errno == ENOENT) + { + directoryExists = false; + } + else + { + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not stat directory \"%s\": %m", + directoryName->data))); + } + } + + return directoryExists; +} + +/* + * RemoveCStoreDatabaseDirectory removes CStore directory previously + * created for this database. + * However it does not remove 'cstore_fdw' directory even if there + * are no other databases left. + */ +void +RemoveCStoreDatabaseDirectory(Oid databaseOid) +{ + StringInfo cstoreDirectoryPath = makeStringInfo(); + StringInfo cstoreDatabaseDirectoryPath = makeStringInfo(); + + appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); + + appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, + CSTORE_FDW_NAME, databaseOid); + + if (DirectoryExists(cstoreDatabaseDirectoryPath)) + { + rmtree(cstoreDatabaseDirectoryPath->data, true); + } +} + + +/* + * InitializeCStoreTableFile creates data and footer file for a cstore table. + * The function assumes data and footer files do not exist, therefore + * it should be called on empty or non-existing table. Notice that the caller + * is expected to acquire AccessExclusiveLock on the relation. + */ +void +InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions) +{ + TableWriteState *writeState = NULL; + TupleDesc tupleDescriptor = RelationGetDescr(relation); + + /* + * Initialize state to write to the cstore file. 
This creates an + * empty data file and a valid footer file for the table. + */ + writeState = CStoreBeginWrite(cstoreOptions->filename, + cstoreOptions->compressionType, cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, tupleDescriptor); + CStoreEndWrite(writeState); +} + + +/* + * CreateCStoreDatabaseDirectory creates the directory (and parent directories, + * if needed) used to store automatically managed cstore_fdw files. The path to + * the directory is $PGDATA/cstore_fdw/{databaseOid}. + */ +void +CreateCStoreDatabaseDirectory(Oid databaseOid) +{ + bool cstoreDirectoryExists = false; + bool databaseDirectoryExists = false; + StringInfo cstoreDatabaseDirectoryPath = NULL; + + StringInfo cstoreDirectoryPath = makeStringInfo(); + appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); + + cstoreDirectoryExists = DirectoryExists(cstoreDirectoryPath); + if (!cstoreDirectoryExists) + { + CreateDirectory(cstoreDirectoryPath); + } + + cstoreDatabaseDirectoryPath = makeStringInfo(); + appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, + CSTORE_FDW_NAME, databaseOid); + + databaseDirectoryExists = DirectoryExists(cstoreDatabaseDirectoryPath); + if (!databaseDirectoryExists) + { + CreateDirectory(cstoreDatabaseDirectoryPath); + } +} + diff --git a/cstore.h b/cstore.h new file mode 100644 index 000000000..f51a972e2 --- /dev/null +++ b/cstore.h @@ -0,0 +1,311 @@ +/*------------------------------------------------------------------------- + * + * cstore.h + * + * Type and function declarations for CStore + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_H +#define CSTORE_H + +#include "access/tupdesc.h" +#include "fmgr.h" +#include "catalog/pg_am.h" +#include "catalog/pg_foreign_server.h" +#include "catalog/pg_foreign_table.h" +#include "lib/stringinfo.h" +#include "utils/rel.h" + +/* Defines for valid option names */ +#define OPTION_NAME_FILENAME "filename" +#define OPTION_NAME_COMPRESSION_TYPE "compression" +#define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" +#define OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" + +/* Default values for option parameters */ +#define DEFAULT_COMPRESSION_TYPE COMPRESSION_NONE +#define DEFAULT_STRIPE_ROW_COUNT 150000 +#define DEFAULT_BLOCK_ROW_COUNT 10000 + +/* Limits for option parameters */ +#define STRIPE_ROW_COUNT_MINIMUM 1000 +#define STRIPE_ROW_COUNT_MAXIMUM 10000000 +#define BLOCK_ROW_COUNT_MINIMUM 1000 +#define BLOCK_ROW_COUNT_MAXIMUM 100000 + +/* String representations of compression types */ +#define COMPRESSION_STRING_NONE "none" +#define COMPRESSION_STRING_PG_LZ "pglz" + +/* CStore file signature */ +#define CSTORE_MAGIC_NUMBER "citus_cstore" +#define CSTORE_VERSION_MAJOR 1 +#define CSTORE_VERSION_MINOR 7 + +/* miscellaneous defines */ +#define CSTORE_FDW_NAME "cstore_fdw" +#define CSTORE_FOOTER_FILE_SUFFIX ".footer" +#define CSTORE_TEMP_FILE_SUFFIX ".tmp" +#define CSTORE_TUPLE_COST_MULTIPLIER 10 +#define CSTORE_POSTSCRIPT_SIZE_LENGTH 1 +#define CSTORE_POSTSCRIPT_SIZE_MAX 256 + +/* Enumaration for cstore file's compression method */ +typedef enum +{ + COMPRESSION_TYPE_INVALID = -1, + COMPRESSION_NONE = 0, + COMPRESSION_PG_LZ = 1, + + COMPRESSION_COUNT + +} CompressionType; + + +/* + * CStoreFdwOptions holds the option values to be used when reading or writing + * a cstore file. To resolve these values, we first check foreign table's options, + * and if not present, we then fall back to the default values specified above. 
+ */ +typedef struct CStoreOptions +{ + char *filename; + CompressionType compressionType; + uint64 stripeRowCount; + uint32 blockRowCount; + +} CStoreOptions; + + +/* + * StripeMetadata represents information about a stripe. This information is + * stored in the cstore file's footer. + */ +typedef struct StripeMetadata +{ + uint64 fileOffset; + uint64 skipListLength; + uint64 dataLength; + uint64 footerLength; + +} StripeMetadata; + + +/* TableFooter represents the footer of a cstore file. */ +typedef struct TableFooter +{ + List *stripeMetadataList; + uint64 blockRowCount; + +} TableFooter; + + +/* ColumnBlockSkipNode contains statistics for a ColumnBlockData. */ +typedef struct ColumnBlockSkipNode +{ + /* statistics about values of a column block */ + bool hasMinMax; + Datum minimumValue; + Datum maximumValue; + uint64 rowCount; + + /* + * Offsets and sizes of value and exists streams in the column data. + * These enable us to skip reading suppressed row blocks, and start reading + * a block without reading previous blocks. + */ + uint64 valueBlockOffset; + uint64 valueLength; + uint64 existsBlockOffset; + uint64 existsLength; + + CompressionType valueCompressionType; + +} ColumnBlockSkipNode; + + +/* + * StripeSkipList can be used for skipping row blocks. It contains a column block + * skip node for each block of each column. blockSkipNodeArray[column][block] + * is the entry for the specified column block. + */ +typedef struct StripeSkipList +{ + ColumnBlockSkipNode **blockSkipNodeArray; + uint32 columnCount; + uint32 blockCount; + +} StripeSkipList; + + +/* + * ColumnBlockData represents a block of data in a column. valueArray stores + * the values of data, and existsArray stores whether a value is present. + * valueBuffer is used to store (uncompressed) serialized values + * referenced by Datum's in valueArray. It is only used for by-reference Datum's. + * There is a one-to-one correspondence between valueArray and existsArray. 
+ */ +typedef struct ColumnBlockData +{ + bool *existsArray; + Datum *valueArray; + + /* valueBuffer keeps actual data for type-by-reference datums from valueArray. */ + StringInfo valueBuffer; + +} ColumnBlockData; + + +/* + * ColumnBlockBuffers represents a block of serialized data in a column. + * valueBuffer stores the serialized values of data, and existsBuffer stores + * serialized value of presence information. valueCompressionType contains + * compression type if valueBuffer is compressed. Finally rowCount has + * the number of rows in this block. + */ +typedef struct ColumnBlockBuffers +{ + StringInfo existsBuffer; + StringInfo valueBuffer; + CompressionType valueCompressionType; + +} ColumnBlockBuffers; + + +/* + * ColumnBuffers represents data buffers for a column in a row stripe. Each + * column is made of multiple column blocks. + */ +typedef struct ColumnBuffers +{ + ColumnBlockBuffers **blockBuffersArray; + +} ColumnBuffers; + + +/* StripeBuffers represents data for a row stripe in a cstore file. */ +typedef struct StripeBuffers +{ + uint32 columnCount; + uint32 rowCount; + ColumnBuffers **columnBuffersArray; + +} StripeBuffers; + + +/* + * StripeFooter represents a stripe's footer. In this footer, we keep three + * arrays of sizes. The number of elements in each of the arrays is equal + * to the number of columns. + */ +typedef struct StripeFooter +{ + uint32 columnCount; + uint64 *skipListSizeArray; + uint64 *existsSizeArray; + uint64 *valueSizeArray; + +} StripeFooter; + + +/* TableReadState represents state of a cstore file read operation. */ +typedef struct TableReadState +{ + FILE *tableFile; + TableFooter *tableFooter; + TupleDesc tupleDescriptor; + + /* + * List of Var pointers for columns in the query. We use this both for + * getting vector of projected columns, and also when we want to build + * base constraint to find selected row blocks. 
+ */ + List *projectedColumnList; + + List *whereClauseList; + MemoryContext stripeReadContext; + StripeBuffers *stripeBuffers; + uint32 readStripeCount; + uint64 stripeReadRowCount; + ColumnBlockData **blockDataArray; + int32 deserializedBlockIndex; + +} TableReadState; + + +/* TableWriteState represents state of a cstore file write operation. */ +typedef struct TableWriteState +{ + FILE *tableFile; + TableFooter *tableFooter; + StringInfo tableFooterFilename; + CompressionType compressionType; + TupleDesc tupleDescriptor; + FmgrInfo **comparisonFunctionArray; + uint64 currentFileOffset; + Relation relation; + + MemoryContext stripeWriteContext; + StripeBuffers *stripeBuffers; + StripeSkipList *stripeSkipList; + uint32 stripeMaxRowCount; + ColumnBlockData **blockDataArray; + /* + * compressionBuffer buffer is used as temporary storage during + * data value compression operation. It is kept here to minimize + * memory allocations. It lives in stripeWriteContext and gets + * deallocated when memory context is reset. 
+ */ + StringInfo compressionBuffer; + +} TableWriteState; + +/* Function declarations for extension loading and unloading */ +extern void _PG_init(void); +extern void _PG_fini(void); + +extern CompressionType ParseCompressionType(const char *compressionTypeString); +extern void InitializeCStoreTableFile(Oid relationId, Relation relation, + CStoreOptions *cstoreOptions); +extern void CreateCStoreDatabaseDirectory(Oid databaseOid); +extern void RemoveCStoreDatabaseDirectory(Oid databaseOid); + +/* Function declarations for writing to a cstore file */ +extern TableWriteState * CStoreBeginWrite(const char *filename, + CompressionType compressionType, + uint64 stripeMaxRowCount, + uint32 blockRowCount, + TupleDesc tupleDescriptor); +extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, + bool *columnNulls); +extern void CStoreEndWrite(TableWriteState * state); + +/* Function declarations for reading from a cstore file */ +extern TableReadState * CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, + List *projectedColumnList, List *qualConditions); +extern TableFooter * CStoreReadFooter(StringInfo tableFooterFilename); +extern bool CStoreReadFinished(TableReadState *state); +extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, + bool *columnNulls); +extern void CStoreEndRead(TableReadState *state); + +/* Function declarations for common functions */ +extern FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, + int16 procedureId); +extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, + uint32 blockRowCount); +extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, + uint32 columnCount); +extern uint64 CStoreTableRowCount(const char *filename); +extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, + CompressionType compressionType); +extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); + + +#endif /* 
CSTORE_H */ diff --git a/cstore_fdw.c b/cstore_fdw.c index b0a327768..c80d53f2c 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -95,24 +95,18 @@ static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void TruncateCStoreTables(List *cstoreRelationList); static void DeleteCStoreTableFiles(char *filename); -static void InitializeCStoreTableFile(Oid relationId, Relation relation); static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); static bool DistributedTable(Oid relationId); static bool DistributedWorkerCopy(CopyStmt *copyStatement); -static void CreateCStoreDatabaseDirectory(Oid databaseOid); -static bool DirectoryExists(StringInfo directoryName); -static void CreateDirectory(StringInfo directoryName); -static void RemoveCStoreDatabaseDirectory(Oid databaseOid); static StringInfo OptionNamesString(Oid currentContextId); static HeapTuple GetSlotHeapTuple(TupleTableSlot *tts); -static CStoreFdwOptions * CStoreGetOptions(Oid foreignTableId); +static CStoreOptions * CStoreGetOptions(Oid foreignTableId); static char * CStoreGetOptionValue(Oid foreignTableId, const char *optionName); static void ValidateForeignTableOptions(char *filename, char *compressionTypeString, char *stripeRowCountString, char *blockRowCountString); static char * CStoreDefaultFilePath(Oid foreignTableId); -static CompressionType ParseCompressionType(const char *compressionTypeString); static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId); static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, @@ -160,9 +154,6 @@ static bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); #endif -/* declarations for dynamic loading */ -PG_MODULE_MAGIC; - PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); PG_FUNCTION_INFO_V1(cstore_table_size); PG_FUNCTION_INFO_V1(cstore_fdw_handler); @@ -175,11 +166,11 @@ static 
ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; /* - * _PG_init is called when the module is loaded. In this function we save the + * Called when the module is loaded. In this function we save the * previous utility hook, and then install our hook to pre-intercept calls to * the copy command. */ -void _PG_init(void) +void cstore_fdw_init() { PreviousProcessUtilityHook = ProcessUtility_hook; ProcessUtility_hook = CStoreProcessUtility; @@ -187,10 +178,10 @@ void _PG_init(void) /* - * _PG_fini is called when the module is unloaded. This function uninstalls the + * Called when the module is unloaded. This function uninstalls the * extension's hooks. */ -void _PG_fini(void) +void cstore_fdw_finish() { ProcessUtility_hook = PreviousProcessUtilityHook; } @@ -249,7 +240,7 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) */ CreateCStoreDatabaseDirectory(MyDatabaseId); - InitializeCStoreTableFile(relationId, relation); + InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); heap_close(relation, AccessExclusiveLock); } } @@ -525,7 +516,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) Datum *columnValues = NULL; bool *columnNulls = NULL; TableWriteState *writeState = NULL; - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; MemoryContext tupleContext = NULL; /* Only superuser can copy from or to local file */ @@ -546,7 +537,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) columnValues = palloc0(columnCount * sizeof(Datum)); columnNulls = palloc0(columnCount * sizeof(bool)); - cstoreFdwOptions = CStoreGetOptions(relationId); + cstoreOptions = CStoreGetOptions(relationId); /* * We create a new memory context called tuple context, and read and write @@ -580,10 +571,10 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) #endif /* init state to write to the cstore file */ - writeState = 
CStoreBeginWrite(cstoreFdwOptions->filename, - cstoreFdwOptions->compressionType, - cstoreFdwOptions->stripeRowCount, - cstoreFdwOptions->blockRowCount, + writeState = CStoreBeginWrite(cstoreOptions->filename, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, tupleDescriptor); while (nextRowFound) @@ -765,7 +756,7 @@ DroppedCStoreFilenameList(DropStmt *dropStatement) Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); if (CStoreTable(relationId)) { - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(relationId); + CStoreOptions *cstoreOptions = CStoreGetOptions(relationId); char *defaultfilename = CStoreDefaultFilePath(relationId); /* @@ -773,13 +764,13 @@ DroppedCStoreFilenameList(DropStmt *dropStatement) * by sql drop trigger. Both paths are generated by code, use * of strcmp is safe here. */ - if (strcmp(defaultfilename, cstoreFdwOptions->filename) == 0) + if (strcmp(defaultfilename, cstoreOptions->filename) == 0) { continue; } droppedCStoreFileList = lappend(droppedCStoreFileList, - cstoreFdwOptions->filename); + cstoreOptions->filename); } } } @@ -857,13 +848,13 @@ TruncateCStoreTables(List *cstoreRelationList) { Relation relation = (Relation) lfirst(relationCell); Oid relationId = relation->rd_id; - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; Assert(CStoreTable(relationId)); - cstoreFdwOptions = CStoreGetOptions(relationId); - DeleteCStoreTableFiles(cstoreFdwOptions->filename); - InitializeCStoreTableFile(relationId, relation); + cstoreOptions = CStoreGetOptions(relationId); + DeleteCStoreTableFiles(cstoreOptions->filename); + InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); } } @@ -901,29 +892,6 @@ DeleteCStoreTableFiles(char *filename) } -/* - * InitializeCStoreTableFile creates data and footer file for a cstore table. 
- * The function assumes data and footer files do not exist, therefore - * it should be called on empty or non-existing table. Notice that the caller - * is expected to acquire AccessExclusiveLock on the relation. - */ -static void InitializeCStoreTableFile(Oid relationId, Relation relation) -{ - TableWriteState *writeState = NULL; - TupleDesc tupleDescriptor = RelationGetDescr(relation); - CStoreFdwOptions* cstoreFdwOptions = CStoreGetOptions(relationId); - - /* - * Initialize state to write to the cstore file. This creates an - * empty data file and a valid footer file for the table. - */ - writeState = CStoreBeginWrite(cstoreFdwOptions->filename, - cstoreFdwOptions->compressionType, cstoreFdwOptions->stripeRowCount, - cstoreFdwOptions->blockRowCount, tupleDescriptor); - CStoreEndWrite(writeState); -} - - /* * CStoreTable checks if the given table name belongs to a foreign columnar store @@ -1045,111 +1013,7 @@ DistributedWorkerCopy(CopyStmt *copyStatement) } -/* - * CreateCStoreDatabaseDirectory creates the directory (and parent directories, - * if needed) used to store automatically managed cstore_fdw files. The path to - * the directory is $PGDATA/cstore_fdw/{databaseOid}. 
- */ -static void -CreateCStoreDatabaseDirectory(Oid databaseOid) -{ - bool cstoreDirectoryExists = false; - bool databaseDirectoryExists = false; - StringInfo cstoreDatabaseDirectoryPath = NULL; - StringInfo cstoreDirectoryPath = makeStringInfo(); - appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); - - cstoreDirectoryExists = DirectoryExists(cstoreDirectoryPath); - if (!cstoreDirectoryExists) - { - CreateDirectory(cstoreDirectoryPath); - } - - cstoreDatabaseDirectoryPath = makeStringInfo(); - appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, - CSTORE_FDW_NAME, databaseOid); - - databaseDirectoryExists = DirectoryExists(cstoreDatabaseDirectoryPath); - if (!databaseDirectoryExists) - { - CreateDirectory(cstoreDatabaseDirectoryPath); - } -} - - -/* DirectoryExists checks if a directory exists for the given directory name. */ -static bool -DirectoryExists(StringInfo directoryName) -{ - bool directoryExists = true; - struct stat directoryStat; - - int statOK = stat(directoryName->data, &directoryStat); - if (statOK == 0) - { - /* file already exists; check that it is a directory */ - if (!S_ISDIR(directoryStat.st_mode)) - { - ereport(ERROR, (errmsg("\"%s\" is not a directory", directoryName->data), - errhint("You need to remove or rename the file \"%s\".", - directoryName->data))); - } - } - else - { - if (errno == ENOENT) - { - directoryExists = false; - } - else - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat directory \"%s\": %m", - directoryName->data))); - } - } - - return directoryExists; -} - - -/* CreateDirectory creates a new directory with the given directory name. 
*/ -static void -CreateDirectory(StringInfo directoryName) -{ - int makeOK = mkdir(directoryName->data, S_IRWXU); - if (makeOK != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not create directory \"%s\": %m", - directoryName->data))); - } -} - - -/* - * RemoveCStoreDatabaseDirectory removes CStore directory previously - * created for this database. - * However it does not remove 'cstore_fdw' directory even if there - * are no other databases left. - */ -static void -RemoveCStoreDatabaseDirectory(Oid databaseOid) -{ - StringInfo cstoreDirectoryPath = makeStringInfo(); - StringInfo cstoreDatabaseDirectoryPath = makeStringInfo(); - - appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); - - appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, - CSTORE_FDW_NAME, databaseOid); - - if (DirectoryExists(cstoreDatabaseDirectoryPath)) - { - rmtree(cstoreDatabaseDirectoryPath->data, true); - } -} /* @@ -1162,7 +1026,7 @@ cstore_table_size(PG_FUNCTION_ARGS) Oid relationId = PG_GETARG_OID(0); int64 tableSize = 0; - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; char *dataFilename = NULL; StringInfo footerFilename = NULL; int dataFileStatResult = 0; @@ -1176,8 +1040,8 @@ cstore_table_size(PG_FUNCTION_ARGS) ereport(ERROR, (errmsg("relation is not a cstore table"))); } - cstoreFdwOptions = CStoreGetOptions(relationId); - dataFilename = cstoreFdwOptions->filename; + cstoreOptions = CStoreGetOptions(relationId); + dataFilename = cstoreOptions->filename; dataFileStatResult = stat(dataFilename, &dataFileStatBuffer); if (dataFileStatResult != 0) @@ -1402,10 +1266,10 @@ GetSlotHeapTuple(TupleTableSlot *tts) * foreign table, and if not present, falls back to default values. This function * errors out if given option values are considered invalid. 
*/ -static CStoreFdwOptions * +static CStoreOptions * CStoreGetOptions(Oid foreignTableId) { - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; char *filename = NULL; CompressionType compressionType = DEFAULT_COMPRESSION_TYPE; int32 stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; @@ -1445,13 +1309,13 @@ CStoreGetOptions(Oid foreignTableId) filename = CStoreDefaultFilePath(foreignTableId); } - cstoreFdwOptions = palloc0(sizeof(CStoreFdwOptions)); - cstoreFdwOptions->filename = filename; - cstoreFdwOptions->compressionType = compressionType; - cstoreFdwOptions->stripeRowCount = stripeRowCount; - cstoreFdwOptions->blockRowCount = blockRowCount; + cstoreOptions = palloc0(sizeof(CStoreOptions)); + cstoreOptions->filename = filename; + cstoreOptions->compressionType = compressionType; + cstoreOptions->stripeRowCount = stripeRowCount; + cstoreOptions->blockRowCount = blockRowCount; - return cstoreFdwOptions; + return cstoreOptions; } @@ -1577,26 +1441,6 @@ CStoreDefaultFilePath(Oid foreignTableId) } -/* ParseCompressionType converts a string to a compression type. */ -static CompressionType -ParseCompressionType(const char *compressionTypeString) -{ - CompressionType compressionType = COMPRESSION_TYPE_INVALID; - Assert(compressionTypeString != NULL); - - if (strncmp(compressionTypeString, COMPRESSION_STRING_NONE, NAMEDATALEN) == 0) - { - compressionType = COMPRESSION_NONE; - } - else if (strncmp(compressionTypeString, COMPRESSION_STRING_PG_LZ, NAMEDATALEN) == 0) - { - compressionType = COMPRESSION_PG_LZ; - } - - return compressionType; -} - - /* * CStoreGetForeignRelSize obtains relation size estimates for a foreign table and * puts its estimate for row count into baserel->rows. 
@@ -1604,8 +1448,8 @@ ParseCompressionType(const char *compressionTypeString) static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); - double tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename); + CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); + double tupleCountEstimate = TupleCountEstimate(baserel, cstoreOptions->filename); double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo, 0, JOIN_INNER, NULL); @@ -1624,7 +1468,7 @@ static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { Path *foreignScanPath = NULL; - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); + CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); Relation relation = heap_open(foreignTableId, AccessShareLock); /* @@ -1645,14 +1489,14 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId */ List *queryColumnList = ColumnList(baserel, foreignTableId); uint32 queryColumnCount = list_length(queryColumnList); - BlockNumber relationPageCount = PageCount(cstoreFdwOptions->filename); + BlockNumber relationPageCount = PageCount(cstoreOptions->filename); uint32 relationColumnCount = RelationGetNumberOfAttributes(relation); double queryColumnRatio = (double) queryColumnCount / relationColumnCount; double queryPageCount = relationPageCount * queryColumnRatio; double totalDiskAccessCost = seq_page_cost * queryPageCount; - double tupleCountEstimate = TupleCountEstimate(baserel, cstoreFdwOptions->filename); + double tupleCountEstimate = TupleCountEstimate(baserel, cstoreOptions->filename); /* * We estimate costs almost the same way as cost_seqscan(), thus assuming @@ -1922,16 +1766,16 @@ static void CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState) { Oid foreignTableId = 
RelationGetRelid(scanState->ss.ss_currentRelation); - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); + CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); - ExplainPropertyText("CStore File", cstoreFdwOptions->filename, explainState); + ExplainPropertyText("CStore File", cstoreOptions->filename, explainState); /* supress file size if we're not showing cost details */ if (explainState->costs) { struct stat statBuffer; - int statResult = stat(cstoreFdwOptions->filename, &statBuffer); + int statResult = stat(cstoreOptions->filename, &statBuffer); if (statResult == 0) { ExplainPropertyLong("CStore File Size", (long) statBuffer.st_size, @@ -1947,7 +1791,7 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) { TableReadState *readState = NULL; Oid foreignTableId = InvalidOid; - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; Relation currentRelation = scanState->ss.ss_currentRelation; TupleDesc tupleDescriptor = RelationGetDescr(currentRelation); List *columnList = NIL; @@ -1962,14 +1806,14 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) } foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation); - cstoreFdwOptions = CStoreGetOptions(foreignTableId); + cstoreOptions = CStoreGetOptions(foreignTableId); foreignScan = (ForeignScan *) scanState->ss.ps.plan; foreignPrivateList = (List *) foreignScan->fdw_private; whereClauseList = foreignScan->scan.plan.qual; columnList = (List *) linitial(foreignPrivateList); - readState = CStoreBeginRead(cstoreFdwOptions->filename, tupleDescriptor, + readState = CStoreBeginRead(cstoreOptions->filename, tupleDescriptor, columnList, whereClauseList); scanState->fdw_state = (void *) readState; @@ -2040,18 +1884,18 @@ CStoreAnalyzeForeignTable(Relation relation, BlockNumber *totalPageCount) { Oid foreignTableId = RelationGetRelid(relation); - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(foreignTableId); + 
CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); struct stat statBuffer; - int statResult = stat(cstoreFdwOptions->filename, &statBuffer); + int statResult = stat(cstoreOptions->filename, &statBuffer); if (statResult < 0) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", - cstoreFdwOptions->filename))); + cstoreOptions->filename))); } - (*totalPageCount) = PageCount(cstoreFdwOptions->filename); + (*totalPageCount) = PageCount(cstoreOptions->filename); (*acquireSampleRowsFunc) = CStoreAcquireSampleRows; return true; @@ -2311,20 +2155,20 @@ static void CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *relationInfo) { Oid foreignTableOid = InvalidOid; - CStoreFdwOptions *cstoreFdwOptions = NULL; + CStoreOptions *cstoreOptions = NULL; TupleDesc tupleDescriptor = NULL; TableWriteState *writeState = NULL; Relation relation = NULL; foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc); relation = heap_open(foreignTableOid, ShareUpdateExclusiveLock); - cstoreFdwOptions = CStoreGetOptions(foreignTableOid); + cstoreOptions = CStoreGetOptions(foreignTableOid); tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); - writeState = CStoreBeginWrite(cstoreFdwOptions->filename, - cstoreFdwOptions->compressionType, - cstoreFdwOptions->stripeRowCount, - cstoreFdwOptions->blockRowCount, + writeState = CStoreBeginWrite(cstoreOptions->filename, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, tupleDescriptor); writeState->relation = relation; diff --git a/cstore_fdw.h b/cstore_fdw.h index 2bc3e9c97..7b8475497 100644 --- a/cstore_fdw.h +++ b/cstore_fdw.h @@ -22,41 +22,7 @@ #include "lib/stringinfo.h" #include "utils/rel.h" - -/* Defines for valid option names */ -#define OPTION_NAME_FILENAME "filename" -#define OPTION_NAME_COMPRESSION_TYPE "compression" -#define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" -#define 
OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" - -/* Default values for option parameters */ -#define DEFAULT_COMPRESSION_TYPE COMPRESSION_NONE -#define DEFAULT_STRIPE_ROW_COUNT 150000 -#define DEFAULT_BLOCK_ROW_COUNT 10000 - -/* Limits for option parameters */ -#define STRIPE_ROW_COUNT_MINIMUM 1000 -#define STRIPE_ROW_COUNT_MAXIMUM 10000000 -#define BLOCK_ROW_COUNT_MINIMUM 1000 -#define BLOCK_ROW_COUNT_MAXIMUM 100000 - -/* String representations of compression types */ -#define COMPRESSION_STRING_NONE "none" -#define COMPRESSION_STRING_PG_LZ "pglz" -#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" - -/* CStore file signature */ -#define CSTORE_MAGIC_NUMBER "citus_cstore" -#define CSTORE_VERSION_MAJOR 1 -#define CSTORE_VERSION_MINOR 7 - -/* miscellaneous defines */ -#define CSTORE_FDW_NAME "cstore_fdw" -#define CSTORE_FOOTER_FILE_SUFFIX ".footer" -#define CSTORE_TEMP_FILE_SUFFIX ".tmp" -#define CSTORE_TUPLE_COST_MULTIPLIER 10 -#define CSTORE_POSTSCRIPT_SIZE_LENGTH 1 -#define CSTORE_POSTSCRIPT_SIZE_MAX 256 +#include "cstore.h" /* table containing information about how to partition distributed tables */ #define CITUS_EXTENSION_NAME "citus" @@ -67,7 +33,6 @@ #define ATTR_NUM_PARTITION_TYPE 2 #define ATTR_NUM_PARTITION_KEY 3 - /* * CStoreValidOption keeps an option name and a context. 
When an option is passed * into cstore_fdw objects (server and foreign table), we compare this option's @@ -80,6 +45,7 @@ typedef struct CStoreValidOption } CStoreValidOption; +#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" /* Array of options that are valid for cstore_fdw */ static const uint32 ValidOptionCount = 4; @@ -92,220 +58,8 @@ static const CStoreValidOption ValidOptionArray[] = { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } }; - -/* Enumaration for cstore file's compression method */ -typedef enum -{ - COMPRESSION_TYPE_INVALID = -1, - COMPRESSION_NONE = 0, - COMPRESSION_PG_LZ = 1, - - COMPRESSION_COUNT - -} CompressionType; - - -/* - * CStoreFdwOptions holds the option values to be used when reading or writing - * a cstore file. To resolve these values, we first check foreign table's options, - * and if not present, we then fall back to the default values specified above. - */ -typedef struct CStoreFdwOptions -{ - char *filename; - CompressionType compressionType; - uint64 stripeRowCount; - uint32 blockRowCount; - -} CStoreFdwOptions; - - -/* - * StripeMetadata represents information about a stripe. This information is - * stored in the cstore file's footer. - */ -typedef struct StripeMetadata -{ - uint64 fileOffset; - uint64 skipListLength; - uint64 dataLength; - uint64 footerLength; - -} StripeMetadata; - - -/* TableFooter represents the footer of a cstore file. */ -typedef struct TableFooter -{ - List *stripeMetadataList; - uint64 blockRowCount; - -} TableFooter; - - -/* ColumnBlockSkipNode contains statistics for a ColumnBlockData. */ -typedef struct ColumnBlockSkipNode -{ - /* statistics about values of a column block */ - bool hasMinMax; - Datum minimumValue; - Datum maximumValue; - uint64 rowCount; - - /* - * Offsets and sizes of value and exists streams in the column data. - * These enable us to skip reading suppressed row blocks, and start reading - * a block without reading previous blocks. 
- */ - uint64 valueBlockOffset; - uint64 valueLength; - uint64 existsBlockOffset; - uint64 existsLength; - - CompressionType valueCompressionType; - -} ColumnBlockSkipNode; - - -/* - * StripeSkipList can be used for skipping row blocks. It contains a column block - * skip node for each block of each column. blockSkipNodeArray[column][block] - * is the entry for the specified column block. - */ -typedef struct StripeSkipList -{ - ColumnBlockSkipNode **blockSkipNodeArray; - uint32 columnCount; - uint32 blockCount; - -} StripeSkipList; - - -/* - * ColumnBlockData represents a block of data in a column. valueArray stores - * the values of data, and existsArray stores whether a value is present. - * valueBuffer is used to store (uncompressed) serialized values - * referenced by Datum's in valueArray. It is only used for by-reference Datum's. - * There is a one-to-one correspondence between valueArray and existsArray. - */ -typedef struct ColumnBlockData -{ - bool *existsArray; - Datum *valueArray; - - /* valueBuffer keeps actual data for type-by-reference datums from valueArray. */ - StringInfo valueBuffer; - -} ColumnBlockData; - - -/* - * ColumnBlockBuffers represents a block of serialized data in a column. - * valueBuffer stores the serialized values of data, and existsBuffer stores - * serialized value of presence information. valueCompressionType contains - * compression type if valueBuffer is compressed. Finally rowCount has - * the number of rows in this block. - */ -typedef struct ColumnBlockBuffers -{ - StringInfo existsBuffer; - StringInfo valueBuffer; - CompressionType valueCompressionType; - -} ColumnBlockBuffers; - - -/* - * ColumnBuffers represents data buffers for a column in a row stripe. Each - * column is made of multiple column blocks. - */ -typedef struct ColumnBuffers -{ - ColumnBlockBuffers **blockBuffersArray; - -} ColumnBuffers; - - -/* StripeBuffers represents data for a row stripe in a cstore file. 
*/ -typedef struct StripeBuffers -{ - uint32 columnCount; - uint32 rowCount; - ColumnBuffers **columnBuffersArray; - -} StripeBuffers; - - -/* - * StripeFooter represents a stripe's footer. In this footer, we keep three - * arrays of sizes. The number of elements in each of the arrays is equal - * to the number of columns. - */ -typedef struct StripeFooter -{ - uint32 columnCount; - uint64 *skipListSizeArray; - uint64 *existsSizeArray; - uint64 *valueSizeArray; - -} StripeFooter; - - -/* TableReadState represents state of a cstore file read operation. */ -typedef struct TableReadState -{ - FILE *tableFile; - TableFooter *tableFooter; - TupleDesc tupleDescriptor; - - /* - * List of Var pointers for columns in the query. We use this both for - * getting vector of projected columns, and also when we want to build - * base constraint to find selected row blocks. - */ - List *projectedColumnList; - - List *whereClauseList; - MemoryContext stripeReadContext; - StripeBuffers *stripeBuffers; - uint32 readStripeCount; - uint64 stripeReadRowCount; - ColumnBlockData **blockDataArray; - int32 deserializedBlockIndex; - -} TableReadState; - - -/* TableWriteState represents state of a cstore file write operation. */ -typedef struct TableWriteState -{ - FILE *tableFile; - TableFooter *tableFooter; - StringInfo tableFooterFilename; - CompressionType compressionType; - TupleDesc tupleDescriptor; - FmgrInfo **comparisonFunctionArray; - uint64 currentFileOffset; - Relation relation; - - MemoryContext stripeWriteContext; - StripeBuffers *stripeBuffers; - StripeSkipList *stripeSkipList; - uint32 stripeMaxRowCount; - ColumnBlockData **blockDataArray; - /* - * compressionBuffer buffer is used as temporary storage during - * data value compression operation. It is kept here to minimize - * memory allocations. It lives in stripeWriteContext and gets - * deallocated when memory context is reset. 
- */ - StringInfo compressionBuffer; - -} TableWriteState; - -/* Function declarations for extension loading and unloading */ -extern void _PG_init(void); -extern void _PG_fini(void); +void cstore_fdw_init(void); +void cstore_fdw_finish(void); /* event trigger function declarations */ extern Datum cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS); @@ -318,36 +72,4 @@ extern Datum cstore_clean_table_resources(PG_FUNCTION_ARGS); extern Datum cstore_fdw_handler(PG_FUNCTION_ARGS); extern Datum cstore_fdw_validator(PG_FUNCTION_ARGS); -/* Function declarations for writing to a cstore file */ -extern TableWriteState * CStoreBeginWrite(const char *filename, - CompressionType compressionType, - uint64 stripeMaxRowCount, - uint32 blockRowCount, - TupleDesc tupleDescriptor); -extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, - bool *columnNulls); -extern void CStoreEndWrite(TableWriteState * state); - -/* Function declarations for reading from a cstore file */ -extern TableReadState * CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, - List *projectedColumnList, List *qualConditions); -extern TableFooter * CStoreReadFooter(StringInfo tableFooterFilename); -extern bool CStoreReadFinished(TableReadState *state); -extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, - bool *columnNulls); -extern void CStoreEndRead(TableReadState *state); - -/* Function declarations for common functions */ -extern FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, - int16 procedureId); -extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, - uint32 blockRowCount); -extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, - uint32 columnCount); -extern uint64 CStoreTableRowCount(const char *filename); -extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, - CompressionType compressionType); -extern StringInfo DecompressBuffer(StringInfo buffer, 
CompressionType compressionType); - - #endif /* CSTORE_FDW_H */ diff --git a/mod.c b/mod.c new file mode 100644 index 000000000..aa65ac0ec --- /dev/null +++ b/mod.c @@ -0,0 +1,30 @@ +/*------------------------------------------------------------------------- + * + * mod.c + * + * This file contains module-level definitions. + * + * Copyright (c) 2016, Citus Data, Inc. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "cstore_fdw.h" + +PG_MODULE_MAGIC; + +void _PG_init(void) +{ + cstore_fdw_init(); +} + + +void _PG_fini(void) +{ + cstore_fdw_finish(); +} + From 59d5d961702df30b6f30314499557e4b5ce5f4fb Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 2 Sep 2020 10:31:10 -0700 Subject: [PATCH 003/124] move _PG_* declarations to mod.h --- cstore.h | 4 ---- mod.c | 1 + mod.h | 21 +++++++++++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 mod.h diff --git a/cstore.h b/cstore.h index f51a972e2..d45fde914 100644 --- a/cstore.h +++ b/cstore.h @@ -266,10 +266,6 @@ typedef struct TableWriteState } TableWriteState; -/* Function declarations for extension loading and unloading */ -extern void _PG_init(void); -extern void _PG_fini(void); - extern CompressionType ParseCompressionType(const char *compressionTypeString); extern void InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions); diff --git a/mod.c b/mod.c index aa65ac0ec..8cb138c62 100644 --- a/mod.c +++ b/mod.c @@ -13,6 +13,7 @@ #include "postgres.h" +#include "mod.h" #include "cstore_fdw.h" PG_MODULE_MAGIC; diff --git a/mod.h b/mod.h new file mode 100644 index 000000000..3196bc809 --- /dev/null +++ b/mod.h @@ -0,0 +1,21 @@ +/*------------------------------------------------------------------------- + * + * mod.h + * + * Type and function declarations for CStore + * + * Copyright (c) 2016, Citus Data, Inc. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef MOD_H +#define MOD_H + +/* Function declarations for extension loading and unloading */ +extern void _PG_init(void); +extern void _PG_fini(void); + +#endif /* MOD_H */ From 3089c92103607acb62cbb06f1944c5509c18d1eb Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 2 Sep 2020 11:41:01 -0700 Subject: [PATCH 004/124] header file and include cleanup --- cstore.c | 38 +++++++++++++- cstore.h | 7 +-- cstore_compression.c | 2 +- cstore_fdw.c | 92 ++++++++++++++++----------------- cstore_fdw.h | 44 +--------------- cstore_metadata_serialization.c | 7 +-- cstore_metadata_serialization.h | 6 --- cstore_reader.c | 12 ++--- cstore_version_compat.h | 2 +- cstore_writer.c | 18 ++----- mod.c | 2 + 11 files changed, 101 insertions(+), 129 deletions(-) diff --git a/cstore.c b/cstore.c index ccb59675f..e704bc31d 100644 --- a/cstore.c +++ b/cstore.c @@ -13,12 +13,14 @@ #include "postgres.h" +#include +#include + #include "miscadmin.h" +#include "utils/rel.h" #include "cstore.h" -#include - static void CreateDirectory(StringInfo directoryName); static bool DirectoryExists(StringInfo directoryName); @@ -168,3 +170,35 @@ CreateCStoreDatabaseDirectory(Oid databaseOid) } } + +/* + * DeleteCStoreTableFiles deletes the data and footer files for a cstore table + * whose data filename is given. 
+ */ +void +DeleteCStoreTableFiles(char *filename) +{ + int dataFileRemoved = 0; + int footerFileRemoved = 0; + + StringInfo tableFooterFilename = makeStringInfo(); + appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); + + /* delete the footer file */ + footerFileRemoved = unlink(tableFooterFilename->data); + if (footerFileRemoved != 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not delete file \"%s\": %m", + tableFooterFilename->data))); + } + + /* delete the data file */ + dataFileRemoved = unlink(filename); + if (dataFileRemoved != 0) + { + ereport(WARNING, (errcode_for_file_access(), + errmsg("could not delete file \"%s\": %m", + filename))); + } +} diff --git a/cstore.h b/cstore.h index d45fde914..a694e1e29 100644 --- a/cstore.h +++ b/cstore.h @@ -14,13 +14,9 @@ #ifndef CSTORE_H #define CSTORE_H -#include "access/tupdesc.h" #include "fmgr.h" -#include "catalog/pg_am.h" -#include "catalog/pg_foreign_server.h" -#include "catalog/pg_foreign_table.h" #include "lib/stringinfo.h" -#include "utils/rel.h" +#include "utils/relcache.h" /* Defines for valid option names */ #define OPTION_NAME_FILENAME "filename" @@ -271,6 +267,7 @@ extern void InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions); extern void CreateCStoreDatabaseDirectory(Oid databaseOid); extern void RemoveCStoreDatabaseDirectory(Oid databaseOid); +extern void DeleteCStoreTableFiles(char *filename); /* Function declarations for writing to a cstore file */ extern TableWriteState * CStoreBeginWrite(const char *filename, diff --git a/cstore_compression.c b/cstore_compression.c index 3b37fd47a..a3c5f9f7d 100644 --- a/cstore_compression.c +++ b/cstore_compression.c @@ -12,7 +12,6 @@ *------------------------------------------------------------------------- */ #include "postgres.h" -#include "cstore_fdw.h" #if PG_VERSION_NUM >= 90500 #include "common/pg_lzcompress.h" @@ -20,6 +19,7 @@ #include "utils/pg_lzcompress.h" 
#endif +#include "cstore.h" diff --git a/cstore_fdw.c b/cstore_fdw.c index c80d53f2c..9787fd2a2 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -15,15 +15,11 @@ */ #include "postgres.h" -#include "cstore_fdw.h" -#include "cstore_version_compat.h" #include -#include -#include -#include "access/htup_details.h" + +#include "access/heapam.h" #include "access/reloptions.h" -#include "access/sysattr.h" #include "access/tuptoaster.h" #include "catalog/namespace.h" #include "catalog/pg_foreign_table.h" @@ -39,35 +35,71 @@ #include "foreign/foreign.h" #include "miscadmin.h" #include "nodes/makefuncs.h" +#if PG_VERSION_NUM < 120000 #include "optimizer/cost.h" +#endif #include "optimizer/pathnode.h" #include "optimizer/planmain.h" #include "optimizer/restrictinfo.h" #if PG_VERSION_NUM >= 120000 #include "access/heapam.h" -#include "access/tableam.h" -#include "executor/tuptable.h" #include "optimizer/optimizer.h" #else #include "optimizer/var.h" #endif #include "parser/parser.h" -#include "parser/parsetree.h" #include "parser/parse_coerce.h" #include "parser/parse_type.h" -#include "storage/fd.h" #include "tcop/utility.h" #include "utils/builtins.h" #include "utils/fmgroids.h" -#include "utils/memutils.h" #include "utils/lsyscache.h" -#include "utils/rel.h" #if PG_VERSION_NUM >= 120000 #include "utils/snapmgr.h" #else #include "utils/tqual.h" #endif +#if PG_VERSION_NUM < 120000 +#include "utils/rel.h" +#endif +#include "cstore.h" +#include "cstore_fdw.h" +#include "cstore_version_compat.h" + +/* table containing information about how to partition distributed tables */ +#define CITUS_EXTENSION_NAME "citus" +#define CITUS_PARTITION_TABLE_NAME "pg_dist_partition" + +/* human-readable names for addressing columns of the pg_dist_partition table */ +#define ATTR_NUM_PARTITION_RELATION_ID 1 +#define ATTR_NUM_PARTITION_TYPE 2 +#define ATTR_NUM_PARTITION_KEY 3 + +/* + * CStoreValidOption keeps an option name and a context. 
When an option is passed + * into cstore_fdw objects (server and foreign table), we compare this option's + * name and context against those of valid options. + */ +typedef struct CStoreValidOption +{ + const char *optionName; + Oid optionContextId; + +} CStoreValidOption; + +#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" + +/* Array of options that are valid for cstore_fdw */ +static const uint32 ValidOptionCount = 4; +static const CStoreValidOption ValidOptionArray[] = +{ + /* foreign table options */ + { OPTION_NAME_FILENAME, ForeignTableRelationId }, + { OPTION_NAME_COMPRESSION_TYPE, ForeignTableRelationId }, + { OPTION_NAME_STRIPE_ROW_COUNT, ForeignTableRelationId }, + { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } +}; /* local functions forward declarations */ #if PG_VERSION_NUM >= 100000 @@ -94,7 +126,6 @@ static List * DroppedCStoreFilenameList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void TruncateCStoreTables(List *cstoreRelationList); -static void DeleteCStoreTableFiles(char *filename); static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); static bool DistributedTable(Oid relationId); @@ -858,41 +889,6 @@ TruncateCStoreTables(List *cstoreRelationList) } } - -/* - * DeleteCStoreTableFiles deletes the data and footer files for a cstore table - * whose data filename is given. 
- */ -static void -DeleteCStoreTableFiles(char *filename) -{ - int dataFileRemoved = 0; - int footerFileRemoved = 0; - - StringInfo tableFooterFilename = makeStringInfo(); - appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); - - /* delete the footer file */ - footerFileRemoved = unlink(tableFooterFilename->data); - if (footerFileRemoved != 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not delete file \"%s\": %m", - tableFooterFilename->data))); - } - - /* delete the data file */ - dataFileRemoved = unlink(filename); - if (dataFileRemoved != 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not delete file \"%s\": %m", - filename))); - } -} - - - /* * CStoreTable checks if the given table name belongs to a foreign columnar store * table. If it does, the function returns true. Otherwise, it returns false. diff --git a/cstore_fdw.h b/cstore_fdw.h index 7b8475497..c7b4460ed 100644 --- a/cstore_fdw.h +++ b/cstore_fdw.h @@ -14,49 +14,9 @@ #ifndef CSTORE_FDW_H #define CSTORE_FDW_H -#include "access/tupdesc.h" +#include "postgres.h" + #include "fmgr.h" -#include "catalog/pg_am.h" -#include "catalog/pg_foreign_server.h" -#include "catalog/pg_foreign_table.h" -#include "lib/stringinfo.h" -#include "utils/rel.h" - -#include "cstore.h" - -/* table containing information about how to partition distributed tables */ -#define CITUS_EXTENSION_NAME "citus" -#define CITUS_PARTITION_TABLE_NAME "pg_dist_partition" - -/* human-readable names for addressing columns of the pg_dist_partition table */ -#define ATTR_NUM_PARTITION_RELATION_ID 1 -#define ATTR_NUM_PARTITION_TYPE 2 -#define ATTR_NUM_PARTITION_KEY 3 - -/* - * CStoreValidOption keeps an option name and a context. When an option is passed - * into cstore_fdw objects (server and foreign table), we compare this option's - * name and context against those of valid options. 
- */ -typedef struct CStoreValidOption -{ - const char *optionName; - Oid optionContextId; - -} CStoreValidOption; - -#define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" - -/* Array of options that are valid for cstore_fdw */ -static const uint32 ValidOptionCount = 4; -static const CStoreValidOption ValidOptionArray[] = -{ - /* foreign table options */ - { OPTION_NAME_FILENAME, ForeignTableRelationId }, - { OPTION_NAME_COMPRESSION_TYPE, ForeignTableRelationId }, - { OPTION_NAME_STRIPE_ROW_COUNT, ForeignTableRelationId }, - { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } -}; void cstore_fdw_init(void); void cstore_fdw_finish(void); diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c index 26402f897..67ae2ec2c 100644 --- a/cstore_metadata_serialization.c +++ b/cstore_metadata_serialization.c @@ -14,11 +14,12 @@ #include "postgres.h" -#include "cstore_fdw.h" -#include "cstore_metadata_serialization.h" -#include "cstore.pb-c.h" + #include "access/tupmacs.h" +#include "cstore.h" +#include "cstore_metadata_serialization.h" +#include "cstore.pb-c.h" /* local functions forward declarations */ static ProtobufCBinaryData DatumToProtobufBinary(Datum datum, bool typeByValue, diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h index 421f8ddff..b8890a5d4 100644 --- a/cstore_metadata_serialization.h +++ b/cstore_metadata_serialization.h @@ -14,12 +14,6 @@ #ifndef CSTORE_SERIALIZATION_H #define CSTORE_SERIALIZATION_H -#include "catalog/pg_attribute.h" -#include "nodes/pg_list.h" -#include "lib/stringinfo.h" -#include "cstore_fdw.h" - - /* Function declarations for metadata serialization */ extern StringInfo SerializePostScript(uint64 tableFooterLength); extern StringInfo SerializeTableFooter(TableFooter *tableFooter); diff --git a/cstore_reader.c b/cstore_reader.c index 7e9c6bcfd..68ce5cdad 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -15,30 +15,26 @@ #include "postgres.h" -#include "cstore_fdw.h" -#include 
"cstore_metadata_serialization.h" -#include "cstore_version_compat.h" #include "access/nbtree.h" -#include "access/skey.h" +#include "catalog/pg_am.h" #include "commands/defrem.h" #include "nodes/makefuncs.h" #if PG_VERSION_NUM >= 120000 -#include "nodes/pathnodes.h" #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #else #include "optimizer/clauses.h" #include "optimizer/predtest.h" -#include "optimizer/var.h" #endif #include "optimizer/restrictinfo.h" -#include "port.h" #include "storage/fd.h" #include "utils/memutils.h" #include "utils/lsyscache.h" -#include "utils/rel.h" +#include "cstore.h" +#include "cstore_metadata_serialization.h" +#include "cstore_version_compat.h" /* static function declarations */ static StripeBuffers * LoadFilteredStripeBuffers(FILE *tableFile, diff --git a/cstore_version_compat.h b/cstore_version_compat.h index a7f961fcd..1b80b16c3 100644 --- a/cstore_version_compat.h +++ b/cstore_version_compat.h @@ -49,7 +49,7 @@ #define TTS_EMPTY(slot) ((slot)->tts_isempty) #define ExecForceStoreHeapTuple(tuple, slot, shouldFree) \ ExecStoreTuple(newTuple, tupleSlot, InvalidBuffer, shouldFree); -#define HeapScanDesc TableScanDesc +#define TableScanDesc HeapScanDesc #define table_beginscan heap_beginscan #define table_endscan heap_endscan diff --git a/cstore_writer.c b/cstore_writer.c index b69064215..51a01c8f3 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -15,25 +15,17 @@ #include "postgres.h" -#include "cstore_fdw.h" -#include "cstore_metadata_serialization.h" -#include "cstore_version_compat.h" #include + #include "access/nbtree.h" -#include "catalog/pg_collation.h" -#include "commands/defrem.h" -#if PG_VERSION_NUM >= 120000 -#include "optimizer/optimizer.h" -#else -#include "optimizer/var.h" -#endif -#include "port.h" +#include "catalog/pg_am.h" #include "storage/fd.h" #include "utils/memutils.h" -#include "utils/lsyscache.h" -#include "utils/rel.h" +#include "cstore.h" +#include "cstore_metadata_serialization.h" +#include 
"cstore_version_compat.h" static void CStoreWriteFooter(StringInfo footerFileName, TableFooter *tableFooter); static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, diff --git a/mod.c b/mod.c index 8cb138c62..dbc8eb923 100644 --- a/mod.c +++ b/mod.c @@ -13,6 +13,8 @@ #include "postgres.h" +#include "fmgr.h" + #include "mod.h" #include "cstore_fdw.h" From 406bebe4b8bfef7b25863d8c5516fe0deacc27e1 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Wed, 2 Sep 2020 14:27:24 -0700 Subject: [PATCH 005/124] update .gitignore --- .gitignore | 4 ++++ cstore_fdw--1.7.sql | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/.gitignore b/.gitignore index f95fd0b87..ad9061861 100644 --- a/.gitignore +++ b/.gitignore @@ -53,5 +53,9 @@ /expected/create.out /expected/data_types.out /expected/load.out +/results/* +/.deps/* +/regression.diffs +/regression.out *.pb-c.* diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index ad2683f52..2f001485f 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -58,3 +58,31 @@ CREATE EVENT TRIGGER cstore_drop_event ON SQL_DROP EXECUTE PROCEDURE cstore_drop_trigger(); +CREATE TABLE cstore_table_metadata( + relid oid, + version_major int, + version_minor int +); + +CREATE TABLE cstore_stripe( + relid oid, + stripe bigint +); + +CREATE TABLE cstore_column_block_skip_node( + relid oid, + stripe bigint, + attr int, + blockid int, + rowcount bigint, + min_value text, + max_value text, + value_offset bigint, + value_length bigint, + value_compression_type char, + exists_offset bigint, + exists_length bigint); + +CREATE INDEX cstore_column_block_skip_node_idx + ON cstore_column_block_skip_node + USING BTREE(relid, stripe, attr, blockid); From f691576f13e54e008320eab3252c12f21b34c13c Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 3 Sep 2020 09:57:59 -0700 Subject: [PATCH 006/124] Move StripeFooter to metadata tables. 
--- .gitignore | 1 + Makefile | 5 +- cstore.c | 2 +- cstore.h | 17 ++- cstore.proto | 7 +- cstore_fdw--1.7.sql | 34 ++---- cstore_fdw.c | 23 ++-- cstore_metadata_serialization.c | 88 +-------------- cstore_metadata_serialization.h | 2 - cstore_metadata_tables.c | 186 ++++++++++++++++++++++++++++++++ cstore_reader.c | 75 ++++++------- cstore_writer.c | 19 ++-- 12 files changed, 275 insertions(+), 184 deletions(-) create mode 100644 cstore_metadata_tables.c diff --git a/.gitignore b/.gitignore index ad9061861..21c5e32ea 100644 --- a/.gitignore +++ b/.gitignore @@ -57,5 +57,6 @@ /.deps/* /regression.diffs /regression.out +.vscode *.pb-c.* diff --git a/Makefile b/Makefile index bd3ae77ce..8f1bf08cc 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,11 @@ MODULE_big = cstore_fdw -PG_CPPFLAGS = --std=c99 +PG_CPPFLAGS = -std=c11 SHLIB_LINK = -lprotobuf-c OBJS = cstore.pb-c.o cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ - cstore_metadata_serialization.o cstore_compression.o mod.o + cstore_metadata_serialization.o cstore_compression.o mod.o \ + cstore_metadata_tables.o EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ diff --git a/cstore.c b/cstore.c index e704bc31d..a98f983e3 100644 --- a/cstore.c +++ b/cstore.c @@ -131,7 +131,7 @@ InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *csto * Initialize state to write to the cstore file. This creates an * empty data file and a valid footer file for the table. 
*/ - writeState = CStoreBeginWrite(cstoreOptions->filename, + writeState = CStoreBeginWrite(relationId, cstoreOptions->filename, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); CStoreEndWrite(writeState); diff --git a/cstore.h b/cstore.h index a694e1e29..500a38cdb 100644 --- a/cstore.h +++ b/cstore.h @@ -89,6 +89,7 @@ typedef struct StripeMetadata uint64 skipListLength; uint64 dataLength; uint64 footerLength; + uint64 id; } StripeMetadata; @@ -213,6 +214,8 @@ typedef struct StripeFooter /* TableReadState represents state of a cstore file read operation. */ typedef struct TableReadState { + Oid relationId; + FILE *tableFile; TableFooter *tableFooter; TupleDesc tupleDescriptor; @@ -238,6 +241,7 @@ typedef struct TableReadState /* TableWriteState represents state of a cstore file write operation. */ typedef struct TableWriteState { + Oid relationId; FILE *tableFile; TableFooter *tableFooter; StringInfo tableFooterFilename; @@ -248,6 +252,7 @@ typedef struct TableWriteState Relation relation; MemoryContext stripeWriteContext; + uint64 currentStripeId; StripeBuffers *stripeBuffers; StripeSkipList *stripeSkipList; uint32 stripeMaxRowCount; @@ -270,7 +275,8 @@ extern void RemoveCStoreDatabaseDirectory(Oid databaseOid); extern void DeleteCStoreTableFiles(char *filename); /* Function declarations for writing to a cstore file */ -extern TableWriteState * CStoreBeginWrite(const char *filename, +extern TableWriteState * CStoreBeginWrite(Oid relationId, + const char *filename, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, @@ -280,7 +286,8 @@ extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, extern void CStoreEndWrite(TableWriteState * state); /* Function declarations for reading from a cstore file */ -extern TableReadState * CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, +extern TableReadState * CStoreBeginRead(Oid relationId, const char 
*filename, + TupleDesc tupleDescriptor, List *projectedColumnList, List *qualConditions); extern TableFooter * CStoreReadFooter(StringInfo tableFooterFilename); extern bool CStoreReadFinished(TableReadState *state); @@ -295,10 +302,14 @@ extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *co uint32 blockRowCount); extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount); -extern uint64 CStoreTableRowCount(const char *filename); +extern uint64 CStoreTableRowCount(Oid relid, const char *filename); extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, CompressionType compressionType); extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); +/* cstore_metadata_tables.c */ +extern void SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer); +extern StripeFooter * ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount); + #endif /* CSTORE_H */ diff --git a/cstore.proto b/cstore.proto index 6e24c9075..ea949c77c 100644 --- a/cstore.proto +++ b/cstore.proto @@ -23,17 +23,12 @@ message ColumnBlockSkipList { repeated ColumnBlockSkipNode blockSkipNodeArray = 1; } -message StripeFooter { - repeated uint64 skipListSizeArray = 1; - repeated uint64 existsSizeArray = 2; - repeated uint64 valueSizeArray = 3; -} - message StripeMetadata { optional uint64 fileOffset = 1; optional uint64 skipListLength = 2; optional uint64 dataLength = 3; optional uint64 footerLength = 4; + optional uint64 id = 5; } message TableFooter { diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 2f001485f..fd526e711 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -58,31 +58,17 @@ CREATE EVENT TRIGGER cstore_drop_event ON SQL_DROP EXECUTE PROCEDURE cstore_drop_trigger(); -CREATE TABLE cstore_table_metadata( - relid oid, - version_major int, - version_minor int -); - -CREATE TABLE cstore_stripe( - relid oid, - stripe bigint -); - -CREATE TABLE 
cstore_column_block_skip_node( +CREATE TABLE cstore_stripe_attr ( relid oid, stripe bigint, attr int, - blockid int, - rowcount bigint, - min_value text, - max_value text, - value_offset bigint, - value_length bigint, - value_compression_type char, - exists_offset bigint, - exists_length bigint); + exists_size bigint, + value_size bigint, + skiplist_size bigint +) WITH (user_catalog_table = true); -CREATE INDEX cstore_column_block_skip_node_idx - ON cstore_column_block_skip_node - USING BTREE(relid, stripe, attr, blockid); +CREATE INDEX cstore_stripe_attr_idx + ON cstore_stripe_attr + USING BTREE(relid, stripe, attr); + +ALTER TABLE cstore_stripe_attr SET SCHEMA pg_catalog; diff --git a/cstore_fdw.c b/cstore_fdw.c index 9787fd2a2..6bcb92269 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -152,7 +152,7 @@ static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel Oid foreignTableId, ForeignPath *bestPath, List *targetList, List *scanClauses); #endif -static double TupleCountEstimate(RelOptInfo *baserel, const char *filename); +static double TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename); static BlockNumber PageCount(const char *filename); static List * ColumnList(RelOptInfo *baserel, Oid foreignTableId); static void CStoreExplainForeignScan(ForeignScanState *scanState, @@ -602,7 +602,8 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) #endif /* init state to write to the cstore file */ - writeState = CStoreBeginWrite(cstoreOptions->filename, + writeState = CStoreBeginWrite(relationId, + cstoreOptions->filename, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, @@ -1414,6 +1415,7 @@ ValidateForeignTableOptions(char *filename, char *compressionTypeString, static char * CStoreDefaultFilePath(Oid foreignTableId) { + StringInfo cstoreFilePath = NULL; Relation relation = relation_open(foreignTableId, AccessShareLock); RelFileNode relationFileNode = 
relation->rd_node; Oid databaseOid = relationFileNode.dbNode; @@ -1429,7 +1431,7 @@ CStoreDefaultFilePath(Oid foreignTableId) } - StringInfo cstoreFilePath = makeStringInfo(); + cstoreFilePath = makeStringInfo(); appendStringInfo(cstoreFilePath, "%s/%s/%u/%u", DataDir, CSTORE_FDW_NAME, databaseOid, relationFileOid); @@ -1445,7 +1447,7 @@ static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); - double tupleCountEstimate = TupleCountEstimate(baserel, cstoreOptions->filename); + double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, cstoreOptions->filename); double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo, 0, JOIN_INNER, NULL); @@ -1492,7 +1494,7 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId double queryPageCount = relationPageCount * queryColumnRatio; double totalDiskAccessCost = seq_page_cost * queryPageCount; - double tupleCountEstimate = TupleCountEstimate(baserel, cstoreOptions->filename); + double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, cstoreOptions->filename); /* * We estimate costs almost the same way as cost_seqscan(), thus assuming @@ -1597,7 +1599,7 @@ CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId, * file. 
*/ static double -TupleCountEstimate(RelOptInfo *baserel, const char *filename) +TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename) { double tupleCountEstimate = 0.0; @@ -1616,7 +1618,7 @@ TupleCountEstimate(RelOptInfo *baserel, const char *filename) } else { - tupleCountEstimate = (double) CStoreTableRowCount(filename); + tupleCountEstimate = (double) CStoreTableRowCount(relid, filename); } return tupleCountEstimate; @@ -1809,8 +1811,8 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) whereClauseList = foreignScan->scan.plan.qual; columnList = (List *) linitial(foreignPrivateList); - readState = CStoreBeginRead(cstoreOptions->filename, tupleDescriptor, - columnList, whereClauseList); + readState = CStoreBeginRead(foreignTableId, cstoreOptions->filename, + tupleDescriptor, columnList, whereClauseList); scanState->fdw_state = (void *) readState; } @@ -2161,7 +2163,8 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela cstoreOptions = CStoreGetOptions(foreignTableOid); tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); - writeState = CStoreBeginWrite(cstoreOptions->filename, + writeState = CStoreBeginWrite(foreignTableOid, + cstoreOptions->filename, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c index 67ae2ec2c..94e3c3116 100644 --- a/cstore_metadata_serialization.c +++ b/cstore_metadata_serialization.c @@ -95,6 +95,8 @@ SerializeTableFooter(TableFooter *tableFooter) protobufStripeMetadata->datalength = stripeMetadata->dataLength; protobufStripeMetadata->has_footerlength = true; protobufStripeMetadata->footerlength = stripeMetadata->footerLength; + protobufStripeMetadata->has_id = true; + protobufStripeMetadata->id = stripeMetadata->id; stripeMetadataArray[stripeIndex] = protobufStripeMetadata; stripeIndex++; @@ -118,38 +120,6 @@ 
SerializeTableFooter(TableFooter *tableFooter) } -/* - * SerializeStripeFooter serializes given stripe footer and returns the result - * as a StringInfo. - */ -StringInfo -SerializeStripeFooter(StripeFooter *stripeFooter) -{ - StringInfo stripeFooterBuffer = NULL; - Protobuf__StripeFooter protobufStripeFooter = PROTOBUF__STRIPE_FOOTER__INIT; - uint8 *stripeFooterData = NULL; - uint32 stripeFooterSize = 0; - - protobufStripeFooter.n_skiplistsizearray = stripeFooter->columnCount; - protobufStripeFooter.skiplistsizearray = (uint64_t *) stripeFooter->skipListSizeArray; - protobufStripeFooter.n_existssizearray = stripeFooter->columnCount; - protobufStripeFooter.existssizearray = (uint64_t *) stripeFooter->existsSizeArray; - protobufStripeFooter.n_valuesizearray = stripeFooter->columnCount; - protobufStripeFooter.valuesizearray = (uint64_t *) stripeFooter->valueSizeArray; - - stripeFooterSize = protobuf__stripe_footer__get_packed_size(&protobufStripeFooter); - stripeFooterData = palloc0(stripeFooterSize); - protobuf__stripe_footer__pack(&protobufStripeFooter, stripeFooterData); - - stripeFooterBuffer = palloc0(sizeof(StringInfoData)); - stripeFooterBuffer->len = stripeFooterSize; - stripeFooterBuffer->maxlen = stripeFooterSize; - stripeFooterBuffer->data = (char *) stripeFooterData; - - return stripeFooterBuffer; -} - - /* * SerializeColumnSkipList serializes a column skip list, where the colum skip * list includes all block skip nodes for that column. 
The function then returns @@ -315,6 +285,7 @@ DeserializeTableFooter(StringInfo buffer) stripeMetadata->skipListLength = protobufStripeMetadata->skiplistlength; stripeMetadata->dataLength = protobufStripeMetadata->datalength; stripeMetadata->footerLength = protobufStripeMetadata->footerlength; + stripeMetadata->id = protobufStripeMetadata->id; stripeMetadataList = lappend(stripeMetadataList, stripeMetadata); } @@ -329,59 +300,6 @@ DeserializeTableFooter(StringInfo buffer) } -/* - * DeserializeStripeFooter deserializes the given buffer and returns the result - * as a StripeFooter struct. - */ -StripeFooter * -DeserializeStripeFooter(StringInfo buffer) -{ - StripeFooter *stripeFooter = NULL; - Protobuf__StripeFooter *protobufStripeFooter = NULL; - uint64 *skipListSizeArray = NULL; - uint64 *existsSizeArray = NULL; - uint64 *valueSizeArray = NULL; - uint64 sizeArrayLength = 0; - uint32 columnCount = 0; - - protobufStripeFooter = protobuf__stripe_footer__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufStripeFooter == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid stripe footer buffer"))); - } - - columnCount = protobufStripeFooter->n_skiplistsizearray; - if (protobufStripeFooter->n_existssizearray != columnCount || - protobufStripeFooter->n_valuesizearray != columnCount) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("stripe size array lengths don't match"))); - } - - sizeArrayLength = columnCount * sizeof(uint64); - - skipListSizeArray = palloc0(sizeArrayLength); - existsSizeArray = palloc0(sizeArrayLength); - valueSizeArray = palloc0(sizeArrayLength); - - memcpy(skipListSizeArray, protobufStripeFooter->skiplistsizearray, sizeArrayLength); - memcpy(existsSizeArray, protobufStripeFooter->existssizearray, sizeArrayLength); - memcpy(valueSizeArray, protobufStripeFooter->valuesizearray, sizeArrayLength); - - protobuf__stripe_footer__free_unpacked(protobufStripeFooter, NULL); - - 
stripeFooter = palloc0(sizeof(StripeFooter)); - stripeFooter->skipListSizeArray = skipListSizeArray; - stripeFooter->existsSizeArray = existsSizeArray; - stripeFooter->valueSizeArray = valueSizeArray; - stripeFooter->columnCount = columnCount; - - return stripeFooter; -} - - /* * DeserializeBlockCount deserializes the given column skip list buffer and * returns the number of blocks in column skip list. diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h index b8890a5d4..d5b7c90ff 100644 --- a/cstore_metadata_serialization.h +++ b/cstore_metadata_serialization.h @@ -17,7 +17,6 @@ /* Function declarations for metadata serialization */ extern StringInfo SerializePostScript(uint64 tableFooterLength); extern StringInfo SerializeTableFooter(TableFooter *tableFooter); -extern StringInfo SerializeStripeFooter(StripeFooter *stripeFooter); extern StringInfo SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, bool typeByValue, int typeLength); @@ -27,7 +26,6 @@ extern void DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength); extern TableFooter * DeserializeTableFooter(StringInfo buffer); extern uint32 DeserializeBlockCount(StringInfo buffer); extern uint32 DeserializeRowCount(StringInfo buffer); -extern StripeFooter * DeserializeStripeFooter(StringInfo buffer); extern ColumnBlockSkipNode * DeserializeColumnSkipList(StringInfo buffer, bool typeByValue, int typeLength, uint32 blockCount); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c new file mode 100644 index 000000000..e2d003989 --- /dev/null +++ b/cstore_metadata_tables.c @@ -0,0 +1,186 @@ +/*------------------------------------------------------------------------- + * + * cstore_metadata_tables.c + * + * Copyright (c), Citus Data, Inc. 
+ * + *------------------------------------------------------------------------- + */ + + +#include "postgres.h" +#include "cstore.h" +#include "cstore_version_compat.h" + +#include +#include "access/nbtree.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/indexing.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_collation.h" +#include "commands/defrem.h" +#include "lib/stringinfo.h" +#include "optimizer/optimizer.h" +#include "port.h" +#include "storage/fd.h" +#include "utils/fmgroids.h" +#include "utils/memutils.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + +#include "cstore_metadata_serialization.h" + +static Oid CStoreStripeAttrRelationId(void); +static Oid CStoreStripeAttrIndexRelationId(void); +static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, + uint64 existsSize, uint64 valuesSize, + uint64 skiplistSize); + +/* constants for cstore_stripe_attr */ +#define Natts_cstore_stripe_attr 6 +#define Anum_cstore_stripe_attr_relid 1 +#define Anum_cstore_stripe_attr_stripe 2 +#define Anum_cstore_stripe_attr_attr 3 +#define Anum_cstore_stripe_attr_exists_size 4 +#define Anum_cstore_stripe_attr_value_size 5 +#define Anum_cstore_stripe_attr_skiplist_size 6 + +/* + * SaveStripeFooter stores give StripeFooter as cstore_stripe_attr records. + */ +void +SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer) +{ + for (AttrNumber attr = 1; attr <= footer->columnCount; attr++) + { + InsertStripeAttrRow(relid, stripe, attr, + footer->existsSizeArray[attr - 1], + footer->valueSizeArray[attr - 1], + footer->skipListSizeArray[attr - 1]); + } +} + + +/* + * InsertStripeAttrRow adds a row to cstore_stripe_attr. 
+ */ +static void +InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, + uint64 existsSize, uint64 valuesSize, + uint64 skiplistSize) +{ + bool nulls[Natts_cstore_stripe_attr] = { 0 }; + Datum values[Natts_cstore_stripe_attr] = { + ObjectIdGetDatum(relid), + Int64GetDatum(stripe), + Int16GetDatum(attr), + Int64GetDatum(existsSize), + Int64GetDatum(valuesSize), + Int64GetDatum(skiplistSize) + }; + + Oid cstoreStripeAttrOid = CStoreStripeAttrRelationId(); + Relation cstoreStripeAttrs = heap_open(cstoreStripeAttrOid, RowExclusiveLock); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripeAttrs); + + HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + + CatalogTupleInsert(cstoreStripeAttrs, tuple); + + CommandCounterIncrement(); + + heap_close(cstoreStripeAttrs, NoLock); +} + + +/* + * ReadStripeFooter returns a StripeFooter by reading relevant records from + * cstore_stripe_attr. + */ +StripeFooter * +ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) +{ + StripeFooter *footer = NULL; + HeapTuple heapTuple; + + Oid cstoreStripeAttrOid = CStoreStripeAttrRelationId(); + Relation cstoreStripeAttrs = heap_open(cstoreStripeAttrOid, AccessShareLock); + Relation index = index_open(CStoreStripeAttrIndexRelationId(), AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripeAttrs); + + SysScanDesc scanDescriptor = NULL; + ScanKeyData scanKey[2]; + ScanKeyInit(&scanKey[0], Anum_cstore_stripe_attr_relid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[1], Anum_cstore_stripe_attr_stripe, + BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(stripe)); + + scanDescriptor = systable_beginscan_ordered(cstoreStripeAttrs, index, NULL, 2, + scanKey); + + footer = palloc0(sizeof(StripeFooter)); + footer->existsSizeArray = palloc0(relationColumnCount * sizeof(int64)); + footer->valueSizeArray = palloc0(relationColumnCount * sizeof(int64)); + footer->skipListSizeArray = 
palloc0(relationColumnCount * sizeof(int64)); + + /* + * Stripe can have less columns than the relation if ALTER TABLE happens + * after stripe is formed. So we calculate column count of a stripe as + * maximum attribute number for that stripe. + */ + footer->columnCount = 0; + + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + Datum datumArray[Natts_cstore_stripe_attr]; + bool isNullArray[Natts_cstore_stripe_attr]; + AttrNumber attr = 0; + + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + attr = DatumGetInt16(datumArray[2]); + + footer->columnCount = Max(footer->columnCount, attr); + + while (attr > relationColumnCount) + { + ereport(ERROR, (errmsg("unexpected attribute %d for a relation with %d attrs", + attr, relationColumnCount))); + } + + footer->existsSizeArray[attr - 1] = + DatumGetInt64(datumArray[Anum_cstore_stripe_attr_exists_size - 1]); + footer->valueSizeArray[attr - 1] = + DatumGetInt64(datumArray[Anum_cstore_stripe_attr_value_size - 1]); + footer->skipListSizeArray[attr - 1] = + DatumGetInt64(datumArray[Anum_cstore_stripe_attr_skiplist_size - 1]); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreStripeAttrs, NoLock); + + return footer; +} + + +/* + * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreStripeAttrRelationId(void) +{ + return get_relname_relid("cstore_stripe_attr", PG_CATALOG_NAMESPACE); +} + + +/* + * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr_idx. + * TODO: should we cache this similar to citus? 
+ */ +static Oid +CStoreStripeAttrIndexRelationId(void) +{ + return get_relname_relid("cstore_stripe_attr_idx", PG_CATALOG_NAMESPACE); +} diff --git a/cstore_reader.c b/cstore_reader.c index 68ce5cdad..6caf99bc7 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -31,6 +31,7 @@ #include "storage/fd.h" #include "utils/memutils.h" #include "utils/lsyscache.h" +#include "utils/rel.h" #include "cstore.h" #include "cstore_metadata_serialization.h" @@ -39,6 +40,7 @@ /* static function declarations */ static StripeBuffers * LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, + StripeFooter *stripeFooter, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList); @@ -51,8 +53,6 @@ static ColumnBuffers * LoadColumnBuffers(FILE *tableFile, uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, Form_pg_attribute attributeForm); -static StripeFooter * LoadStripeFooter(FILE *tableFile, StripeMetadata *stripeMetadata, - uint32 columnCount); static StripeSkipList * LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, StripeFooter *stripeFooter, @@ -86,7 +86,8 @@ static int64 FILESize(FILE *file); static StringInfo ReadFromFile(FILE *file, uint64 offset, uint32 size); static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount); -static uint64 StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata); +static uint64 StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata); +static int RelationColumnCount(Oid relid); /* @@ -94,7 +95,7 @@ static uint64 StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata); * read handle that's used during reading rows and finishing the read operation. 
*/ TableReadState * -CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, +CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { TableReadState *readState = NULL; @@ -136,6 +137,7 @@ CStoreBeginRead(const char *filename, TupleDesc tupleDescriptor, tableFooter->blockRowCount); readState = palloc0(sizeof(TableReadState)); + readState->relationId = relationId; readState->tableFile = tableFile; readState->tableFooter = tableFooter; readState->projectedColumnList = projectedColumnList; @@ -247,6 +249,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu StripeMetadata *stripeMetadata = NULL; List *stripeMetadataList = tableFooter->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); + StripeFooter *stripeFooter = NULL; /* if we have read all stripes, return false */ if (readState->readStripeCount == stripeCount) @@ -258,7 +261,11 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu MemoryContextReset(readState->stripeReadContext); stripeMetadata = list_nth(stripeMetadataList, readState->readStripeCount); + stripeFooter = ReadStripeFooter(readState->relationId, + stripeMetadata->id, + readState->tupleDescriptor->natts); stripeBuffers = LoadFilteredStripeBuffers(readState->tableFile, stripeMetadata, + stripeFooter, readState->tupleDescriptor, readState->projectedColumnList, readState->whereClauseList); @@ -396,7 +403,7 @@ FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount) /* CStoreTableRowCount returns the exact row count of a table using skiplists */ uint64 -CStoreTableRowCount(const char *filename) +CStoreTableRowCount(Oid relid, const char *filename) { TableFooter *tableFooter = NULL; FILE *tableFile; @@ -422,7 +429,7 @@ CStoreTableRowCount(const char *filename) foreach(stripeMetadataCell, tableFooter->stripeMetadataList) { StripeMetadata *stripeMetadata = 
(StripeMetadata *) lfirst(stripeMetadataCell); - totalRowCount += StripeRowCount(tableFile, stripeMetadata); + totalRowCount += StripeRowCount(relid, tableFile, stripeMetadata); } FreeFile(tableFile); @@ -436,20 +443,13 @@ CStoreTableRowCount(const char *filename) * skip list, and returns number of rows for given stripe. */ static uint64 -StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata) +StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata) { uint64 rowCount = 0; - StripeFooter *stripeFooter = NULL; - StringInfo footerBuffer = NULL; StringInfo firstColumnSkipListBuffer = NULL; - uint64 footerOffset = 0; - footerOffset += stripeMetadata->fileOffset; - footerOffset += stripeMetadata->skipListLength; - footerOffset += stripeMetadata->dataLength; - - footerBuffer = ReadFromFile(tableFile, footerOffset, stripeMetadata->footerLength); - stripeFooter = DeserializeStripeFooter(footerBuffer); + StripeFooter * stripeFooter = ReadStripeFooter(relid, stripeMetadata->id, + RelationColumnCount(relid)); firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, stripeFooter->skipListSizeArray[0]); @@ -466,8 +466,8 @@ StripeRowCount(FILE *tableFile, StripeMetadata *stripeMetadata) */ static StripeBuffers * LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, - TupleDesc tupleDescriptor, List *projectedColumnList, - List *whereClauseList) + StripeFooter *stripeFooter, TupleDesc tupleDescriptor, + List *projectedColumnList, List *whereClauseList) { StripeBuffers *stripeBuffers = NULL; ColumnBuffers **columnBuffersArray = NULL; @@ -475,8 +475,6 @@ LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, uint32 columnIndex = 0; uint32 columnCount = tupleDescriptor->natts; - StripeFooter *stripeFooter = LoadStripeFooter(tableFile, stripeMetadata, - columnCount); bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); StripeSkipList *stripeSkipList = 
LoadStripeSkipList(tableFile, stripeMetadata, @@ -617,31 +615,6 @@ LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, } -/* Reads and returns the given stripe's footer. */ -static StripeFooter * -LoadStripeFooter(FILE *tableFile, StripeMetadata *stripeMetadata, - uint32 columnCount) -{ - StripeFooter *stripeFooter = NULL; - StringInfo footerBuffer = NULL; - uint64 footerOffset = 0; - - footerOffset += stripeMetadata->fileOffset; - footerOffset += stripeMetadata->skipListLength; - footerOffset += stripeMetadata->dataLength; - - footerBuffer = ReadFromFile(tableFile, footerOffset, stripeMetadata->footerLength); - stripeFooter = DeserializeStripeFooter(footerBuffer); - if (stripeFooter->columnCount > columnCount) - { - ereport(ERROR, (errmsg("stripe footer column count and table column count " - "don't match"))); - } - - return stripeFooter; -} - - /* Reads the skip list for the given stripe. */ static StripeSkipList * LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, @@ -1377,3 +1350,15 @@ ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount) } } } + + +static int +RelationColumnCount(Oid relid) +{ + Relation rel = RelationIdGetRelation(relid); + TupleDesc tupleDesc = RelationGetDescr(rel); + int columnCount = tupleDesc->natts; + RelationClose(rel); + + return columnCount; +} diff --git a/cstore_writer.c b/cstore_writer.c index 51a01c8f3..76fc703f3 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -65,7 +65,8 @@ static StringInfo CopyStringInfo(StringInfo sourceString); * will be added. 
*/ TableWriteState * -CStoreBeginWrite(const char *filename, CompressionType compressionType, +CStoreBeginWrite(Oid relationId, + const char *filename, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, TupleDesc tupleDescriptor) { @@ -82,6 +83,7 @@ CStoreBeginWrite(const char *filename, CompressionType compressionType, int statResult = 0; bool *columnMaskArray = NULL; ColumnBlockData **blockData = NULL; + uint64 currentStripeId = 0; tableFooterFilename = makeStringInfo(); appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); @@ -130,6 +132,7 @@ CStoreBeginWrite(const char *filename, CompressionType compressionType, lastStripeSize += lastStripe->footerLength; currentFileOffset = lastStripe->fileOffset + lastStripeSize; + currentStripeId = lastStripe->id + 1; errno = 0; fseekResult = fseeko(tableFile, currentFileOffset, SEEK_SET); @@ -173,6 +176,7 @@ CStoreBeginWrite(const char *filename, CompressionType compressionType, blockData = CreateEmptyBlockDataArray(columnCount, columnMaskArray, blockRowCount); writeState = palloc0(sizeof(TableWriteState)); + writeState->relationId = relationId; writeState->tableFile = tableFile; writeState->tableFooterFilename = tableFooterFilename; writeState->tableFooter = tableFooter; @@ -186,6 +190,7 @@ CStoreBeginWrite(const char *filename, CompressionType compressionType, writeState->stripeWriteContext = stripeWriteContext; writeState->blockDataArray = blockData; writeState->compressionBuffer = NULL; + writeState->currentStripeId = currentStripeId; return writeState; } @@ -286,6 +291,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul StripeMetadata stripeMetadata = FlushStripe(writeState); MemoryContextReset(writeState->stripeWriteContext); + writeState->currentStripeId++; + /* set stripe data and skip list to NULL so they are recreated next time */ writeState->stripeBuffers = NULL; writeState->stripeSkipList = NULL; @@ -490,7 +497,6 @@ 
FlushStripe(TableWriteState *writeState) uint64 dataLength = 0; StringInfo *skipListBufferArray = NULL; StripeFooter *stripeFooter = NULL; - StringInfo stripeFooterBuffer = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; TableFooter *tableFooter = writeState->tableFooter; @@ -545,7 +551,6 @@ FlushStripe(TableWriteState *writeState) /* create skip list and footer buffers */ skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor); stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray); - stripeFooterBuffer = SerializeStripeFooter(stripeFooter); /* * Each stripe has three sections: @@ -594,7 +599,9 @@ FlushStripe(TableWriteState *writeState) } /* finally, we flush the footer buffer */ - WriteToFile(tableFile, stripeFooterBuffer->data, stripeFooterBuffer->len); + SaveStripeFooter(writeState->relationId, + writeState->currentStripeId, + stripeFooter); /* set stripe metadata */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) @@ -607,12 +614,12 @@ FlushStripe(TableWriteState *writeState) stripeMetadata.fileOffset = writeState->currentFileOffset; stripeMetadata.skipListLength = skipListLength; stripeMetadata.dataLength = dataLength; - stripeMetadata.footerLength = stripeFooterBuffer->len; + stripeMetadata.footerLength = 0; + stripeMetadata.id = writeState->currentStripeId; /* advance current file offset */ writeState->currentFileOffset += skipListLength; writeState->currentFileOffset += dataLength; - writeState->currentFileOffset += stripeFooterBuffer->len; return stripeMetadata; } From b74de68ce3f4c9e83f10f169a646ea192908d6c3 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 7 Sep 2020 15:48:23 -0700 Subject: [PATCH 007/124] Add 'make reindent' --- .gitattributes | 26 ++++++++ Makefile | 3 + cstore.c | 8 ++- cstore.h | 16 +---- cstore_compression.c | 17 ++--- cstore_fdw.c | 112 +++++++++++++++++--------------- cstore_fdw.h | 2 +- cstore_metadata_serialization.c | 8 +-- cstore_metadata_serialization.h | 2 
+- cstore_reader.c | 31 +++++---- cstore_version_compat.h | 7 +- cstore_writer.c | 16 +++-- mod.c | 7 +- 13 files changed, 143 insertions(+), 112 deletions(-) create mode 100644 .gitattributes diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..215ae1909 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,26 @@ +* whitespace=space-before-tab,trailing-space +*.[chly] whitespace=space-before-tab,trailing-space,indent-with-non-tab,tabwidth=4 +*.dsl whitespace=space-before-tab,trailing-space,tab-in-indent +*.patch -whitespace +*.pl whitespace=space-before-tab,trailing-space,tabwidth=4 +*.po whitespace=space-before-tab,trailing-space,tab-in-indent,-blank-at-eof +*.sgml whitespace=space-before-tab,trailing-space,tab-in-indent,-blank-at-eol +*.x[ms]l whitespace=space-before-tab,trailing-space,tab-in-indent + +# Avoid confusing ASCII underlines with leftover merge conflict markers +README conflict-marker-size=32 +README.* conflict-marker-size=32 + +# Certain data files that contain special whitespace, and other special cases +*.data -whitespace + +# Test output files that contain extra whitespace +*.out -whitespace +src/test/regress/output/*.source -whitespace + +# These files are maintained or generated elsewhere. We take them as is. +configure -whitespace + +# all C files (implementation and header) use our style... +*.[ch] citus-style + diff --git a/Makefile b/Makefile index 8f1bf08cc..10d7fcc14 100644 --- a/Makefile +++ b/Makefile @@ -58,3 +58,6 @@ installcheck: remove_cstore_files remove_cstore_files: rm -f data/*.cstore data/*.cstore.footer + +reindent: + citus_indent . diff --git a/cstore.c b/cstore.c index a98f983e3..a259f0430 100644 --- a/cstore.c +++ b/cstore.c @@ -43,6 +43,7 @@ ParseCompressionType(const char *compressionTypeString) return compressionType; } + /* CreateDirectory creates a new directory with the given directory name. 
*/ static void CreateDirectory(StringInfo directoryName) @@ -56,6 +57,7 @@ CreateDirectory(StringInfo directoryName) } } + /* DirectoryExists checks if a directory exists for the given directory name. */ static bool DirectoryExists(StringInfo directoryName) @@ -91,6 +93,7 @@ DirectoryExists(StringInfo directoryName) return directoryExists; } + /* * RemoveCStoreDatabaseDirectory removes CStore directory previously * created for this database. @@ -132,8 +135,9 @@ InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *csto * empty data file and a valid footer file for the table. */ writeState = CStoreBeginWrite(relationId, cstoreOptions->filename, - cstoreOptions->compressionType, cstoreOptions->stripeRowCount, - cstoreOptions->blockRowCount, tupleDescriptor); + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, tupleDescriptor); CStoreEndWrite(writeState); } diff --git a/cstore.h b/cstore.h index 500a38cdb..ed850d9ef 100644 --- a/cstore.h +++ b/cstore.h @@ -60,7 +60,6 @@ typedef enum COMPRESSION_PG_LZ = 1, COMPRESSION_COUNT - } CompressionType; @@ -75,7 +74,6 @@ typedef struct CStoreOptions CompressionType compressionType; uint64 stripeRowCount; uint32 blockRowCount; - } CStoreOptions; @@ -90,7 +88,6 @@ typedef struct StripeMetadata uint64 dataLength; uint64 footerLength; uint64 id; - } StripeMetadata; @@ -99,7 +96,6 @@ typedef struct TableFooter { List *stripeMetadataList; uint64 blockRowCount; - } TableFooter; @@ -123,7 +119,6 @@ typedef struct ColumnBlockSkipNode uint64 existsLength; CompressionType valueCompressionType; - } ColumnBlockSkipNode; @@ -137,7 +132,6 @@ typedef struct StripeSkipList ColumnBlockSkipNode **blockSkipNodeArray; uint32 columnCount; uint32 blockCount; - } StripeSkipList; @@ -155,7 +149,6 @@ typedef struct ColumnBlockData /* valueBuffer keeps actual data for type-by-reference datums from valueArray. 
*/ StringInfo valueBuffer; - } ColumnBlockData; @@ -171,7 +164,6 @@ typedef struct ColumnBlockBuffers StringInfo existsBuffer; StringInfo valueBuffer; CompressionType valueCompressionType; - } ColumnBlockBuffers; @@ -182,7 +174,6 @@ typedef struct ColumnBlockBuffers typedef struct ColumnBuffers { ColumnBlockBuffers **blockBuffersArray; - } ColumnBuffers; @@ -192,7 +183,6 @@ typedef struct StripeBuffers uint32 columnCount; uint32 rowCount; ColumnBuffers **columnBuffersArray; - } StripeBuffers; @@ -207,7 +197,6 @@ typedef struct StripeFooter uint64 *skipListSizeArray; uint64 *existsSizeArray; uint64 *valueSizeArray; - } StripeFooter; @@ -234,7 +223,6 @@ typedef struct TableReadState uint64 stripeReadRowCount; ColumnBlockData **blockDataArray; int32 deserializedBlockIndex; - } TableReadState; @@ -257,6 +245,7 @@ typedef struct TableWriteState StripeSkipList *stripeSkipList; uint32 stripeMaxRowCount; ColumnBlockData **blockDataArray; + /* * compressionBuffer buffer is used as temporary storage during * data value compression operation. It is kept here to minimize @@ -264,7 +253,6 @@ typedef struct TableWriteState * deallocated when memory context is reset. 
*/ StringInfo compressionBuffer; - } TableWriteState; extern CompressionType ParseCompressionType(const char *compressionTypeString); @@ -283,7 +271,7 @@ extern TableWriteState * CStoreBeginWrite(Oid relationId, TupleDesc tupleDescriptor); extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, bool *columnNulls); -extern void CStoreEndWrite(TableWriteState * state); +extern void CStoreEndWrite(TableWriteState *state); /* Function declarations for reading from a cstore file */ extern TableReadState * CStoreBeginRead(Oid relationId, const char *filename, diff --git a/cstore_compression.c b/cstore_compression.c index a3c5f9f7d..f6122614a 100644 --- a/cstore_compression.c +++ b/cstore_compression.c @@ -22,38 +22,39 @@ #include "cstore.h" - #if PG_VERSION_NUM >= 90500 + /* * The information at the start of the compressed data. This decription is taken * from pg_lzcompress in pre-9.5 version of PostgreSQL. */ typedef struct CStoreCompressHeader { - int32 vl_len_; /* varlena header (do not touch directly!) */ - int32 rawsize; + int32 vl_len_; /* varlena header (do not touch directly!) 
*/ + int32 rawsize; } CStoreCompressHeader; /* * Utilities for manipulation of header information for compressed data */ -#define CSTORE_COMPRESS_HDRSZ ((int32) sizeof(CStoreCompressHeader)) +#define CSTORE_COMPRESS_HDRSZ ((int32) sizeof(CStoreCompressHeader)) #define CSTORE_COMPRESS_RAWSIZE(ptr) (((CStoreCompressHeader *) (ptr))->rawsize) #define CSTORE_COMPRESS_RAWDATA(ptr) (((char *) (ptr)) + CSTORE_COMPRESS_HDRSZ) -#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = (len)) +#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = \ + (len)) #else -#define CSTORE_COMPRESS_HDRSZ (0) +#define CSTORE_COMPRESS_HDRSZ (0) #define CSTORE_COMPRESS_RAWSIZE(ptr) (PGLZ_RAW_SIZE((PGLZ_Header *) buffer->data)) #define CSTORE_COMPRESS_RAWDATA(ptr) (((PGLZ_Header *) (ptr))) -#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = (len)) +#define CSTORE_COMPRESS_SET_RAWSIZE(ptr, len) (((CStoreCompressHeader *) (ptr))->rawsize = \ + (len)) #endif - /* * CompressBuffer compresses the given buffer with the given compression type * outputBuffer enlarged to contain compressed data. 
The function returns true diff --git a/cstore_fdw.c b/cstore_fdw.c index 6bcb92269..7d43c07d5 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -85,7 +85,6 @@ typedef struct CStoreValidOption { const char *optionName; Oid optionContextId; - } CStoreValidOption; #define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" @@ -114,13 +113,13 @@ static void CStoreProcessUtility(Node *parseTree, const char *queryString, ParamListInfo paramListInfo, DestReceiver *destReceiver, char *completionTag); #endif -static bool CopyCStoreTableStatement(CopyStmt* copyStatement); -static void CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement); +static bool CopyCStoreTableStatement(CopyStmt *copyStatement); +static void CheckSuperuserPrivilegesForCopy(const CopyStmt *copyStatement); static void CStoreProcessCopyCommand(CopyStmt *copyStatement, const char *queryString, char *completionTag); static uint64 CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString); -static uint64 CopyOutCStoreTable(CopyStmt* copyStatement, const char* queryString); +static uint64 CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString); static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); static List * DroppedCStoreFilenameList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); @@ -168,7 +167,7 @@ static int CStoreAcquireSampleRows(Relation relation, int logLevel, HeapTuple *sampleRows, int targetRowCount, double *totalRowCount, double *totalDeadRowCount); static List * CStorePlanForeignModify(PlannerInfo *plannerInfo, ModifyTable *plan, - Index resultRelation, int subplanIndex); + Index resultRelation, int subplanIndex); static void CStoreBeginForeignModify(ModifyTableState *modifyTableState, ResultRelInfo *relationInfo, List *fdwPrivate, int subplanIndex, int executorflags); @@ -201,7 +200,8 @@ static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; * previous utility hook, and then install our hook to 
pre-intercept calls to * the copy command. */ -void cstore_fdw_init() +void +cstore_fdw_init() { PreviousProcessUtilityHook = ProcessUtility_hook; ProcessUtility_hook = CStoreProcessUtility; @@ -212,7 +212,8 @@ void cstore_fdw_init() * Called when the module is unloaded. This function uninstalls the * extension's hooks. */ -void cstore_fdw_finish() +void +cstore_fdw_finish() { ProcessUtility_hook = PreviousProcessUtilityHook; } @@ -296,10 +297,10 @@ CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, DestReceiver *destReceiver, char *completionTag) #else static void -CStoreProcessUtility(Node * parseTree, const char *queryString, +CStoreProcessUtility(Node * parseTree, const char * queryString, ProcessUtilityContext context, ParamListInfo paramListInfo, - DestReceiver *destReceiver, char *completionTag) + DestReceiver * destReceiver, char * completionTag) #endif { #if PG_VERSION_NUM >= 100000 @@ -387,11 +388,12 @@ CStoreProcessUtility(Node * parseTree, const char *queryString, CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); - /* restore the former relation list. Our - * replacement could be freed but still needed - * in a cached plan. A truncate can be cached - * if run from a pl/pgSQL function */ - truncateStatement->relations = allTablesList; + + /* restore the former relation list. Our + * replacement could be freed but still needed + * in a cached plan. A truncate can be cached + * if run from a pl/pgSQL function */ + truncateStatement->relations = allTablesList; } TruncateCStoreTables(cstoreRelationList); @@ -439,7 +441,7 @@ CStoreProcessUtility(Node * parseTree, const char *queryString, * true. The function returns false otherwise. 
*/ static bool -CopyCStoreTableStatement(CopyStmt* copyStatement) +CopyCStoreTableStatement(CopyStmt *copyStatement) { bool copyCStoreTableStatement = false; @@ -474,7 +476,7 @@ CopyCStoreTableStatement(CopyStmt* copyStatement) * copy operation and reports error if user does not have superuser rights. */ static void -CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement) +CheckSuperuserPrivilegesForCopy(const CopyStmt *copyStatement) { /* * We disallow copy from file or program except to superusers. These checks @@ -485,16 +487,16 @@ CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement) if (copyStatement->is_program) { ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be superuser to COPY to or from a program"), - errhint("Anyone can COPY to stdout or from stdin. " - "psql's \\copy command also works for anyone."))); + errmsg("must be superuser to COPY to or from a program"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); } else { ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be superuser to COPY to or from a file"), - errhint("Anyone can COPY to stdout or from stdin. " - "psql's \\copy command also works for anyone."))); + errmsg("must be superuser to COPY to or from a file"), + errhint("Anyone can COPY to stdout or from stdin. " + "psql's \\copy command also works for anyone."))); } } } @@ -505,7 +507,7 @@ CheckSuperuserPrivilegesForCopy(const CopyStmt* copyStatement) * It determines the copy direction and forwards execution to appropriate function. */ static void -CStoreProcessCopyCommand(CopyStmt *copyStatement, const char* queryString, +CStoreProcessCopyCommand(CopyStmt *copyStatement, const char *queryString, char *completionTag) { uint64 processedCount = 0; @@ -648,7 +650,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) * stream. Copying selected columns from cstore table is not currently supported. 
*/ static uint64 -CopyOutCStoreTable(CopyStmt* copyStatement, const char* queryString) +CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString) { uint64 processedCount = 0; RangeVar *relation = NULL; @@ -682,6 +684,7 @@ CopyOutCStoreTable(CopyStmt* copyStatement, const char* queryString) copyStatement->relation = NULL; #if (PG_VERSION_NUM >= 100000) + /* * raw_parser returns list of RawStmt* in PG 10+ we need to * extract actual query from it. @@ -737,7 +740,7 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) foreach(commandCell, commandList) { AlterTableCmd *alterCommand = (AlterTableCmd *) lfirst(commandCell); - if(alterCommand->subtype == AT_AlterColumnType) + if (alterCommand->subtype == AT_AlterColumnType) { char *columnName = alterCommand->name; ColumnDef *columnDef = (ColumnDef *) alterCommand->def; @@ -849,7 +852,7 @@ OpenRelationsForTruncate(List *cstoreTableList) Relation relation = heap_openrv(rangeVar, AccessExclusiveLock); Oid relationId = relation->rd_id; AclResult aclresult = pg_class_aclcheck(relationId, GetUserId(), - ACL_TRUNCATE); + ACL_TRUNCATE); if (aclresult != ACLCHECK_OK) { aclcheck_error(aclresult, ACLCHECK_OBJECT_TABLE, get_rel_name(relationId)); @@ -890,6 +893,7 @@ TruncateCStoreTables(List *cstoreRelationList) } } + /* * CStoreTable checks if the given table name belongs to a foreign columnar store * table. If it does, the function returns true. Otherwise, it returns false. 
@@ -996,23 +1000,20 @@ DistributedTable(Oid relationId) static bool DistributedWorkerCopy(CopyStmt *copyStatement) { - ListCell *optionCell = NULL; - foreach(optionCell, copyStatement->options) - { - DefElem *defel = (DefElem *) lfirst(optionCell); - if (strncmp(defel->defname, "master_host", NAMEDATALEN) == 0) - { - return true; - } - } + ListCell *optionCell = NULL; + foreach(optionCell, copyStatement->options) + { + DefElem *defel = (DefElem *) lfirst(optionCell); + if (strncmp(defel->defname, "master_host", NAMEDATALEN) == 0) + { + return true; + } + } - return false; + return false; } - - - /* * cstore_table_size returns the total on-disk size of a cstore table in bytes. * The result includes the sizes of data file and footer file. @@ -1056,7 +1057,7 @@ cstore_table_size(PG_FUNCTION_ARGS) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", - footerFilename->data))); + footerFilename->data))); } tableSize += dataFileStatBuffer.st_size; @@ -1428,7 +1429,6 @@ CStoreDefaultFilePath(Oid foreignTableId) { databaseOid = MyDatabaseId; relationFileOid = foreignTableId; - } cstoreFilePath = makeStringInfo(); @@ -1447,7 +1447,8 @@ static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); - double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, cstoreOptions->filename); + double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, + cstoreOptions->filename); double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo, 0, JOIN_INNER, NULL); @@ -1494,7 +1495,8 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId double queryPageCount = relationPageCount * queryColumnRatio; double totalDiskAccessCost = seq_page_cost * queryPageCount; - double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, cstoreOptions->filename); + double tupleCountEstimate = 
TupleCountEstimate(foreignTableId, baserel, + cstoreOptions->filename); /* * We estimate costs almost the same way as cost_seqscan(), thus assuming @@ -1505,7 +1507,7 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId double totalCpuCost = cpuCostPerTuple * tupleCountEstimate; double startupCost = baserel->baserestrictcost.startup; - double totalCost = startupCost + totalCpuCost + totalDiskAccessCost; + double totalCost = startupCost + totalCpuCost + totalDiskAccessCost; /* create a foreign path node and add it as the only possible path */ #if PG_VERSION_NUM >= 90600 @@ -1550,8 +1552,8 @@ CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId, Plan *outerPlan) #else static ForeignScan * -CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId, - ForeignPath *bestPath, List *targetList, List *scanClauses) +CStoreGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreignTableId, + ForeignPath * bestPath, List * targetList, List * scanClauses) #endif { ForeignScan *foreignScan = NULL; @@ -1720,7 +1722,7 @@ ColumnList(RelOptInfo *baserel, Oid foreignTableId) { ListCell *neededColumnCell = NULL; Var *column = NULL; - Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex - 1); + Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex - 1); if (attributeForm->attisdropped) { @@ -1920,7 +1922,7 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, { int sampleRowCount = 0; double rowCount = 0.0; - double rowCountToSkip = -1; /* -1 means not set yet */ + double rowCountToSkip = -1; /* -1 means not set yet */ double selectionState = 0; MemoryContext oldContext = CurrentMemoryContext; MemoryContext tupleContext = NULL; @@ -1948,7 +1950,8 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, if (!attributeForm->attisdropped) { Var *column = makeVar(tableId, columnIndex + 1, attributeForm->atttypid, - attributeForm->atttypmod, 
attributeForm->attcollation, 0); + attributeForm->atttypmod, attributeForm->attcollation, + 0); columnList = lappend(columnList, column); } } @@ -2139,7 +2142,7 @@ CStoreBeginForeignModify(ModifyTableState *modifyTableState, return; } - Assert (modifyTableState->operation == CMD_INSERT); + Assert(modifyTableState->operation == CMD_INSERT); CStoreBeginForeignInsert(modifyTableState, relationInfo); } @@ -2152,7 +2155,7 @@ CStoreBeginForeignModify(ModifyTableState *modifyTableState, static void CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *relationInfo) { - Oid foreignTableOid = InvalidOid; + Oid foreignTableOid = InvalidOid; CStoreOptions *cstoreOptions = NULL; TupleDesc tupleDescriptor = NULL; TableWriteState *writeState = NULL; @@ -2183,7 +2186,7 @@ static TupleTableSlot * CStoreExecForeignInsert(EState *executorState, ResultRelInfo *relationInfo, TupleTableSlot *tupleSlot, TupleTableSlot *planSlot) { - TableWriteState *writeState = (TableWriteState*) relationInfo->ri_FdwState; + TableWriteState *writeState = (TableWriteState *) relationInfo->ri_FdwState; HeapTuple heapTuple; Assert(writeState != NULL); @@ -2224,7 +2227,7 @@ CStoreEndForeignModify(EState *executorState, ResultRelInfo *relationInfo) static void CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo) { - TableWriteState *writeState = (TableWriteState*) relationInfo->ri_FdwState; + TableWriteState *writeState = (TableWriteState *) relationInfo->ri_FdwState; /* writeState is NULL during Explain queries */ if (writeState != NULL) @@ -2238,6 +2241,7 @@ CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo) #if PG_VERSION_NUM >= 90600 + /* * CStoreIsForeignScanParallelSafe always returns true to indicate that * reading from a cstore_fdw table in a parallel worker is safe. 
This @@ -2254,4 +2258,6 @@ CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, { return true; } + + #endif diff --git a/cstore_fdw.h b/cstore_fdw.h index c7b4460ed..1c8170ae8 100644 --- a/cstore_fdw.h +++ b/cstore_fdw.h @@ -32,4 +32,4 @@ extern Datum cstore_clean_table_resources(PG_FUNCTION_ARGS); extern Datum cstore_fdw_handler(PG_FUNCTION_ARGS); extern Datum cstore_fdw_validator(PG_FUNCTION_ARGS); -#endif /* CSTORE_FDW_H */ +#endif /* CSTORE_FDW_H */ diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c index 94e3c3116..09c17ee7f 100644 --- a/cstore_metadata_serialization.c +++ b/cstore_metadata_serialization.c @@ -143,8 +143,8 @@ SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCou { ColumnBlockSkipNode blockSkipNode = blockSkipNodeArray[blockIndex]; Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = NULL; - ProtobufCBinaryData binaryMinimumValue = {0, 0}; - ProtobufCBinaryData binaryMaximumValue = {0, 0}; + ProtobufCBinaryData binaryMinimumValue = { 0, 0 }; + ProtobufCBinaryData binaryMaximumValue = { 0, 0 }; if (blockSkipNode.hasMinMax) { @@ -352,7 +352,7 @@ DeserializeRowCount(StringInfo buffer) for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = - protobufBlockSkipList->blockskipnodearray[blockIndex]; + protobufBlockSkipList->blockskipnodearray[blockIndex]; rowCount += protobufBlockSkipNode->rowcount; } @@ -452,7 +452,7 @@ DeserializeColumnSkipList(StringInfo buffer, bool typeByValue, int typeLength, static ProtobufCBinaryData DatumToProtobufBinary(Datum datum, bool datumTypeByValue, int datumTypeLength) { - ProtobufCBinaryData protobufBinary = {0, 0}; + ProtobufCBinaryData protobufBinary = { 0, 0 }; int datumLength = att_addlength_datum(0, datumTypeLength, datum); char *datumBuffer = palloc0(datumLength); diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h index d5b7c90ff..12a3d135b 100644 --- 
a/cstore_metadata_serialization.h +++ b/cstore_metadata_serialization.h @@ -31,4 +31,4 @@ extern ColumnBlockSkipNode * DeserializeColumnSkipList(StringInfo buffer, uint32 blockCount); -#endif /* CSTORE_SERIALIZATION_H */ +#endif /* CSTORE_SERIALIZATION_H */ diff --git a/cstore_reader.c b/cstore_reader.c index 6caf99bc7..78c7fe00e 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -67,7 +67,7 @@ static OpExpr * MakeOpExpression(Var *variable, int16 strategyNumber); static Oid GetOperatorByType(Oid typeId, Oid accessMethodId, int16 strategyNumber); static void UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue); static StripeSkipList * SelectedBlockSkipList(StripeSkipList *stripeSkipList, - bool *projectedColumnMask, + bool *projectedColumnMask, bool *selectedBlockMask); static uint32 StripeSkipListRowCount(StripeSkipList *stripeSkipList); static bool * ProjectedColumnMask(uint32 columnCount, List *projectedColumnList); @@ -104,7 +104,7 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, MemoryContext stripeReadContext = NULL; uint32 columnCount = 0; bool *projectedColumnMask = NULL; - ColumnBlockData **blockDataArray = NULL; + ColumnBlockData **blockDataArray = NULL; StringInfo tableFooterFilename = makeStringInfo(); appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); @@ -134,7 +134,7 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, columnCount = tupleDescriptor->natts; projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); blockDataArray = CreateEmptyBlockDataArray(columnCount, projectedColumnMask, - tableFooter->blockRowCount); + tableFooter->blockRowCount); readState = palloc0(sizeof(TableReadState)); readState->relationId = relationId; @@ -356,7 +356,7 @@ ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, uint32 blockRowCount) { uint32 columnIndex = 0; - ColumnBlockData **blockDataArray = 
palloc0(columnCount * sizeof(ColumnBlockData*)); + ColumnBlockData **blockDataArray = palloc0(columnCount * sizeof(ColumnBlockData *)); /* allocate block memory for deserialized data */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) @@ -448,12 +448,12 @@ StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata) uint64 rowCount = 0; StringInfo firstColumnSkipListBuffer = NULL; - StripeFooter * stripeFooter = ReadStripeFooter(relid, stripeMetadata->id, - RelationColumnCount(relid)); + StripeFooter *stripeFooter = ReadStripeFooter(relid, stripeMetadata->id, + RelationColumnCount(relid)); firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, - stripeFooter->skipListSizeArray[0]); - rowCount = DeserializeRowCount(firstColumnSkipListBuffer); + stripeFooter->skipListSizeArray[0]); + rowCount = DeserializeRowCount(firstColumnSkipListBuffer); return rowCount; } @@ -573,7 +573,7 @@ LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, ColumnBuffers *columnBuffers = NULL; uint32 blockIndex = 0; ColumnBlockBuffers **blockBuffersArray = - palloc0(blockCount * sizeof(ColumnBlockBuffers *)); + palloc0(blockCount * sizeof(ColumnBlockBuffers *)); for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { @@ -761,7 +761,8 @@ SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList, constraintList = list_make1(baseConstraint); #if (PG_VERSION_NUM >= 100000) - predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList, false); + predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList, + false); #else predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList); #endif @@ -877,7 +878,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber) Oid accessMethodId = BTREE_AM_OID; Oid operatorId = InvalidOid; - Const *constantValue = NULL; + Const *constantValue = NULL; OpExpr *expression = NULL; /* Load the operator from system catalogs */ @@ 
-888,7 +889,7 @@ MakeOpExpression(Var *variable, int16 strategyNumber) /* Now make the expression with the given variable and a null constant */ expression = (OpExpr *) make_opclause(operatorId, InvalidOid, /* no result type yet */ - false, /* no return set */ + false, /* no return set */ (Expr *) variable, (Expr *) constantValue, InvalidOid, collationId); @@ -1163,7 +1164,8 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, if (columnBuffers != NULL) { - ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = NULL; /* free previous block's data buffers */ @@ -1214,7 +1216,6 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, { memset(blockData->existsArray, false, rowCount); } - } } } @@ -1330,8 +1331,6 @@ ReadFromFile(FILE *file, uint64 offset, uint32 size) } - - /* * ResetUncompressedBlockData iterates over deserialized column block data * and sets valueBuffer field to empty buffer. 
This field is allocated in stripe diff --git a/cstore_version_compat.h b/cstore_version_compat.h index 1b80b16c3..95521c1aa 100644 --- a/cstore_version_compat.h +++ b/cstore_version_compat.h @@ -22,7 +22,8 @@ #endif #if PG_VERSION_NUM < 110000 -#define ALLOCSET_DEFAULT_SIZES ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE +#define ALLOCSET_DEFAULT_SIZES ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, \ + ALLOCSET_DEFAULT_MAXSIZE #define ACLCHECK_OBJECT_TABLE ACL_KIND_CLASS #else #define ACLCHECK_OBJECT_TABLE OBJECT_TABLE @@ -46,9 +47,9 @@ #endif #if PG_VERSION_NUM < 120000 -#define TTS_EMPTY(slot) ((slot)->tts_isempty) +#define TTS_EMPTY(slot) ((slot)->tts_isempty) #define ExecForceStoreHeapTuple(tuple, slot, shouldFree) \ - ExecStoreTuple(newTuple, tupleSlot, InvalidBuffer, shouldFree); + ExecStoreTuple(newTuple, tupleSlot, InvalidBuffer, shouldFree); #define TableScanDesc HeapScanDesc #define table_beginscan heap_beginscan #define table_endscan heap_endscan diff --git a/cstore_writer.c b/cstore_writer.c index 76fc703f3..318d8d518 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -149,13 +149,15 @@ CStoreBeginWrite(Oid relationId, for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { FmgrInfo *comparisonFunction = NULL; - FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); + FormData_pg_attribute *attributeForm = TupleDescAttr(tupleDescriptor, + columnIndex); if (!attributeForm->attisdropped) { Oid typeId = attributeForm->atttypid; - comparisonFunction = GetFunctionInfoOrNull(typeId, BTREE_AM_OID, BTORDER_PROC); + comparisonFunction = GetFunctionInfoOrNull(typeId, BTREE_AM_OID, + BTORDER_PROC); } comparisonFunctionArray[columnIndex] = comparisonFunction; @@ -262,7 +264,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul bool columnTypeByValue = attributeForm->attbyval; int columnTypeLength = attributeForm->attlen; Oid columnCollation = 
attributeForm->attcollation; - char columnTypeAlign = attributeForm->attalign; + char columnTypeAlign = attributeForm->attalign; blockData->existsArray[blockRowIndex] = true; @@ -492,7 +494,7 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, static StripeMetadata FlushStripe(TableWriteState *writeState) { - StripeMetadata stripeMetadata = {0, 0, 0, 0}; + StripeMetadata stripeMetadata = { 0, 0, 0, 0 }; uint64 skipListLength = 0; uint64 dataLength = 0; StringInfo *skipListBufferArray = NULL; @@ -531,7 +533,7 @@ FlushStripe(TableWriteState *writeState) for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { ColumnBlockBuffers *blockBuffers = - columnBuffers->blockBuffersArray[blockIndex]; + columnBuffers->blockBuffersArray[blockIndex]; uint64 existsBufferSize = blockBuffers->existsBuffer->len; uint64 valueBufferSize = blockBuffers->valueBuffer->len; CompressionType valueCompressionType = blockBuffers->valueCompressionType; @@ -582,7 +584,7 @@ FlushStripe(TableWriteState *writeState) for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { ColumnBlockBuffers *blockBuffers = - columnBuffers->blockBuffersArray[blockIndex]; + columnBuffers->blockBuffersArray[blockIndex]; StringInfo existsBuffer = blockBuffers->existsBuffer; WriteToFile(tableFile, existsBuffer->data, existsBuffer->len); @@ -591,7 +593,7 @@ FlushStripe(TableWriteState *writeState) for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { ColumnBlockBuffers *blockBuffers = - columnBuffers->blockBuffersArray[blockIndex]; + columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = blockBuffers->valueBuffer; WriteToFile(tableFile, valueBuffer->data, valueBuffer->len); diff --git a/mod.c b/mod.c index dbc8eb923..d962e9820 100644 --- a/mod.c +++ b/mod.c @@ -20,14 +20,15 @@ PG_MODULE_MAGIC; -void _PG_init(void) +void +_PG_init(void) { cstore_fdw_init(); } -void _PG_fini(void) +void +_PG_fini(void) { cstore_fdw_finish(); } - From 
9e247cdf40200cc85c813d3774575bb63829d886 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 7 Sep 2020 21:51:28 -0700 Subject: [PATCH 008/124] Move table footer to metadata tables --- cstore.c | 15 +- cstore.h | 18 +- cstore.proto | 22 --- cstore_fdw--1.7.sql | 30 ++- cstore_fdw.c | 17 +- cstore_metadata_serialization.c | 198 ------------------- cstore_metadata_serialization.h | 3 - cstore_metadata_tables.c | 332 +++++++++++++++++++++++++++++++- cstore_reader.c | 142 ++------------ cstore_writer.c | 139 +++---------- expected/truncate.out | 4 +- 11 files changed, 407 insertions(+), 513 deletions(-) diff --git a/cstore.c b/cstore.c index a259f0430..658c15745 100644 --- a/cstore.c +++ b/cstore.c @@ -130,6 +130,8 @@ InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *csto TableWriteState *writeState = NULL; TupleDesc tupleDescriptor = RelationGetDescr(relation); + InitCStoreTableMetadata(relationId, cstoreOptions->blockRowCount); + /* * Initialize state to write to the cstore file. This creates an * empty data file and a valid footer file for the table. 
@@ -183,19 +185,6 @@ void DeleteCStoreTableFiles(char *filename) { int dataFileRemoved = 0; - int footerFileRemoved = 0; - - StringInfo tableFooterFilename = makeStringInfo(); - appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); - - /* delete the footer file */ - footerFileRemoved = unlink(tableFooterFilename->data); - if (footerFileRemoved != 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not delete file \"%s\": %m", - tableFooterFilename->data))); - } /* delete the data file */ dataFileRemoved = unlink(filename); diff --git a/cstore.h b/cstore.h index ed850d9ef..20cac7e05 100644 --- a/cstore.h +++ b/cstore.h @@ -46,8 +46,6 @@ /* miscellaneous defines */ #define CSTORE_FDW_NAME "cstore_fdw" -#define CSTORE_FOOTER_FILE_SUFFIX ".footer" -#define CSTORE_TEMP_FILE_SUFFIX ".tmp" #define CSTORE_TUPLE_COST_MULTIPLIER 10 #define CSTORE_POSTSCRIPT_SIZE_LENGTH 1 #define CSTORE_POSTSCRIPT_SIZE_MAX 256 @@ -91,12 +89,12 @@ typedef struct StripeMetadata } StripeMetadata; -/* TableFooter represents the footer of a cstore file. */ -typedef struct TableFooter +/* TableMetadata represents the metadata of a cstore file. */ +typedef struct TableMetadata { List *stripeMetadataList; uint64 blockRowCount; -} TableFooter; +} TableMetadata; /* ColumnBlockSkipNode contains statistics for a ColumnBlockData. 
*/ @@ -206,7 +204,7 @@ typedef struct TableReadState Oid relationId; FILE *tableFile; - TableFooter *tableFooter; + TableMetadata *tableMetadata; TupleDesc tupleDescriptor; /* @@ -231,8 +229,7 @@ typedef struct TableWriteState { Oid relationId; FILE *tableFile; - TableFooter *tableFooter; - StringInfo tableFooterFilename; + TableMetadata *tableMetadata; CompressionType compressionType; TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; @@ -277,7 +274,6 @@ extern void CStoreEndWrite(TableWriteState *state); extern TableReadState * CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, List *projectedColumnList, List *qualConditions); -extern TableFooter * CStoreReadFooter(StringInfo tableFooterFilename); extern bool CStoreReadFinished(TableReadState *state); extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, bool *columnNulls); @@ -298,6 +294,8 @@ extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressio /* cstore_metadata_tables.c */ extern void SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer); extern StripeFooter * ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount); - +extern void InitCStoreTableMetadata(Oid relid, int blockRowCount); +extern void InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe); +extern TableMetadata * ReadTableMetadata(Oid relid); #endif /* CSTORE_H */ diff --git a/cstore.proto b/cstore.proto index ea949c77c..a7525b633 100644 --- a/cstore.proto +++ b/cstore.proto @@ -22,25 +22,3 @@ message ColumnBlockSkipNode { message ColumnBlockSkipList { repeated ColumnBlockSkipNode blockSkipNodeArray = 1; } - -message StripeMetadata { - optional uint64 fileOffset = 1; - optional uint64 skipListLength = 2; - optional uint64 dataLength = 3; - optional uint64 footerLength = 4; - optional uint64 id = 5; -} - -message TableFooter { - repeated StripeMetadata stripeMetadataArray = 1; - optional uint32 blockRowCount = 2; -} - -message 
PostScript { - optional uint64 tableFooterLength = 1; - optional uint64 versionMajor = 2; - optional uint64 versionMinor = 3; - - // Leave this last in the record - optional string magicNumber = 8000; -} diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index fd526e711..86589ca90 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -58,17 +58,37 @@ CREATE EVENT TRIGGER cstore_drop_event ON SQL_DROP EXECUTE PROCEDURE cstore_drop_trigger(); +CREATE TABLE cstore_tables ( + relid oid, + block_row_count int, + version_major bigint, + version_minor bigint, + PRIMARY KEY (relid) +) WITH (user_catalog_table = true); + +ALTER TABLE cstore_tables SET SCHEMA pg_catalog; + +CREATE TABLE cstore_stripes ( + relid oid, + stripe bigint, + file_offset bigint, + skiplist_length bigint, + data_length bigint, + PRIMARY KEY (relid, stripe), + FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE +) WITH (user_catalog_table = true); + +ALTER TABLE cstore_stripes SET SCHEMA pg_catalog; + CREATE TABLE cstore_stripe_attr ( relid oid, stripe bigint, attr int, exists_size bigint, value_size bigint, - skiplist_size bigint + skiplist_size bigint, + PRIMARY KEY (relid, stripe, attr), + FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE ) WITH (user_catalog_table = true); -CREATE INDEX cstore_stripe_attr_idx - ON cstore_stripe_attr - USING BTREE(relid, stripe, attr); - ALTER TABLE cstore_stripe_attr SET SCHEMA pg_catalog; diff --git a/cstore_fdw.c b/cstore_fdw.c index 7d43c07d5..8ce3a7296 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -426,6 +426,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, RemoveCStoreDatabaseDirectory(databaseOid); } } + /* handle other utility statements */ else { @@ -1026,11 +1027,8 @@ cstore_table_size(PG_FUNCTION_ARGS) int64 tableSize = 0; CStoreOptions *cstoreOptions = NULL; char *dataFilename = NULL; - StringInfo footerFilename = NULL; int dataFileStatResult = 0; - int 
footerFileStatResult = 0; struct stat dataFileStatBuffer; - struct stat footerFileStatBuffer; bool cstoreTable = CStoreTable(relationId); if (!cstoreTable) @@ -1048,20 +1046,7 @@ cstore_table_size(PG_FUNCTION_ARGS) errmsg("could not stat file \"%s\": %m", dataFilename))); } - footerFilename = makeStringInfo(); - appendStringInfo(footerFilename, "%s%s", dataFilename, - CSTORE_FOOTER_FILE_SUFFIX); - - footerFileStatResult = stat(footerFilename->data, &footerFileStatBuffer); - if (footerFileStatResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - footerFilename->data))); - } - tableSize += dataFileStatBuffer.st_size; - tableSize += footerFileStatBuffer.st_size; PG_RETURN_INT64(tableSize); } diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c index 09c17ee7f..2b06d4a15 100644 --- a/cstore_metadata_serialization.c +++ b/cstore_metadata_serialization.c @@ -28,98 +28,6 @@ static Datum ProtobufBinaryToDatum(ProtobufCBinaryData protobufBinary, bool typeByValue, int typeLength); -/* - * SerializePostScript serializes the given postscript and returns the result as - * a StringInfo. 
- */ -StringInfo -SerializePostScript(uint64 tableFooterLength) -{ - StringInfo postscriptBuffer = NULL; - Protobuf__PostScript protobufPostScript = PROTOBUF__POST_SCRIPT__INIT; - uint8 *postscriptData = NULL; - uint32 postscriptSize = 0; - - protobufPostScript.has_tablefooterlength = true; - protobufPostScript.tablefooterlength = tableFooterLength; - protobufPostScript.has_versionmajor = true; - protobufPostScript.versionmajor = CSTORE_VERSION_MAJOR; - protobufPostScript.has_versionminor = true; - protobufPostScript.versionminor = CSTORE_VERSION_MINOR; - protobufPostScript.magicnumber = pstrdup(CSTORE_MAGIC_NUMBER); - - postscriptSize = protobuf__post_script__get_packed_size(&protobufPostScript); - postscriptData = palloc0(postscriptSize); - protobuf__post_script__pack(&protobufPostScript, postscriptData); - - postscriptBuffer = palloc0(sizeof(StringInfoData)); - postscriptBuffer->len = postscriptSize; - postscriptBuffer->maxlen = postscriptSize; - postscriptBuffer->data = (char *) postscriptData; - - return postscriptBuffer; -} - - -/* - * SerializeTableFooter serializes the given table footer and returns the result - * as a StringInfo. 
- */ -StringInfo -SerializeTableFooter(TableFooter *tableFooter) -{ - StringInfo tableFooterBuffer = NULL; - Protobuf__TableFooter protobufTableFooter = PROTOBUF__TABLE_FOOTER__INIT; - Protobuf__StripeMetadata **stripeMetadataArray = NULL; - ListCell *stripeMetadataCell = NULL; - uint8 *tableFooterData = NULL; - uint32 tableFooterSize = 0; - uint32 stripeIndex = 0; - - List *stripeMetadataList = tableFooter->stripeMetadataList; - uint32 stripeCount = list_length(stripeMetadataList); - stripeMetadataArray = palloc0(stripeCount * sizeof(Protobuf__StripeMetadata *)); - - foreach(stripeMetadataCell, stripeMetadataList) - { - StripeMetadata *stripeMetadata = lfirst(stripeMetadataCell); - - Protobuf__StripeMetadata *protobufStripeMetadata = NULL; - protobufStripeMetadata = palloc0(sizeof(Protobuf__StripeMetadata)); - protobuf__stripe_metadata__init(protobufStripeMetadata); - protobufStripeMetadata->has_fileoffset = true; - protobufStripeMetadata->fileoffset = stripeMetadata->fileOffset; - protobufStripeMetadata->has_skiplistlength = true; - protobufStripeMetadata->skiplistlength = stripeMetadata->skipListLength; - protobufStripeMetadata->has_datalength = true; - protobufStripeMetadata->datalength = stripeMetadata->dataLength; - protobufStripeMetadata->has_footerlength = true; - protobufStripeMetadata->footerlength = stripeMetadata->footerLength; - protobufStripeMetadata->has_id = true; - protobufStripeMetadata->id = stripeMetadata->id; - - stripeMetadataArray[stripeIndex] = protobufStripeMetadata; - stripeIndex++; - } - - protobufTableFooter.n_stripemetadataarray = stripeCount; - protobufTableFooter.stripemetadataarray = stripeMetadataArray; - protobufTableFooter.has_blockrowcount = true; - protobufTableFooter.blockrowcount = tableFooter->blockRowCount; - - tableFooterSize = protobuf__table_footer__get_packed_size(&protobufTableFooter); - tableFooterData = palloc0(tableFooterSize); - protobuf__table_footer__pack(&protobufTableFooter, tableFooterData); - - 
tableFooterBuffer = palloc0(sizeof(StringInfoData)); - tableFooterBuffer->len = tableFooterSize; - tableFooterBuffer->maxlen = tableFooterSize; - tableFooterBuffer->data = (char *) tableFooterData; - - return tableFooterBuffer; -} - - /* * SerializeColumnSkipList serializes a column skip list, where the colum skip * list includes all block skip nodes for that column. The function then returns @@ -194,112 +102,6 @@ SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCou } -/* - * DeserializePostScript deserializes the given postscript buffer and returns - * the size of table footer in tableFooterLength pointer. - */ -void -DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength) -{ - Protobuf__PostScript *protobufPostScript = NULL; - protobufPostScript = protobuf__post_script__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufPostScript == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid postscript buffer"))); - } - - if (protobufPostScript->versionmajor != CSTORE_VERSION_MAJOR || - protobufPostScript->versionminor > CSTORE_VERSION_MINOR) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid column store version number"))); - } - else if (strncmp(protobufPostScript->magicnumber, CSTORE_MAGIC_NUMBER, - NAMEDATALEN) != 0) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid magic number"))); - } - - (*tableFooterLength) = protobufPostScript->tablefooterlength; - - protobuf__post_script__free_unpacked(protobufPostScript, NULL); -} - - -/* - * DeserializeTableFooter deserializes the given buffer and returns the result as - * a TableFooter struct. 
- */ -TableFooter * -DeserializeTableFooter(StringInfo buffer) -{ - TableFooter *tableFooter = NULL; - Protobuf__TableFooter *protobufTableFooter = NULL; - List *stripeMetadataList = NIL; - uint64 blockRowCount = 0; - uint32 stripeCount = 0; - uint32 stripeIndex = 0; - - protobufTableFooter = protobuf__table_footer__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufTableFooter == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid table footer buffer"))); - } - - if (!protobufTableFooter->has_blockrowcount) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("missing required table footer metadata fields"))); - } - else if (protobufTableFooter->blockrowcount < BLOCK_ROW_COUNT_MINIMUM || - protobufTableFooter->blockrowcount > BLOCK_ROW_COUNT_MAXIMUM) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid block row count"))); - } - blockRowCount = protobufTableFooter->blockrowcount; - - stripeCount = protobufTableFooter->n_stripemetadataarray; - for (stripeIndex = 0; stripeIndex < stripeCount; stripeIndex++) - { - StripeMetadata *stripeMetadata = NULL; - Protobuf__StripeMetadata *protobufStripeMetadata = NULL; - - protobufStripeMetadata = protobufTableFooter->stripemetadataarray[stripeIndex]; - if (!protobufStripeMetadata->has_fileoffset || - !protobufStripeMetadata->has_skiplistlength || - !protobufStripeMetadata->has_datalength || - !protobufStripeMetadata->has_footerlength) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("missing required stripe metadata fields"))); - } - - stripeMetadata = palloc0(sizeof(StripeMetadata)); - stripeMetadata->fileOffset = protobufStripeMetadata->fileoffset; - stripeMetadata->skipListLength = protobufStripeMetadata->skiplistlength; - stripeMetadata->dataLength = protobufStripeMetadata->datalength; - stripeMetadata->footerLength = protobufStripeMetadata->footerlength; - stripeMetadata->id = 
protobufStripeMetadata->id; - - stripeMetadataList = lappend(stripeMetadataList, stripeMetadata); - } - - protobuf__table_footer__free_unpacked(protobufTableFooter, NULL); - - tableFooter = palloc0(sizeof(TableFooter)); - tableFooter->stripeMetadataList = stripeMetadataList; - tableFooter->blockRowCount = blockRowCount; - - return tableFooter; -} - - /* * DeserializeBlockCount deserializes the given column skip list buffer and * returns the number of blocks in column skip list. diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h index 12a3d135b..efd27000a 100644 --- a/cstore_metadata_serialization.h +++ b/cstore_metadata_serialization.h @@ -15,15 +15,12 @@ #define CSTORE_SERIALIZATION_H /* Function declarations for metadata serialization */ -extern StringInfo SerializePostScript(uint64 tableFooterLength); -extern StringInfo SerializeTableFooter(TableFooter *tableFooter); extern StringInfo SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, bool typeByValue, int typeLength); /* Function declarations for metadata deserialization */ extern void DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength); -extern TableFooter * DeserializeTableFooter(StringInfo buffer); extern uint32 DeserializeBlockCount(StringInfo buffer); extern uint32 DeserializeRowCount(StringInfo buffer); extern ColumnBlockSkipNode * DeserializeColumnSkipList(StringInfo buffer, diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index e2d003989..f5168de1e 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -13,13 +13,21 @@ #include "cstore_version_compat.h" #include +#include "access/heapam.h" #include "access/nbtree.h" #include "access/table.h" +#include "access/tableam.h" #include "access/xact.h" #include "catalog/indexing.h" #include "catalog/pg_namespace.h" #include "catalog/pg_collation.h" +#include "catalog/pg_type.h" #include "commands/defrem.h" +#include "commands/trigger.h" +#include 
"executor/executor.h" +#include "executor/spi.h" +#include "miscadmin.h" +#include "nodes/execnodes.h" #include "lib/stringinfo.h" #include "optimizer/optimizer.h" #include "port.h" @@ -33,9 +41,16 @@ static Oid CStoreStripeAttrRelationId(void); static Oid CStoreStripeAttrIndexRelationId(void); +static Oid CStoreStripesRelationId(void); +static Oid CStoreStripesIndexRelationId(void); +static Oid CStoreTablesRelationId(void); +static Oid CStoreTablesIndexRelationId(void); static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, uint64 existsSize, uint64 valuesSize, uint64 skiplistSize); +static int TableBlockRowCount(Oid relid); +static void DeleteTableMetadataRowIfExists(Oid relid); +static EState * create_estate_for_relation(Relation rel); /* constants for cstore_stripe_attr */ #define Natts_cstore_stripe_attr 6 @@ -46,6 +61,275 @@ static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, #define Anum_cstore_stripe_attr_value_size 5 #define Anum_cstore_stripe_attr_skiplist_size 6 +/* constants for cstore_table */ +#define Natts_cstore_tables 4 +#define Anum_cstore_tables_relid 1 +#define Anum_cstore_tables_block_row_count 2 +#define Anum_cstore_tables_version_major 3 +#define Anum_cstore_tables_version_minor 4 + +/* constants for cstore_stripe */ +#define Natts_cstore_stripes 5 +#define Anum_cstore_stripes_relid 1 +#define Anum_cstore_stripes_stripe 2 +#define Anum_cstore_stripes_file_offset 3 +#define Anum_cstore_stripes_skiplist_length 4 +#define Anum_cstore_stripes_data_length 5 + +/* + * InitCStoreTableMetadata adds a record for the given relation in cstore_table. 
+ */ +void +InitCStoreTableMetadata(Oid relid, int blockRowCount) +{ + Oid cstoreTableOid = InvalidOid; + Relation cstoreTable = NULL; + TupleDesc tupleDescriptor = NULL; + HeapTuple tuple = NULL; + + bool nulls[Natts_cstore_tables] = { 0 }; + Datum values[Natts_cstore_tables] = { + ObjectIdGetDatum(relid), + Int32GetDatum(blockRowCount), + Int32GetDatum(CSTORE_VERSION_MAJOR), + Int32GetDatum(CSTORE_VERSION_MINOR) + }; + + DeleteTableMetadataRowIfExists(relid); + + cstoreTableOid = CStoreTablesRelationId(); + cstoreTable = heap_open(cstoreTableOid, RowExclusiveLock); + tupleDescriptor = RelationGetDescr(cstoreTable); + + tuple = heap_form_tuple(tupleDescriptor, values, nulls); + + CatalogTupleInsert(cstoreTable, tuple); + + CommandCounterIncrement(); + + heap_close(cstoreTable, NoLock); +} + + +/* + * InsertStripeMetadataRow adds a row to cstore_stripes. + */ +void +InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) +{ + bool nulls[Natts_cstore_stripes] = { 0 }; + Datum values[Natts_cstore_stripes] = { + ObjectIdGetDatum(relid), + Int64GetDatum(stripe->id), + Int64GetDatum(stripe->fileOffset), + Int64GetDatum(stripe->skipListLength), + Int64GetDatum(stripe->dataLength) + }; + + Oid cstoreStripesOid = CStoreStripesRelationId(); + Relation cstoreStripes = heap_open(cstoreStripesOid, RowExclusiveLock); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripes); + + HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + + CatalogTupleInsert(cstoreStripes, tuple); + + CommandCounterIncrement(); + + heap_close(cstoreStripes, NoLock); +} + + +/* + * ReadTableMetadata constructs TableMetadata for a given relid by reading + * from cstore_tables and cstore_stripes. 
+ */ +TableMetadata * +ReadTableMetadata(Oid relid) +{ + Oid cstoreStripesOid = InvalidOid; + Relation cstoreStripes = NULL; + Relation index = NULL; + TupleDesc tupleDescriptor = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple; + + TableMetadata *tableMetadata = palloc0(sizeof(TableMetadata)); + tableMetadata->blockRowCount = TableBlockRowCount(relid); + + ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + + cstoreStripesOid = CStoreStripesRelationId(); + cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock); + index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); + tupleDescriptor = RelationGetDescr(cstoreStripes); + + scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, scanKey); + + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + StripeMetadata *stripeMetadata = NULL; + Datum datumArray[Natts_cstore_stripes]; + bool isNullArray[Natts_cstore_stripes]; + + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + + stripeMetadata = palloc0(sizeof(StripeMetadata)); + stripeMetadata->id = DatumGetInt64(datumArray[Anum_cstore_stripes_stripe - 1]); + stripeMetadata->fileOffset = DatumGetInt64( + datumArray[Anum_cstore_stripes_file_offset - 1]); + stripeMetadata->dataLength = DatumGetInt64( + datumArray[Anum_cstore_stripes_data_length - 1]); + stripeMetadata->skipListLength = DatumGetInt64( + datumArray[Anum_cstore_stripes_skiplist_length - 1]); + + tableMetadata->stripeMetadataList = lappend(tableMetadata->stripeMetadataList, + stripeMetadata); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreStripes, NoLock); + + return tableMetadata; +} + + +/* + * TableBlockRowCount returns block_row_count column from cstore_tables for a given relid. 
+ */ +static int +TableBlockRowCount(Oid relid) +{ + int blockRowCount = 0; + Oid cstoreTablesOid = InvalidOid; + Relation cstoreTables = NULL; + Relation index = NULL; + TupleDesc tupleDescriptor = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple = NULL; + + ScanKeyInit(&scanKey[0], Anum_cstore_tables_relid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + + cstoreTablesOid = CStoreTablesRelationId(); + cstoreTables = heap_open(cstoreTablesOid, AccessShareLock); + index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); + tupleDescriptor = RelationGetDescr(cstoreTables); + + scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); + + heapTuple = systable_getnext(scanDescriptor); + if (HeapTupleIsValid(heapTuple)) + { + Datum datumArray[Natts_cstore_tables]; + bool isNullArray[Natts_cstore_tables]; + heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); + blockRowCount = DatumGetInt32(datumArray[Anum_cstore_tables_block_row_count - 1]); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreTables, NoLock); + + return blockRowCount; +} + + +/* + * DeleteTableMetadataRowIfExists removes the row with given relid from cstore_tables.
+ */ +static void +DeleteTableMetadataRowIfExists(Oid relid) +{ + Oid cstoreTablesOid = InvalidOid; + Relation cstoreTables = NULL; + Relation index = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple = NULL; + + ScanKeyInit(&scanKey[0], Anum_cstore_tables_relid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + + cstoreTablesOid = CStoreTablesRelationId(); + cstoreTables = table_open(cstoreTablesOid, AccessShareLock); + index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); + + scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); + + heapTuple = systable_getnext(scanDescriptor); + if (HeapTupleIsValid(heapTuple)) + { + EState *estate = create_estate_for_relation(cstoreTables); + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + + ItemPointer tid = &(heapTuple->t_self); + simple_table_tuple_delete(cstoreTables, tid, estate->es_snapshot); + + /* + * Execute AFTER ROW DELETE Triggers to enforce foreign key + * constraints. + */ + ExecARDeleteTriggers(estate, resultRelInfo, + tid, NULL, NULL); + + AfterTriggerEndQuery(estate); + ExecCleanUpTriggerState(estate); + ExecResetTupleTable(estate->es_tupleTable, false); + FreeExecutorState(estate); + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + table_close(cstoreTables, NoLock); +} + + +/* + * Based on a similar function from + * postgres/src/backend/replication/logical/worker.c. + * + * Executor state preparation for evaluation of constraint expressions, + * indexes and triggers. 
+ * + * This is based on similar code in copy.c + */ +static EState * +create_estate_for_relation(Relation rel) +{ + EState *estate; + ResultRelInfo *resultRelInfo; + RangeTblEntry *rte; + + estate = CreateExecutorState(); + + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = RelationGetRelid(rel); + rte->relkind = rel->rd_rel->relkind; + rte->rellockmode = AccessShareLock; + ExecInitRangeTable(estate, list_make1(rte)); + + resultRelInfo = makeNode(ResultRelInfo); + InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0); + + estate->es_result_relations = resultRelInfo; + estate->es_num_result_relations = 1; + estate->es_result_relation_info = resultRelInfo; + + estate->es_output_cid = GetCurrentCommandId(true); + + /* Prepare to catch AFTER triggers. */ + AfterTriggerBeginQuery(); + + return estate; +} + + /* * SaveStripeFooter stores give StripeFooter as cstore_stripe_attr records. */ @@ -176,11 +460,55 @@ CStoreStripeAttrRelationId(void) /* - * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr_idx. + * CStoreStripeAttrIndexRelationId returns relation id of cstore_stripe_attr_pkey. * TODO: should we cache this similar to citus? */ static Oid CStoreStripeAttrIndexRelationId(void) { - return get_relname_relid("cstore_stripe_attr_idx", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_stripe_attr_pkey", PG_CATALOG_NAMESPACE); +} + + +/* + * CStoreStripesRelationId returns relation id of cstore_stripes. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreStripesRelationId(void) +{ + return get_relname_relid("cstore_stripes", PG_CATALOG_NAMESPACE); +} + + +/* + * CStoreStripesIndexRelationId returns relation id of cstore_stripes_pkey. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreStripesIndexRelationId(void) +{ + return get_relname_relid("cstore_stripes_pkey", PG_CATALOG_NAMESPACE); +} + + +/* + * CStoreTablesRelationId returns relation id of cstore_tables.
+ * TODO: should we cache this similar to citus? + */ +static Oid +CStoreTablesRelationId(void) +{ + return get_relname_relid("cstore_tables", PG_CATALOG_NAMESPACE); +} + + +/* + * CStoreTablesIndexRelationId returns relation id of cstore_tables_pkey. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreTablesIndexRelationId(void) +{ + return get_relname_relid("cstore_tables_pkey", PG_CATALOG_NAMESPACE); } diff --git a/cstore_reader.c b/cstore_reader.c index 78c7fe00e..ddef3395a 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -82,7 +82,6 @@ static void DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex TupleDesc tupleDescriptor); static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm); -static int64 FILESize(FILE *file); static StringInfo ReadFromFile(FILE *file, uint64 offset, uint32 size); static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount); @@ -99,20 +98,14 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { TableReadState *readState = NULL; - TableFooter *tableFooter = NULL; + TableMetadata *tableMetadata = NULL; FILE *tableFile = NULL; MemoryContext stripeReadContext = NULL; uint32 columnCount = 0; bool *projectedColumnMask = NULL; ColumnBlockData **blockDataArray = NULL; - StringInfo tableFooterFilename = makeStringInfo(); - appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); - - tableFooter = CStoreReadFooter(tableFooterFilename); - - pfree(tableFooterFilename->data); - pfree(tableFooterFilename); + tableMetadata = ReadTableMetadata(relationId); tableFile = AllocateFile(filename, PG_BINARY_R); if (tableFile == NULL) @@ -134,12 +127,12 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, columnCount = tupleDescriptor->natts; projectedColumnMask = ProjectedColumnMask(columnCount, 
projectedColumnList); blockDataArray = CreateEmptyBlockDataArray(columnCount, projectedColumnMask, - tableFooter->blockRowCount); + tableMetadata->blockRowCount); readState = palloc0(sizeof(TableReadState)); readState->relationId = relationId; readState->tableFile = tableFile; - readState->tableFooter = tableFooter; + readState->tableMetadata = tableMetadata; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; readState->stripeBuffers = NULL; @@ -154,76 +147,6 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, } -/* - * CStoreReadFooter reads the cstore file footer from the given file. First, the - * function reads the last byte of the file as the postscript size. Then, the - * function reads the postscript. Last, the function reads and deserializes the - * footer. - */ -TableFooter * -CStoreReadFooter(StringInfo tableFooterFilename) -{ - TableFooter *tableFooter = NULL; - FILE *tableFooterFile = NULL; - uint64 footerOffset = 0; - uint64 footerLength = 0; - StringInfo postscriptBuffer = NULL; - StringInfo postscriptSizeBuffer = NULL; - uint64 postscriptSizeOffset = 0; - uint8 postscriptSize = 0; - uint64 footerFileSize = 0; - uint64 postscriptOffset = 0; - StringInfo footerBuffer = NULL; - int freeResult = 0; - - tableFooterFile = AllocateFile(tableFooterFilename->data, PG_BINARY_R); - if (tableFooterFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for reading: %m", - tableFooterFilename->data), - errhint("Try copying in data to the table."))); - } - - footerFileSize = FILESize(tableFooterFile); - if (footerFileSize < CSTORE_POSTSCRIPT_SIZE_LENGTH) - { - ereport(ERROR, (errmsg("invalid cstore file"))); - } - - postscriptSizeOffset = footerFileSize - CSTORE_POSTSCRIPT_SIZE_LENGTH; - postscriptSizeBuffer = ReadFromFile(tableFooterFile, postscriptSizeOffset, - CSTORE_POSTSCRIPT_SIZE_LENGTH); - memcpy(&postscriptSize, 
postscriptSizeBuffer->data, CSTORE_POSTSCRIPT_SIZE_LENGTH); - if (postscriptSize + CSTORE_POSTSCRIPT_SIZE_LENGTH > footerFileSize) - { - ereport(ERROR, (errmsg("invalid postscript size"))); - } - - postscriptOffset = footerFileSize - (CSTORE_POSTSCRIPT_SIZE_LENGTH + postscriptSize); - postscriptBuffer = ReadFromFile(tableFooterFile, postscriptOffset, postscriptSize); - - DeserializePostScript(postscriptBuffer, &footerLength); - if (footerLength + postscriptSize + CSTORE_POSTSCRIPT_SIZE_LENGTH > footerFileSize) - { - ereport(ERROR, (errmsg("invalid footer size"))); - } - - footerOffset = postscriptOffset - footerLength; - footerBuffer = ReadFromFile(tableFooterFile, footerOffset, footerLength); - tableFooter = DeserializeTableFooter(footerBuffer); - - freeResult = FreeFile(tableFooterFile); - if (freeResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not close file: %m"))); - } - - return tableFooter; -} - - /* * CStoreReadNextRow tries to read a row from the cstore file. On success, it sets * column values and nulls, and returns true. 
If there are no more rows to read, @@ -234,7 +157,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu { uint32 blockIndex = 0; uint32 blockRowIndex = 0; - TableFooter *tableFooter = readState->tableFooter; + TableMetadata *tableMetadata = readState->tableMetadata; MemoryContext oldContext = NULL; /* @@ -247,7 +170,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu { StripeBuffers *stripeBuffers = NULL; StripeMetadata *stripeMetadata = NULL; - List *stripeMetadataList = tableFooter->stripeMetadataList; + List *stripeMetadataList = tableMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); StripeFooter *stripeFooter = NULL; @@ -284,8 +207,8 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu } } - blockIndex = readState->stripeReadRowCount / tableFooter->blockRowCount; - blockRowIndex = readState->stripeReadRowCount % tableFooter->blockRowCount; + blockIndex = readState->stripeReadRowCount / tableMetadata->blockRowCount; + blockRowIndex = readState->stripeReadRowCount % tableMetadata->blockRowCount; if (blockIndex != readState->deserializedBlockIndex) { @@ -294,14 +217,14 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu uint32 stripeRowCount = 0; stripeRowCount = readState->stripeBuffers->rowCount; - lastBlockIndex = stripeRowCount / tableFooter->blockRowCount; + lastBlockIndex = stripeRowCount / tableMetadata->blockRowCount; if (blockIndex == lastBlockIndex) { - blockRowCount = stripeRowCount % tableFooter->blockRowCount; + blockRowCount = stripeRowCount % tableMetadata->blockRowCount; } else { - blockRowCount = tableFooter->blockRowCount; + blockRowCount = tableMetadata->blockRowCount; } oldContext = MemoryContextSwitchTo(readState->stripeReadContext); @@ -341,9 +264,9 @@ CStoreEndRead(TableReadState *readState) MemoryContextDelete(readState->stripeReadContext); FreeFile(readState->tableFile); - 
list_free_deep(readState->tableFooter->stripeMetadataList); + list_free_deep(readState->tableMetadata->stripeMetadataList); FreeColumnBlockDataArray(readState->blockDataArray, columnCount); - pfree(readState->tableFooter); + pfree(readState->tableMetadata); pfree(readState); } @@ -405,19 +328,12 @@ FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount) uint64 CStoreTableRowCount(Oid relid, const char *filename) { - TableFooter *tableFooter = NULL; + TableMetadata *tableMetadata = NULL; FILE *tableFile; ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - StringInfo tableFooterFilename = makeStringInfo(); - - appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); - - tableFooter = CStoreReadFooter(tableFooterFilename); - - pfree(tableFooterFilename->data); - pfree(tableFooterFilename); + tableMetadata = ReadTableMetadata(relid); tableFile = AllocateFile(filename, PG_BINARY_R); if (tableFile == NULL) @@ -426,7 +342,7 @@ CStoreTableRowCount(Oid relid, const char *filename) errmsg("could not open file \"%s\" for reading: %m", filename))); } - foreach(stripeMetadataCell, tableFooter->stripeMetadataList) + foreach(stripeMetadataCell, tableMetadata->stripeMetadataList) { StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); totalRowCount += StripeRowCount(relid, tableFile, stripeMetadata); @@ -1263,32 +1179,6 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor } -/* Returns the size of the given file handle. 
*/ -static int64 -FILESize(FILE *file) -{ - int64 fileSize = 0; - int fseekResult = 0; - - errno = 0; - fseekResult = fseeko(file, 0, SEEK_END); - if (fseekResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek in file: %m"))); - } - - fileSize = ftello(file); - if (fileSize == -1) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not get position in file: %m"))); - } - - return fileSize; -} - - /* Reads the given segment from the given file. */ static StringInfo ReadFromFile(FILE *file, uint64 offset, uint32 size) diff --git a/cstore_writer.c b/cstore_writer.c index 318d8d518..240c13fc2 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -27,7 +27,6 @@ #include "cstore_metadata_serialization.h" #include "cstore_version_compat.h" -static void CStoreWriteFooter(StringInfo footerFileName, TableFooter *tableFooter); static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, uint32 blockRowCount, uint32 columnCount); @@ -50,7 +49,7 @@ static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, int columnTypeLength, Oid columnCollation, FmgrInfo *comparisonFunction); static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); -static void AppendStripeMetadata(TableFooter *tableFooter, +static void AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata); static void WriteToFile(FILE *file, void *data, uint32 dataLength); static void SyncAndCloseFile(FILE *file); @@ -72,61 +71,37 @@ CStoreBeginWrite(Oid relationId, { TableWriteState *writeState = NULL; FILE *tableFile = NULL; - StringInfo tableFooterFilename = NULL; - TableFooter *tableFooter = NULL; + TableMetadata *tableMetadata = NULL; FmgrInfo **comparisonFunctionArray = NULL; MemoryContext stripeWriteContext = NULL; uint64 currentFileOffset = 0; uint32 columnCount = 0; uint32 columnIndex = 0; - struct stat statBuffer; - int statResult = 0; bool *columnMaskArray = NULL; ColumnBlockData 
**blockData = NULL; uint64 currentStripeId = 0; - tableFooterFilename = makeStringInfo(); - appendStringInfo(tableFooterFilename, "%s%s", filename, CSTORE_FOOTER_FILE_SUFFIX); - - statResult = stat(tableFooterFilename->data, &statBuffer); - if (statResult < 0) + tableFile = AllocateFile(filename, "a+"); + if (tableFile == NULL) { - tableFile = AllocateFile(filename, "w"); - if (tableFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for writing: %m", - filename))); - } - - tableFooter = palloc0(sizeof(TableFooter)); - tableFooter->blockRowCount = blockRowCount; - tableFooter->stripeMetadataList = NIL; + ereport(ERROR, (errcode_for_file_access(), + errmsg("could not open file \"%s\" for writing: %m", + filename))); } - else - { - tableFile = AllocateFile(filename, "r+"); - if (tableFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for writing: %m", - filename))); - } - tableFooter = CStoreReadFooter(tableFooterFilename); - } + tableMetadata = ReadTableMetadata(relationId); /* * If stripeMetadataList is not empty, jump to the position right after * the last position. 
*/ - if (tableFooter->stripeMetadataList != NIL) + if (tableMetadata->stripeMetadataList != NIL) { StripeMetadata *lastStripe = NULL; uint64 lastStripeSize = 0; int fseekResult = 0; - lastStripe = llast(tableFooter->stripeMetadataList); + lastStripe = llast(tableMetadata->stripeMetadataList); lastStripeSize += lastStripe->skipListLength; lastStripeSize += lastStripe->dataLength; lastStripeSize += lastStripe->footerLength; @@ -180,8 +155,7 @@ CStoreBeginWrite(Oid relationId, writeState = palloc0(sizeof(TableWriteState)); writeState->relationId = relationId; writeState->tableFile = tableFile; - writeState->tableFooterFilename = tableFooterFilename; - writeState->tableFooter = tableFooter; + writeState->tableMetadata = tableMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; writeState->tupleDescriptor = tupleDescriptor; @@ -215,8 +189,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; uint32 columnCount = writeState->tupleDescriptor->natts; - TableFooter *tableFooter = writeState->tableFooter; - const uint32 blockRowCount = tableFooter->blockRowCount; + TableMetadata *tableMetadata = writeState->tableMetadata; + const uint32 blockRowCount = tableMetadata->blockRowCount; ColumnBlockData **blockDataArray = writeState->blockDataArray; MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); @@ -304,7 +278,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul * doesn't free it. 
*/ MemoryContextSwitchTo(oldContext); - AppendStripeMetadata(tableFooter, stripeMetadata); + InsertStripeMetadataRow(writeState->relationId, &stripeMetadata); + AppendStripeMetadata(tableMetadata, stripeMetadata); } else { @@ -322,9 +297,6 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul void CStoreEndWrite(TableWriteState *writeState) { - StringInfo tableFooterFilename = NULL; - StringInfo tempTableFooterFileName = NULL; - int renameResult = 0; int columnCount = writeState->tupleDescriptor->natts; StripeBuffers *stripeBuffers = writeState->stripeBuffers; @@ -336,85 +308,20 @@ CStoreEndWrite(TableWriteState *writeState) MemoryContextReset(writeState->stripeWriteContext); MemoryContextSwitchTo(oldContext); - AppendStripeMetadata(writeState->tableFooter, stripeMetadata); + InsertStripeMetadataRow(writeState->relationId, &stripeMetadata); + AppendStripeMetadata(writeState->tableMetadata, stripeMetadata); } SyncAndCloseFile(writeState->tableFile); - tableFooterFilename = writeState->tableFooterFilename; - tempTableFooterFileName = makeStringInfo(); - appendStringInfo(tempTableFooterFileName, "%s%s", tableFooterFilename->data, - CSTORE_TEMP_FILE_SUFFIX); - - CStoreWriteFooter(tempTableFooterFileName, writeState->tableFooter); - - renameResult = rename(tempTableFooterFileName->data, tableFooterFilename->data); - if (renameResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not rename file \"%s\" to \"%s\": %m", - tempTableFooterFileName->data, - tableFooterFilename->data))); - } - - pfree(tempTableFooterFileName->data); - pfree(tempTableFooterFileName); - MemoryContextDelete(writeState->stripeWriteContext); - list_free_deep(writeState->tableFooter->stripeMetadataList); - pfree(writeState->tableFooter); - pfree(writeState->tableFooterFilename->data); - pfree(writeState->tableFooterFilename); + list_free_deep(writeState->tableMetadata->stripeMetadataList); pfree(writeState->comparisonFunctionArray); 
FreeColumnBlockDataArray(writeState->blockDataArray, columnCount); pfree(writeState); } -/* - * CStoreWriteFooter writes the given footer to given file. First, the function - * serializes and writes the footer to the file. Then, the function serializes - * and writes the postscript. Then, the function writes the postscript size as - * the last byte of the file. Last, the function syncs and closes the footer file. - */ -static void -CStoreWriteFooter(StringInfo tableFooterFilename, TableFooter *tableFooter) -{ - FILE *tableFooterFile = NULL; - StringInfo tableFooterBuffer = NULL; - StringInfo postscriptBuffer = NULL; - uint8 postscriptSize = 0; - - tableFooterFile = AllocateFile(tableFooterFilename->data, PG_BINARY_W); - if (tableFooterFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for writing: %m", - tableFooterFilename->data))); - } - - /* write the footer */ - tableFooterBuffer = SerializeTableFooter(tableFooter); - WriteToFile(tableFooterFile, tableFooterBuffer->data, tableFooterBuffer->len); - - /* write the postscript */ - postscriptBuffer = SerializePostScript(tableFooterBuffer->len); - WriteToFile(tableFooterFile, postscriptBuffer->data, postscriptBuffer->len); - - /* write the 1-byte postscript size */ - Assert(postscriptBuffer->len < CSTORE_POSTSCRIPT_SIZE_MAX); - postscriptSize = postscriptBuffer->len; - WriteToFile(tableFooterFile, &postscriptSize, CSTORE_POSTSCRIPT_SIZE_LENGTH); - - SyncAndCloseFile(tableFooterFile); - - pfree(tableFooterBuffer->data); - pfree(tableFooterBuffer); - pfree(postscriptBuffer->data); - pfree(postscriptBuffer); -} - - /* * CreateEmptyStripeBuffers allocates an empty StripeBuffers structure with the given * column count. 
@@ -501,7 +408,7 @@ FlushStripe(TableWriteState *writeState) StripeFooter *stripeFooter = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; - TableFooter *tableFooter = writeState->tableFooter; + TableMetadata *tableMetadata = writeState->tableMetadata; FILE *tableFile = writeState->tableFile; StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; @@ -509,7 +416,7 @@ FlushStripe(TableWriteState *writeState) TupleDesc tupleDescriptor = writeState->tupleDescriptor; uint32 columnCount = tupleDescriptor->natts; uint32 blockCount = stripeSkipList->blockCount; - uint32 blockRowCount = tableFooter->blockRowCount; + uint32 blockRowCount = tableMetadata->blockRowCount; uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; @@ -918,13 +825,13 @@ DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength) * table footer's stripeMetadataList. */ static void -AppendStripeMetadata(TableFooter *tableFooter, StripeMetadata stripeMetadata) +AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata) { StripeMetadata *stripeMetadataCopy = palloc0(sizeof(StripeMetadata)); memcpy(stripeMetadataCopy, &stripeMetadata, sizeof(StripeMetadata)); - tableFooter->stripeMetadataList = lappend(tableFooter->stripeMetadataList, - stripeMetadataCopy); + tableMetadata->stripeMetadataList = lappend(tableMetadata->stripeMetadataList, + stripeMetadataCopy); } diff --git a/expected/truncate.out b/expected/truncate.out index e16a6ea9f..14119c804 100644 --- a/expected/truncate.out +++ b/expected/truncate.out @@ -72,7 +72,7 @@ SELECT count(*) FROM cstore_truncate_test_compressed; SELECT cstore_table_size('cstore_truncate_test_compressed'); cstore_table_size ------------------- - 26 + 0 (1 row) -- make sure data files still present @@ -82,7 +82,7 @@ SELECT count(*) FROM ( ) AS q1) AS q2; count ------- - 6 + 3 (1 row) INSERT INTO 
cstore_truncate_test select a, a from generate_series(1, 10) a; From 10fd94a9e3090fef1628fcc10e7fa32cd909edef Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Tue, 8 Sep 2020 19:03:01 -0700 Subject: [PATCH 009/124] Address feedback --- cstore_fdw--1.7.sql | 34 ++++++++--------- cstore_metadata_tables.c | 79 ++++++++++++++++++++++++++++++---------- 2 files changed, 76 insertions(+), 37 deletions(-) diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 86589ca90..726085b17 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -59,36 +59,36 @@ CREATE EVENT TRIGGER cstore_drop_event EXECUTE PROCEDURE cstore_drop_trigger(); CREATE TABLE cstore_tables ( - relid oid, - block_row_count int, - version_major bigint, - version_minor bigint, + relid oid NOT NULL, + block_row_count int NOT NULL, + version_major bigint NOT NULL, + version_minor bigint NOT NULL, PRIMARY KEY (relid) ) WITH (user_catalog_table = true); ALTER TABLE cstore_tables SET SCHEMA pg_catalog; CREATE TABLE cstore_stripes ( - relid oid, - stripe bigint, - file_offset bigint, - skiplist_length bigint, - data_length bigint, + relid oid NOT NULL, + stripe bigint NOT NULL, + file_offset bigint NOT NULL, + skiplist_length bigint NOT NULL, + data_length bigint NOT NULL, PRIMARY KEY (relid, stripe), - FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE + FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); ALTER TABLE cstore_stripes SET SCHEMA pg_catalog; CREATE TABLE cstore_stripe_attr ( - relid oid, - stripe bigint, - attr int, - exists_size bigint, - value_size bigint, - skiplist_size bigint, + relid oid NOT NULL, + stripe bigint NOT NULL, + attr int NOT NULL, + exists_size bigint NOT NULL, + value_size bigint NOT NULL, + skiplist_size bigint NOT NULL, PRIMARY KEY (relid, stripe, attr), - FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE + FOREIGN KEY (relid, stripe) 
REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); ALTER TABLE cstore_stripe_attr SET SCHEMA pg_catalog; diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index f5168de1e..5c381a029 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -50,6 +50,8 @@ static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, uint64 skiplistSize); static int TableBlockRowCount(Oid relid); static void DeleteTableMetadataRowIfExists(Oid relid); +static void InsertTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple); +static void DeleteTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple); static EState * create_estate_for_relation(Relation rel); /* constants for cstore_stripe_attr */ @@ -103,7 +105,7 @@ InitCStoreTableMetadata(Oid relid, int blockRowCount) tuple = heap_form_tuple(tupleDescriptor, values, nulls); - CatalogTupleInsert(cstoreTable, tuple); + InsertTupleAndEnforceConstraints(cstoreTable, tuple); CommandCounterIncrement(); @@ -132,7 +134,7 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); - CatalogTupleInsert(cstoreStripes, tuple); + InsertTupleAndEnforceConstraints(cstoreStripes, tuple); CommandCounterIncrement(); @@ -264,23 +266,7 @@ DeleteTableMetadataRowIfExists(Oid relid) heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { - EState *estate = create_estate_for_relation(cstoreTables); - ResultRelInfo *resultRelInfo = estate->es_result_relation_info; - - ItemPointer tid = &(heapTuple->t_self); - simple_table_tuple_delete(cstoreTables, tid, estate->es_snapshot); - - /* - * Execute AFTER ROW DELETE Triggers to enforce foreign key - * constraints. 
- */ - ExecARDeleteTriggers(estate, resultRelInfo, - tid, NULL, NULL); - - AfterTriggerEndQuery(estate); - ExecCleanUpTriggerState(estate); - ExecResetTupleTable(estate->es_tupleTable, false); - FreeExecutorState(estate); + DeleteTupleAndEnforceConstraints(cstoreTables, heapTuple); } systable_endscan_ordered(scanDescriptor); @@ -289,6 +275,59 @@ DeleteTableMetadataRowIfExists(Oid relid) } +/* + * InsertTupleAndEnforceConstraints inserts a tuple into a relation and + * makes sure constraints (e.g. FK constraints, NOT NULL, ...) are enforced. + */ +static void +InsertTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple) +{ + EState *estate = NULL; + TupleTableSlot *slot = NULL; + + estate = create_estate_for_relation(rel); + slot = ExecInitExtraTupleSlot(estate, RelationGetDescr(rel), &TTSOpsHeapTuple); + ExecStoreHeapTuple(heapTuple, slot, false); + + ExecOpenIndices(estate->es_result_relation_info, false); + + /* ExecSimpleRelationInsert executes any constraints */ + ExecSimpleRelationInsert(estate, slot); + + ExecCloseIndices(estate->es_result_relation_info); + + AfterTriggerEndQuery(estate); + ExecCleanUpTriggerState(estate); + ExecResetTupleTable(estate->es_tupleTable, false); + FreeExecutorState(estate); +} + + + +/* + * DeleteTupleAndEnforceConstraints deletes a tuple from a relation and + * makes sure constraints (e.g. FK constraints) are enforced. 
+ */ +static void +DeleteTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple) +{ + EState *estate = create_estate_for_relation(rel); + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + + ItemPointer tid = &(heapTuple->t_self); + simple_table_tuple_delete(rel, tid, estate->es_snapshot); + + /* execute AFTER ROW DELETE Triggers to enforce constraints */ + ExecARDeleteTriggers(estate, resultRelInfo, + tid, NULL, NULL); + + AfterTriggerEndQuery(estate); + ExecCleanUpTriggerState(estate); + ExecResetTupleTable(estate->es_tupleTable, false); + FreeExecutorState(estate); +} + + /* * Based on a similar function from * postgres/src/backend/replication/logical/worker.c. @@ -370,7 +409,7 @@ InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); - CatalogTupleInsert(cstoreStripeAttrs, tuple); + InsertTupleAndEnforceConstraints(cstoreStripeAttrs, tuple); CommandCounterIncrement(); From 35a52a6fe16e2fa761b1df43c096b8af333731ac Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Wed, 9 Sep 2020 11:04:27 -0700 Subject: [PATCH 010/124] Use cstore namespace instead of pg_catalog. --- cstore_fdw--1.7.sql | 18 +++++++----------- cstore_metadata_tables.c | 21 +++++++++++++++------ 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 726085b17..7a0c9c7b8 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -3,6 +3,8 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION cstore_fdw" to load this file. 
\quit +CREATE SCHEMA cstore; + CREATE FUNCTION cstore_fdw_handler() RETURNS fdw_handler AS 'MODULE_PATHNAME' @@ -58,7 +60,7 @@ CREATE EVENT TRIGGER cstore_drop_event ON SQL_DROP EXECUTE PROCEDURE cstore_drop_trigger(); -CREATE TABLE cstore_tables ( +CREATE TABLE cstore.cstore_tables ( relid oid NOT NULL, block_row_count int NOT NULL, version_major bigint NOT NULL, @@ -66,21 +68,17 @@ CREATE TABLE cstore_tables ( PRIMARY KEY (relid) ) WITH (user_catalog_table = true); -ALTER TABLE cstore_tables SET SCHEMA pg_catalog; - -CREATE TABLE cstore_stripes ( +CREATE TABLE cstore.cstore_stripes ( relid oid NOT NULL, stripe bigint NOT NULL, file_offset bigint NOT NULL, skiplist_length bigint NOT NULL, data_length bigint NOT NULL, PRIMARY KEY (relid, stripe), - FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid) REFERENCES cstore.cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -ALTER TABLE cstore_stripes SET SCHEMA pg_catalog; - -CREATE TABLE cstore_stripe_attr ( +CREATE TABLE cstore.cstore_stripe_attr ( relid oid NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, @@ -88,7 +86,5 @@ CREATE TABLE cstore_stripe_attr ( value_size bigint NOT NULL, skiplist_size bigint NOT NULL, PRIMARY KEY (relid, stripe, attr), - FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid, stripe) REFERENCES cstore.cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); - -ALTER TABLE cstore_stripe_attr SET SCHEMA pg_catalog; diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 5c381a029..39e852c55 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -22,6 +22,7 @@ #include "catalog/pg_namespace.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" +#include "catalog/namespace.h" #include "commands/defrem.h" #include 
"commands/trigger.h" #include "executor/executor.h" @@ -45,6 +46,7 @@ static Oid CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); static Oid CStoreTablesRelationId(void); static Oid CStoreTablesIndexRelationId(void); +static Oid CStoreNamespaceId(void); static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, uint64 existsSize, uint64 valuesSize, uint64 skiplistSize); @@ -494,7 +496,7 @@ ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) static Oid CStoreStripeAttrRelationId(void) { - return get_relname_relid("cstore_stripe_attr", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_stripe_attr", CStoreNamespaceId()); } @@ -505,7 +507,7 @@ CStoreStripeAttrRelationId(void) static Oid CStoreStripeAttrIndexRelationId(void) { - return get_relname_relid("cstore_stripe_attr_pkey", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_stripe_attr_pkey", CStoreNamespaceId()); } @@ -516,7 +518,7 @@ CStoreStripeAttrIndexRelationId(void) static Oid CStoreStripesRelationId(void) { - return get_relname_relid("cstore_stripes", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_stripes", CStoreNamespaceId()); } @@ -527,7 +529,7 @@ CStoreStripesRelationId(void) static Oid CStoreStripesIndexRelationId(void) { - return get_relname_relid("cstore_stripes_pkey", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_stripes_pkey", CStoreNamespaceId()); } @@ -538,7 +540,7 @@ CStoreStripesIndexRelationId(void) static Oid CStoreTablesRelationId(void) { - return get_relname_relid("cstore_tables", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_tables", CStoreNamespaceId()); } @@ -549,5 +551,12 @@ CStoreTablesRelationId(void) static Oid CStoreTablesIndexRelationId(void) { - return get_relname_relid("cstore_tables_pkey", PG_CATALOG_NAMESPACE); + return get_relname_relid("cstore_tables_pkey", CStoreNamespaceId()); +} + + +static Oid +CStoreNamespaceId(void) +{ + return get_namespace_oid("cstore", 
false); } From 0d4e249c97ba7f65a6dcacb4fc8527624b877d4b Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Wed, 9 Sep 2020 14:17:30 -0700 Subject: [PATCH 011/124] Reuse the same state for multiple inserts --- cstore_metadata_tables.c | 304 ++++++++++++++++++++------------------- 1 file changed, 157 insertions(+), 147 deletions(-) diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 39e852c55..3843e4cd6 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -40,6 +40,12 @@ #include "cstore_metadata_serialization.h" +typedef struct +{ + Relation rel; + EState *estate; +} ModifyState; + static Oid CStoreStripeAttrRelationId(void); static Oid CStoreStripeAttrIndexRelationId(void); static Oid CStoreStripesRelationId(void); @@ -47,13 +53,13 @@ static Oid CStoreStripesIndexRelationId(void); static Oid CStoreTablesRelationId(void); static Oid CStoreTablesIndexRelationId(void); static Oid CStoreNamespaceId(void); -static void InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, - uint64 existsSize, uint64 valuesSize, - uint64 skiplistSize); static int TableBlockRowCount(Oid relid); static void DeleteTableMetadataRowIfExists(Oid relid); -static void InsertTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple); -static void DeleteTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple); +static ModifyState * StartModifyRelation(Relation rel); +static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, + bool *nulls); +static void DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple); +static void FinishModifyRelation(ModifyState *state); static EState * create_estate_for_relation(Relation rel); /* constants for cstore_stripe_attr */ @@ -86,10 +92,9 @@ static EState * create_estate_for_relation(Relation rel); void InitCStoreTableMetadata(Oid relid, int blockRowCount) { - Oid cstoreTableOid = InvalidOid; - Relation cstoreTable = NULL; - TupleDesc tupleDescriptor = NULL; - 
HeapTuple tuple = NULL; + Oid cstoreTablesOid = InvalidOid; + Relation cstoreTables = NULL; + ModifyState *modifyState = NULL; bool nulls[Natts_cstore_tables] = { 0 }; Datum values[Natts_cstore_tables] = { @@ -101,17 +106,16 @@ InitCStoreTableMetadata(Oid relid, int blockRowCount) DeleteTableMetadataRowIfExists(relid); - cstoreTableOid = CStoreTablesRelationId(); - cstoreTable = heap_open(cstoreTableOid, RowExclusiveLock); - tupleDescriptor = RelationGetDescr(cstoreTable); + cstoreTablesOid = CStoreTablesRelationId(); + cstoreTables = heap_open(cstoreTablesOid, RowExclusiveLock); - tuple = heap_form_tuple(tupleDescriptor, values, nulls); - - InsertTupleAndEnforceConstraints(cstoreTable, tuple); + modifyState = StartModifyRelation(cstoreTables); + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + FinishModifyRelation(modifyState); CommandCounterIncrement(); - heap_close(cstoreTable, NoLock); + heap_close(cstoreTables, NoLock); } @@ -132,11 +136,10 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) Oid cstoreStripesOid = CStoreStripesRelationId(); Relation cstoreStripes = heap_open(cstoreStripesOid, RowExclusiveLock); - TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripes); - HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); - - InsertTupleAndEnforceConstraints(cstoreStripes, tuple); + ModifyState *modifyState = StartModifyRelation(cstoreStripes); + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + FinishModifyRelation(modifyState); CommandCounterIncrement(); @@ -268,7 +271,9 @@ DeleteTableMetadataRowIfExists(Oid relid) heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { - DeleteTupleAndEnforceConstraints(cstoreTables, heapTuple); + ModifyState *modifyState = StartModifyRelation(cstoreTables); + DeleteTupleAndEnforceConstraints(modifyState, heapTuple); + FinishModifyRelation(modifyState); } systable_endscan_ordered(scanDescriptor); @@ -277,144 +282,33 @@ 
DeleteTableMetadataRowIfExists(Oid relid) } -/* - * InsertTupleAndEnforceConstraints inserts a tuple into a relation and - * makes sure constraints (e.g. FK constraints, NOT NULL, ...) are enforced. - */ -static void -InsertTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple) -{ - EState *estate = NULL; - TupleTableSlot *slot = NULL; - - estate = create_estate_for_relation(rel); - slot = ExecInitExtraTupleSlot(estate, RelationGetDescr(rel), &TTSOpsHeapTuple); - ExecStoreHeapTuple(heapTuple, slot, false); - - ExecOpenIndices(estate->es_result_relation_info, false); - - /* ExecSimpleRelationInsert executes any constraints */ - ExecSimpleRelationInsert(estate, slot); - - ExecCloseIndices(estate->es_result_relation_info); - - AfterTriggerEndQuery(estate); - ExecCleanUpTriggerState(estate); - ExecResetTupleTable(estate->es_tupleTable, false); - FreeExecutorState(estate); -} - - - -/* - * DeleteTupleAndEnforceConstraints deletes a tuple from a relation and - * makes sure constraints (e.g. FK constraints) are enforced. - */ -static void -DeleteTupleAndEnforceConstraints(Relation rel, HeapTuple heapTuple) -{ - EState *estate = create_estate_for_relation(rel); - ResultRelInfo *resultRelInfo = estate->es_result_relation_info; - - ItemPointer tid = &(heapTuple->t_self); - simple_table_tuple_delete(rel, tid, estate->es_snapshot); - - /* execute AFTER ROW DELETE Triggers to enforce constraints */ - ExecARDeleteTriggers(estate, resultRelInfo, - tid, NULL, NULL); - - AfterTriggerEndQuery(estate); - ExecCleanUpTriggerState(estate); - ExecResetTupleTable(estate->es_tupleTable, false); - FreeExecutorState(estate); -} - - -/* - * Based on a similar function from - * postgres/src/backend/replication/logical/worker.c. - * - * Executor state preparation for evaluation of constraint expressions, - * indexes and triggers. 
- * - * This is based on similar code in copy.c - */ -static EState * -create_estate_for_relation(Relation rel) -{ - EState *estate; - ResultRelInfo *resultRelInfo; - RangeTblEntry *rte; - - estate = CreateExecutorState(); - - rte = makeNode(RangeTblEntry); - rte->rtekind = RTE_RELATION; - rte->relid = RelationGetRelid(rel); - rte->relkind = rel->rd_rel->relkind; - rte->rellockmode = AccessShareLock; - ExecInitRangeTable(estate, list_make1(rte)); - - resultRelInfo = makeNode(ResultRelInfo); - InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0); - - estate->es_result_relations = resultRelInfo; - estate->es_num_result_relations = 1; - estate->es_result_relation_info = resultRelInfo; - - estate->es_output_cid = GetCurrentCommandId(true); - - /* Prepare to catch AFTER triggers. */ - AfterTriggerBeginQuery(); - - return estate; -} - - /* * SaveStripeFooter stores give StripeFooter as cstore_stripe_attr records. */ void SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer) { - for (AttrNumber attr = 1; attr <= footer->columnCount; attr++) - { - InsertStripeAttrRow(relid, stripe, attr, - footer->existsSizeArray[attr - 1], - footer->valueSizeArray[attr - 1], - footer->skipListSizeArray[attr - 1]); - } -} - - -/* - * InsertStripeAttrRow adds a row to cstore_stripe_attr. 
- */ -static void -InsertStripeAttrRow(Oid relid, uint64 stripe, AttrNumber attr, - uint64 existsSize, uint64 valuesSize, - uint64 skiplistSize) -{ - bool nulls[Natts_cstore_stripe_attr] = { 0 }; - Datum values[Natts_cstore_stripe_attr] = { - ObjectIdGetDatum(relid), - Int64GetDatum(stripe), - Int16GetDatum(attr), - Int64GetDatum(existsSize), - Int64GetDatum(valuesSize), - Int64GetDatum(skiplistSize) - }; - Oid cstoreStripeAttrOid = CStoreStripeAttrRelationId(); Relation cstoreStripeAttrs = heap_open(cstoreStripeAttrOid, RowExclusiveLock); - TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripeAttrs); - HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + ModifyState *modifyState = StartModifyRelation(cstoreStripeAttrs); - InsertTupleAndEnforceConstraints(cstoreStripeAttrs, tuple); + for (AttrNumber attr = 1; attr <= footer->columnCount; attr++) + { + bool nulls[Natts_cstore_stripe_attr] = { 0 }; + Datum values[Natts_cstore_stripe_attr] = { + ObjectIdGetDatum(relid), + Int64GetDatum(stripe), + Int16GetDatum(attr), + Int64GetDatum(footer->existsSizeArray[attr - 1]), + Int64GetDatum(footer->valueSizeArray[attr - 1]), + Int64GetDatum(footer->skipListSizeArray[attr - 1]) + }; - CommandCounterIncrement(); + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + } + FinishModifyRelation(modifyState); heap_close(cstoreStripeAttrs, NoLock); } @@ -489,6 +383,118 @@ ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) } +/* + * StartModifyRelation allocates resources for modifications. + */ +static ModifyState * +StartModifyRelation(Relation rel) +{ + ModifyState *modifyState = NULL; + EState *estate = create_estate_for_relation(rel); + + /* ExecSimpleRelationInsert, ... 
require caller to open indexes */ + ExecOpenIndices(estate->es_result_relation_info, false); + + modifyState = palloc(sizeof(ModifyState)); + modifyState->rel = rel; + modifyState->estate = estate; + + return modifyState; +} + + +/* + * InsertTupleAndEnforceConstraints inserts a tuple into a relation and makes + * sure constraints are enforced and indexes are updated. + */ +static void +InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls) +{ + TupleDesc tupleDescriptor = RelationGetDescr(state->rel); + HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor, + &TTSOpsHeapTuple); + ExecStoreHeapTuple(tuple, slot, false); + + /* use ExecSimpleRelationInsert to enforce constraints */ + ExecSimpleRelationInsert(state->estate, slot); +} + + +/* + * DeleteTupleAndEnforceConstraints deletes a tuple from a relation and + * makes sure constraints (e.g. FK constraints) are enforced. + */ +static void +DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple) +{ + EState *estate = state->estate; + ResultRelInfo *resultRelInfo = estate->es_result_relation_info; + + ItemPointer tid = &(heapTuple->t_self); + simple_table_tuple_delete(state->rel, tid, estate->es_snapshot); + + /* execute AFTER ROW DELETE Triggers to enforce constraints */ + ExecARDeleteTriggers(estate, resultRelInfo, tid, NULL, NULL); +} + + +/* + * FinishModifyRelation cleans up resources after modifications are done. + */ +static void +FinishModifyRelation(ModifyState *state) +{ + ExecCloseIndices(state->estate->es_result_relation_info); + + AfterTriggerEndQuery(state->estate); + ExecCleanUpTriggerState(state->estate); + ExecResetTupleTable(state->estate->es_tupleTable, false); + FreeExecutorState(state->estate); +} + + +/* + * Based on a similar function from + * postgres/src/backend/replication/logical/worker.c. 
+ * + * Executor state preparation for evaluation of constraint expressions, + * indexes and triggers. + * + * This is based on similar code in copy.c + */ +static EState * +create_estate_for_relation(Relation rel) +{ + EState *estate; + ResultRelInfo *resultRelInfo; + RangeTblEntry *rte; + + estate = CreateExecutorState(); + + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = RelationGetRelid(rel); + rte->relkind = rel->rd_rel->relkind; + rte->rellockmode = AccessShareLock; + ExecInitRangeTable(estate, list_make1(rte)); + + resultRelInfo = makeNode(ResultRelInfo); + InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0); + + estate->es_result_relations = resultRelInfo; + estate->es_num_result_relations = 1; + estate->es_result_relation_info = resultRelInfo; + + estate->es_output_cid = GetCurrentCommandId(true); + + /* Prepare to catch AFTER triggers. */ + AfterTriggerBeginQuery(); + + return estate; +} + + /* * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr. * TODO: should we cache this similar to citus? @@ -555,6 +561,10 @@ CStoreTablesIndexRelationId(void) } +/* + * CStoreNamespaceId returns namespace id of the schema we store cstore + * related tables. 
+ */ static Oid CStoreNamespaceId(void) { From e9045227cd2c31acf568af623de4bb27fd39eb73 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 9 Sep 2020 12:44:41 -0700 Subject: [PATCH 012/124] create relfilenode for FDW --- cstore_fdw.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 146 insertions(+), 15 deletions(-) diff --git a/cstore_fdw.c b/cstore_fdw.c index 8ce3a7296..073a68130 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -21,9 +21,13 @@ #include "access/heapam.h" #include "access/reloptions.h" #include "access/tuptoaster.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/indexing.h" #include "catalog/namespace.h" #include "catalog/pg_foreign_table.h" #include "catalog/pg_namespace.h" +#include "catalog/storage.h" #include "commands/copy.h" #include "commands/dbcommands.h" #include "commands/defrem.h" @@ -50,18 +54,20 @@ #include "parser/parser.h" #include "parser/parse_coerce.h" #include "parser/parse_type.h" +#include "storage/smgr.h" #include "tcop/utility.h" #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" +#if PG_VERSION_NUM < 120000 +#include "utils/rel.h" +#endif #if PG_VERSION_NUM >= 120000 #include "utils/snapmgr.h" #else #include "utils/tqual.h" #endif -#if PG_VERSION_NUM < 120000 -#include "utils/rel.h" -#endif +#include "utils/syscache.h" #include "cstore.h" #include "cstore_fdw.h" @@ -124,6 +130,7 @@ static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); static List * DroppedCStoreFilenameList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); +static void InitializeRelFileNode(Relation relation); static void TruncateCStoreTables(List *cstoreRelationList); static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); @@ -183,6 +190,9 @@ static void CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relatio static 
bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); #endif +static void cstore_fdw_initrel(Relation rel); +static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); +static Relation cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode); PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); PG_FUNCTION_INFO_V1(cstore_table_size); @@ -261,7 +271,7 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) { Oid relationId = RangeVarGetRelid(createStatement->base.relation, AccessShareLock, false); - Relation relation = heap_open(relationId, AccessExclusiveLock); + Relation relation = cstore_fdw_open(relationId, AccessExclusiveLock); /* * Make sure database directory exists before creating a table. @@ -368,7 +378,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, foreach(fileListCell, droppedTables) { char *fileName = lfirst(fileListCell); - + //TODO: relation storage is not dropped DeleteCStoreTableFiles(fileName); } } @@ -562,7 +572,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) * Open and lock the relation. We acquire ShareUpdateExclusiveLock to allow * concurrent reads, but block concurrent writes. 
*/ - relation = heap_openrv(copyStatement->relation, ShareUpdateExclusiveLock); + relation = cstore_fdw_openrv(copyStatement->relation, ShareUpdateExclusiveLock); relationId = RelationGetRelid(relation); /* allocate column values and nulls arrays */ @@ -850,7 +860,7 @@ OpenRelationsForTruncate(List *cstoreTableList) foreach(relationCell, cstoreTableList) { RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); - Relation relation = heap_openrv(rangeVar, AccessExclusiveLock); + Relation relation = cstore_fdw_openrv(rangeVar, AccessExclusiveLock); Oid relationId = relation->rd_id; AclResult aclresult = pg_class_aclcheck(relationId, GetUserId(), ACL_TRUNCATE); @@ -889,11 +899,76 @@ TruncateCStoreTables(List *cstoreRelationList) Assert(CStoreTable(relationId)); cstoreOptions = CStoreGetOptions(relationId); + if (OidIsValid(relation->rd_rel->relfilenode)) + { + RelationOpenSmgr(relation); + RelationDropStorage(relation); + } DeleteCStoreTableFiles(cstoreOptions->filename); InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); } } +/* + * Version 11 and earlier already create a relfilenode for foreign + * tables. Version 12 and later do not, so we need to create one manually. + */ +static void +InitializeRelFileNode(Relation relation) +{ +#if PG_VERSION_NUM >= 120000 + Relation pg_class; + HeapTuple tuple; + Form_pg_class classform; + + /* + * Get a writable copy of the pg_class tuple for the given relation. 
+ */ + pg_class = heap_open(RelationRelationId, RowExclusiveLock); + + tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(relation))); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "could not find tuple for relation %u", + RelationGetRelid(relation)); + classform = (Form_pg_class) GETSTRUCT(tuple); + + if (!OidIsValid(classform->relfilenode)) + { + Oid tablespace; + Oid filenode = relation->rd_id; + char persistence = relation->rd_rel->relpersistence; + RelFileNode newrnode; + SMgrRelation srel; + + if (OidIsValid(relation->rd_rel->reltablespace)) + tablespace = relation->rd_rel->reltablespace; + else + tablespace = MyDatabaseTableSpace; + + newrnode.spcNode = tablespace; + newrnode.dbNode = MyDatabaseId; + newrnode.relNode = filenode; + + srel = RelationCreateStorage(newrnode, persistence); + smgrclose(srel); + + classform->relfilenode = filenode; + classform->relpages = 0; /* it's empty until further notice */ + classform->reltuples = 0; + classform->relallvisible = 0; + classform->relfrozenxid = InvalidTransactionId; + classform->relminmxid = InvalidTransactionId; + + CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); + CommandCounterIncrement(); + } + + heap_freetuple(tuple); + heap_close(pg_class, RowExclusiveLock); +#endif +} + /* * CStoreTable checks if the given table name belongs to a foreign columnar store @@ -1179,6 +1254,7 @@ cstore_clean_table_resources(PG_FUNCTION_ARGS) struct stat fileStat; int statResult = -1; + //TODO: relation storage is not dropped appendStringInfo(filePath, "%s/%s/%d/%d", DataDir, CSTORE_FDW_NAME, (int) MyDatabaseId, (int) relationId); @@ -1402,7 +1478,7 @@ static char * CStoreDefaultFilePath(Oid foreignTableId) { StringInfo cstoreFilePath = NULL; - Relation relation = relation_open(foreignTableId, AccessShareLock); + Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); RelFileNode relationFileNode = relation->rd_node; Oid databaseOid = relationFileNode.dbNode; Oid relationFileOid = 
relationFileNode.relNode; @@ -1453,7 +1529,7 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId { Path *foreignScanPath = NULL; CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); - Relation relation = heap_open(foreignTableId, AccessShareLock); + Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); /* * We skip reading columns that are not in query. Here we assume that all @@ -1659,7 +1735,7 @@ ColumnList(RelOptInfo *baserel, Oid foreignTableId) List *restrictInfoList = baserel->baserestrictinfo; ListCell *restrictInfoCell = NULL; const AttrNumber wholeRow = 0; - Relation relation = heap_open(foreignTableId, AccessShareLock); + Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); TupleDesc tupleDescriptor = RelationGetDescr(relation); /* first add the columns used in joins and projections */ @@ -1750,10 +1826,13 @@ ColumnList(RelOptInfo *baserel, Oid foreignTableId) static void CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState) { - Oid foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation); - CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); + Relation relation = scanState->ss.ss_currentRelation; + CStoreOptions *cstoreOptions; + Oid foreignTableId; - ExplainPropertyText("CStore File", cstoreOptions->filename, explainState); + cstore_fdw_initrel(relation); + foreignTableId = RelationGetRelid(relation); + cstoreOptions = CStoreGetOptions(foreignTableId); /* supress file size if we're not showing cost details */ if (explainState->costs) @@ -1784,6 +1863,8 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) List *foreignPrivateList = NIL; List *whereClauseList = NIL; + cstore_fdw_initrel(currentRelation); + /* if Explain with no Analyze, do nothing */ if (executorFlags & EXEC_FLAG_EXPLAIN_ONLY) { @@ -1869,9 +1950,12 @@ CStoreAnalyzeForeignTable(Relation relation, BlockNumber *totalPageCount) { Oid 
foreignTableId = RelationGetRelid(relation); - CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); + CStoreOptions *cstoreOptions; struct stat statBuffer; + cstore_fdw_initrel(relation); + cstoreOptions = CStoreGetOptions(foreignTableId); + int statResult = stat(cstoreOptions->filename, &statBuffer); if (statResult < 0) { @@ -1924,6 +2008,7 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, TupleDesc tupleDescriptor = RelationGetDescr(relation); uint32 columnCount = tupleDescriptor->natts; + cstore_fdw_initrel(relation); /* create list of columns of the relation */ uint32 columnIndex = 0; @@ -2147,7 +2232,7 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela Relation relation = NULL; foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc); - relation = heap_open(foreignTableOid, ShareUpdateExclusiveLock); + relation = cstore_fdw_open(foreignTableOid, ShareUpdateExclusiveLock); cstoreOptions = CStoreGetOptions(foreignTableOid); tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); @@ -2246,3 +2331,49 @@ CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, #endif + +/* + * Versions 12 and later do not initialize rd_node even if the relation has a + * valid relfilenode, so we need to initialize it each time a cstore FDW + * relation is opened. + */ +static void +cstore_fdw_initrel(Relation rel) +{ +#if PG_VERSION_NUM >= 120000 + if (rel->rd_rel->relfilenode == InvalidOid) + InitializeRelFileNode(rel); + + /* + * Copied code from RelationInitPhysicalAddr(), which doesn't + * work on foreign tables. 
+ */ + if (OidIsValid(rel->rd_rel->reltablespace)) + rel->rd_node.spcNode = rel->rd_rel->reltablespace; + else + rel->rd_node.spcNode = MyDatabaseTableSpace; + + rel->rd_node.dbNode = MyDatabaseId; + rel->rd_node.relNode = rel->rd_rel->relfilenode; +#endif +} + +static Relation +cstore_fdw_open(Oid relationId, LOCKMODE lockmode) +{ + Relation rel = heap_open(relationId, lockmode); + + cstore_fdw_initrel(rel); + + return rel; +} + +static Relation +cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode) +{ + Relation rel = heap_openrv(relation, lockmode); + + cstore_fdw_initrel(rel); + + return rel; +} From b18c9c8060365ffb2487934d1a2c693a95152fd8 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 11 Sep 2020 14:21:56 -0700 Subject: [PATCH 013/124] drop storage for DROP command --- cstore_fdw.c | 75 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/cstore_fdw.c b/cstore_fdw.c index 073a68130..5ad465807 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -127,7 +127,7 @@ static uint64 CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString); static uint64 CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString); static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); -static List * DroppedCStoreFilenameList(DropStmt *dropStatement); +static List * DroppedCStoreRelidList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void InitializeRelFileNode(Relation relation); @@ -369,17 +369,43 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } else { - ListCell *fileListCell = NULL; - List *droppedTables = DroppedCStoreFilenameList((DropStmt *) parseTree); + List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); + List *dropFiles = NIL; + ListCell *lc = NULL; + + /* drop smgr storage */ + foreach(lc, dropRelids) + { + Oid relid = 
lfirst_oid(lc); + Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); + CStoreOptions *cstoreOptions = CStoreGetOptions(relid); + char *defaultfilename = CStoreDefaultFilePath(relid); + + RelationOpenSmgr(relation); + RelationDropStorage(relation); + heap_close(relation, AccessExclusiveLock); + + /* + * Skip files that are placed in default location, they are handled + * by sql drop trigger. Both paths are generated by code, use + * of strcmp is safe here. + */ + if (strcmp(defaultfilename, cstoreOptions->filename) == 0) + { + continue; + } + + dropFiles = lappend(dropFiles, cstoreOptions->filename); + } CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); - foreach(fileListCell, droppedTables) + /* drop files */ + foreach(lc, dropFiles) { - char *fileName = lfirst(fileListCell); - //TODO: relation storage is not dropped - DeleteCStoreTableFiles(fileName); + char *filename = lfirst(lc); + DeleteCStoreTableFiles(filename); } } } @@ -783,13 +809,13 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) /* - * DropppedCStoreFilenameList extracts and returns the list of cstore file names + * DropppedCStoreRelidList extracts and returns the list of cstore relids * from DROP table statement */ static List * -DroppedCStoreFilenameList(DropStmt *dropStatement) +DroppedCStoreRelidList(DropStmt *dropStatement) { - List *droppedCStoreFileList = NIL; + List *droppedCStoreRelidList = NIL; if (dropStatement->removeType == OBJECT_FOREIGN_TABLE) { @@ -802,26 +828,13 @@ DroppedCStoreFilenameList(DropStmt *dropStatement) Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); if (CStoreTable(relationId)) { - CStoreOptions *cstoreOptions = CStoreGetOptions(relationId); - char *defaultfilename = CStoreDefaultFilePath(relationId); - - /* - * Skip files that are placed in default location, they are handled - * by sql drop trigger. Both paths are generated by code, use - * of strcmp is safe here. 
- */ - if (strcmp(defaultfilename, cstoreOptions->filename) == 0) - { - continue; - } - - droppedCStoreFileList = lappend(droppedCStoreFileList, - cstoreOptions->filename); + droppedCStoreRelidList = lappend_oid(droppedCStoreRelidList, + relationId); } } } - return droppedCStoreFileList; + return droppedCStoreRelidList; } @@ -1254,7 +1267,15 @@ cstore_clean_table_resources(PG_FUNCTION_ARGS) struct stat fileStat; int statResult = -1; - //TODO: relation storage is not dropped + /* + * TODO: Event triggers do not offer the relfilenode of the + * dropped table, and by the time the sql_drop event trigger + * is called, the object is already gone so we can't look it + * up. Therefore, we can't drop the Smgr storage here, which + * means that cascaded drops of cstore foreign tables will + * leak storage. + */ + appendStringInfo(filePath, "%s/%s/%d/%d", DataDir, CSTORE_FDW_NAME, (int) MyDatabaseId, (int) relationId); From a2f7eadeb9fba32b46158eec6f6837b1bf1b2ac3 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 11 Sep 2020 16:02:00 -0700 Subject: [PATCH 014/124] lock while initializing relfilenode --- cstore_fdw.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cstore_fdw.c b/cstore_fdw.c index 5ad465807..07b47d590 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -948,12 +948,21 @@ InitializeRelFileNode(Relation relation) if (!OidIsValid(classform->relfilenode)) { + Relation tmprel; Oid tablespace; Oid filenode = relation->rd_id; char persistence = relation->rd_rel->relpersistence; RelFileNode newrnode; SMgrRelation srel; + /* + * Upgrade to AccessExclusiveLock, and hold until the end of the + * transaction. This shouldn't happen during a read, but it's hard to + * prove that because it happens lazily. 
+ */ + tmprel = heap_open(relation->rd_id, AccessExclusiveLock); + heap_close(tmprel, NoLock); + if (OidIsValid(relation->rd_rel->reltablespace)) tablespace = relation->rd_rel->reltablespace; else From dee408248cd0ec5830df0ccbd456127a96dc65be Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 9 Sep 2020 12:44:41 -0700 Subject: [PATCH 015/124] Replace file access with Smgr --- cstore.c | 132 +---------------- cstore.h | 32 ++-- cstore_fdw.c | 268 +++++++--------------------------- cstore_reader.c | 115 ++++++--------- cstore_writer.c | 166 ++++++++------------- expected/drop.out | 57 -------- expected/truncate.out | 31 ---- input/block_filtering.source | 6 +- input/copyto.source | 3 +- input/create.source | 16 +- input/data_types.source | 18 +-- output/block_filtering.source | 6 +- output/copyto.source | 3 +- output/create.source | 18 +-- output/data_types.source | 18 +-- sql/drop.sql | 28 ---- sql/truncate.sql | 19 --- 17 files changed, 209 insertions(+), 727 deletions(-) diff --git a/cstore.c b/cstore.c index 658c15745..f04fc4fc6 100644 --- a/cstore.c +++ b/cstore.c @@ -21,9 +21,6 @@ #include "cstore.h" -static void CreateDirectory(StringInfo directoryName); -static bool DirectoryExists(StringInfo directoryName); - /* ParseCompressionType converts a string to a compression type. */ CompressionType ParseCompressionType(const char *compressionTypeString) @@ -44,80 +41,6 @@ ParseCompressionType(const char *compressionTypeString) } -/* CreateDirectory creates a new directory with the given directory name. */ -static void -CreateDirectory(StringInfo directoryName) -{ - int makeOK = mkdir(directoryName->data, S_IRWXU); - if (makeOK != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not create directory \"%s\": %m", - directoryName->data))); - } -} - - -/* DirectoryExists checks if a directory exists for the given directory name. 
*/ -static bool -DirectoryExists(StringInfo directoryName) -{ - bool directoryExists = true; - struct stat directoryStat; - - int statOK = stat(directoryName->data, &directoryStat); - if (statOK == 0) - { - /* file already exists; check that it is a directory */ - if (!S_ISDIR(directoryStat.st_mode)) - { - ereport(ERROR, (errmsg("\"%s\" is not a directory", directoryName->data), - errhint("You need to remove or rename the file \"%s\".", - directoryName->data))); - } - } - else - { - if (errno == ENOENT) - { - directoryExists = false; - } - else - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat directory \"%s\": %m", - directoryName->data))); - } - } - - return directoryExists; -} - - -/* - * RemoveCStoreDatabaseDirectory removes CStore directory previously - * created for this database. - * However it does not remove 'cstore_fdw' directory even if there - * are no other databases left. - */ -void -RemoveCStoreDatabaseDirectory(Oid databaseOid) -{ - StringInfo cstoreDirectoryPath = makeStringInfo(); - StringInfo cstoreDatabaseDirectoryPath = makeStringInfo(); - - appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); - - appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, - CSTORE_FDW_NAME, databaseOid); - - if (DirectoryExists(cstoreDatabaseDirectoryPath)) - { - rmtree(cstoreDatabaseDirectoryPath->data, true); - } -} - - /* * InitializeCStoreTableFile creates data and footer file for a cstore table. * The function assumes data and footer files do not exist, therefore @@ -136,62 +59,9 @@ InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *csto * Initialize state to write to the cstore file. This creates an * empty data file and a valid footer file for the table. 
*/ - writeState = CStoreBeginWrite(relationId, cstoreOptions->filename, + writeState = CStoreBeginWrite(relationId, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); CStoreEndWrite(writeState); } - - -/* - * CreateCStoreDatabaseDirectory creates the directory (and parent directories, - * if needed) used to store automatically managed cstore_fdw files. The path to - * the directory is $PGDATA/cstore_fdw/{databaseOid}. - */ -void -CreateCStoreDatabaseDirectory(Oid databaseOid) -{ - bool cstoreDirectoryExists = false; - bool databaseDirectoryExists = false; - StringInfo cstoreDatabaseDirectoryPath = NULL; - - StringInfo cstoreDirectoryPath = makeStringInfo(); - appendStringInfo(cstoreDirectoryPath, "%s/%s", DataDir, CSTORE_FDW_NAME); - - cstoreDirectoryExists = DirectoryExists(cstoreDirectoryPath); - if (!cstoreDirectoryExists) - { - CreateDirectory(cstoreDirectoryPath); - } - - cstoreDatabaseDirectoryPath = makeStringInfo(); - appendStringInfo(cstoreDatabaseDirectoryPath, "%s/%s/%u", DataDir, - CSTORE_FDW_NAME, databaseOid); - - databaseDirectoryExists = DirectoryExists(cstoreDatabaseDirectoryPath); - if (!databaseDirectoryExists) - { - CreateDirectory(cstoreDatabaseDirectoryPath); - } -} - - -/* - * DeleteCStoreTableFiles deletes the data and footer files for a cstore table - * whose data filename is given. 
- */ -void -DeleteCStoreTableFiles(char *filename) -{ - int dataFileRemoved = 0; - - /* delete the data file */ - dataFileRemoved = unlink(filename); - if (dataFileRemoved != 0) - { - ereport(WARNING, (errcode_for_file_access(), - errmsg("could not delete file \"%s\": %m", - filename))); - } -} diff --git a/cstore.h b/cstore.h index 20cac7e05..9679fea9c 100644 --- a/cstore.h +++ b/cstore.h @@ -16,10 +16,10 @@ #include "fmgr.h" #include "lib/stringinfo.h" +#include "storage/bufpage.h" #include "utils/relcache.h" /* Defines for valid option names */ -#define OPTION_NAME_FILENAME "filename" #define OPTION_NAME_COMPRESSION_TYPE "compression" #define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" #define OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" @@ -68,7 +68,6 @@ typedef enum */ typedef struct CStoreOptions { - char *filename; CompressionType compressionType; uint64 stripeRowCount; uint32 blockRowCount; @@ -203,10 +202,9 @@ typedef struct TableReadState { Oid relationId; - FILE *tableFile; TableMetadata *tableMetadata; TupleDesc tupleDescriptor; - + Relation relation; /* * List of Var pointers for columns in the query. 
We use this both for * getting vector of projected columns, and also when we want to build @@ -228,7 +226,6 @@ typedef struct TableReadState typedef struct TableWriteState { Oid relationId; - FILE *tableFile; TableMetadata *tableMetadata; CompressionType compressionType; TupleDesc tupleDescriptor; @@ -257,11 +254,9 @@ extern void InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions); extern void CreateCStoreDatabaseDirectory(Oid databaseOid); extern void RemoveCStoreDatabaseDirectory(Oid databaseOid); -extern void DeleteCStoreTableFiles(char *filename); /* Function declarations for writing to a cstore file */ extern TableWriteState * CStoreBeginWrite(Oid relationId, - const char *filename, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, @@ -271,7 +266,7 @@ extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, extern void CStoreEndWrite(TableWriteState *state); /* Function declarations for reading from a cstore file */ -extern TableReadState * CStoreBeginRead(Oid relationId, const char *filename, +extern TableReadState * CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, List *projectedColumnList, List *qualConditions); extern bool CStoreReadFinished(TableReadState *state); @@ -286,7 +281,7 @@ extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *co uint32 blockRowCount); extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount); -extern uint64 CStoreTableRowCount(Oid relid, const char *filename); +extern uint64 CStoreTableRowCount(Relation relation); extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, CompressionType compressionType); extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); @@ -294,8 +289,27 @@ extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressio /* cstore_metadata_tables.c */ extern void SaveStripeFooter(Oid relid, 
uint64 stripe, StripeFooter *footer); extern StripeFooter * ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount); + extern void InitCStoreTableMetadata(Oid relid, int blockRowCount); extern void InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe); extern TableMetadata * ReadTableMetadata(Oid relid); +typedef struct SmgrAddr +{ + BlockNumber blockno; + uint32 offset; +} SmgrAddr; + +static inline SmgrAddr +logical_to_smgr(uint64 logicalOffset) +{ + uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData; + SmgrAddr addr; + + addr.blockno = logicalOffset / bytes_per_page; + addr.offset = logicalOffset % bytes_per_page; + + return addr; +} + #endif /* CSTORE_H */ diff --git a/cstore_fdw.c b/cstore_fdw.c index 07b47d590..cd8dcf4ef 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -100,7 +100,6 @@ static const uint32 ValidOptionCount = 4; static const CStoreValidOption ValidOptionArray[] = { /* foreign table options */ - { OPTION_NAME_FILENAME, ForeignTableRelationId }, { OPTION_NAME_COMPRESSION_TYPE, ForeignTableRelationId }, { OPTION_NAME_STRIPE_ROW_COUNT, ForeignTableRelationId }, { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } @@ -130,7 +129,7 @@ static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); static List * DroppedCStoreRelidList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); -static void InitializeRelFileNode(Relation relation); +static void InitializeRelFileNode(Relation relation, bool force); static void TruncateCStoreTables(List *cstoreRelationList); static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); @@ -140,10 +139,9 @@ static StringInfo OptionNamesString(Oid currentContextId); static HeapTuple GetSlotHeapTuple(TupleTableSlot *tts); static CStoreOptions * CStoreGetOptions(Oid foreignTableId); static char * CStoreGetOptionValue(Oid foreignTableId, const char *optionName); -static 
void ValidateForeignTableOptions(char *filename, char *compressionTypeString, +static void ValidateForeignTableOptions(char *compressionTypeString, char *stripeRowCountString, char *blockRowCountString); -static char * CStoreDefaultFilePath(Oid foreignTableId); static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId); static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, @@ -158,8 +156,8 @@ static ForeignScan * CStoreGetForeignPlan(PlannerInfo *root, RelOptInfo *baserel Oid foreignTableId, ForeignPath *bestPath, List *targetList, List *scanClauses); #endif -static double TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename); -static BlockNumber PageCount(const char *filename); +static double TupleCountEstimate(Relation relation, RelOptInfo *baserel); +static BlockNumber PageCount(Relation relation); static List * ColumnList(RelOptInfo *baserel, Oid foreignTableId); static void CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState); @@ -250,17 +248,7 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) triggerData = (EventTriggerData *) fcinfo->context; parseTree = triggerData->parsetree; - if (nodeTag(parseTree) == T_CreateForeignServerStmt) - { - CreateForeignServerStmt *serverStatement = (CreateForeignServerStmt *) parseTree; - - char *foreignWrapperName = serverStatement->fdwname; - if (strncmp(foreignWrapperName, CSTORE_FDW_NAME, NAMEDATALEN) == 0) - { - CreateCStoreDatabaseDirectory(MyDatabaseId); - } - } - else if (nodeTag(parseTree) == T_CreateForeignTableStmt) + if (nodeTag(parseTree) == T_CreateForeignTableStmt) { CreateForeignTableStmt *createStatement = (CreateForeignTableStmt *) parseTree; char *serverName = createStatement->servername; @@ -280,8 +268,6 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) * We have no chance to hook into server creation to create data * directory for it during database creation time. 
*/ - CreateCStoreDatabaseDirectory(MyDatabaseId); - InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); heap_close(relation, AccessExclusiveLock); } @@ -361,16 +347,10 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); - - if (removeCStoreDirectory) - { - RemoveCStoreDatabaseDirectory(MyDatabaseId); - } } else { List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); - List *dropFiles = NIL; ListCell *lc = NULL; /* drop smgr storage */ @@ -378,35 +358,14 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, { Oid relid = lfirst_oid(lc); Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); - CStoreOptions *cstoreOptions = CStoreGetOptions(relid); - char *defaultfilename = CStoreDefaultFilePath(relid); RelationOpenSmgr(relation); RelationDropStorage(relation); heap_close(relation, AccessExclusiveLock); - - /* - * Skip files that are placed in default location, they are handled - * by sql drop trigger. Both paths are generated by code, use - * of strcmp is safe here. 
- */ - if (strcmp(defaultfilename, cstoreOptions->filename) == 0) - { - continue; - } - - dropFiles = lappend(dropFiles, cstoreOptions->filename); } CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); - - /* drop files */ - foreach(lc, dropFiles) - { - char *filename = lfirst(lc); - DeleteCStoreTableFiles(filename); - } } } else if (nodeTag(parseTree) == T_TruncateStmt) @@ -449,18 +408,9 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } else if (nodeTag(parseTree) == T_DropdbStmt) { - DropdbStmt *dropDdStmt = (DropdbStmt *) parseTree; - bool missingOk = true; - Oid databaseOid = get_database_oid(dropDdStmt->dbname, missingOk); - /* let postgres handle error checking and dropping of the database */ CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); - - if (databaseOid != InvalidOid) - { - RemoveCStoreDatabaseDirectory(databaseOid); - } } /* handle other utility statements */ @@ -642,11 +592,11 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) /* init state to write to the cstore file */ writeState = CStoreBeginWrite(relationId, - cstoreOptions->filename, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); + writeState->relation = relation; while (nextRowFound) { @@ -912,12 +862,7 @@ TruncateCStoreTables(List *cstoreRelationList) Assert(CStoreTable(relationId)); cstoreOptions = CStoreGetOptions(relationId); - if (OidIsValid(relation->rd_rel->relfilenode)) - { - RelationOpenSmgr(relation); - RelationDropStorage(relation); - } - DeleteCStoreTableFiles(cstoreOptions->filename); + InitializeRelFileNode(relation, true); InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); } } @@ -927,12 +872,12 @@ TruncateCStoreTables(List *cstoreRelationList) * tables. Version 12 and later do not, so we need to create one manually. 
*/ static void -InitializeRelFileNode(Relation relation) +InitializeRelFileNode(Relation relation, bool force) { #if PG_VERSION_NUM >= 120000 - Relation pg_class; - HeapTuple tuple; - Form_pg_class classform; + Relation pg_class; + HeapTuple tuple; + Form_pg_class classform; /* * Get a writable copy of the pg_class tuple for the given relation. @@ -946,12 +891,12 @@ InitializeRelFileNode(Relation relation) RelationGetRelid(relation)); classform = (Form_pg_class) GETSTRUCT(tuple); - if (!OidIsValid(classform->relfilenode)) + if (!OidIsValid(classform->relfilenode) || force) { + char persistence = relation->rd_rel->relpersistence; Relation tmprel; Oid tablespace; - Oid filenode = relation->rd_id; - char persistence = relation->rd_rel->relpersistence; + Oid filenode; RelFileNode newrnode; SMgrRelation srel; @@ -968,6 +913,8 @@ InitializeRelFileNode(Relation relation) else tablespace = MyDatabaseTableSpace; + filenode = GetNewRelFileNode(tablespace, NULL, persistence); + newrnode.spcNode = tablespace; newrnode.dbNode = MyDatabaseId; newrnode.relNode = filenode; @@ -1120,32 +1067,20 @@ Datum cstore_table_size(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); - - int64 tableSize = 0; - CStoreOptions *cstoreOptions = NULL; - char *dataFilename = NULL; - int dataFileStatResult = 0; - struct stat dataFileStatBuffer; - bool cstoreTable = CStoreTable(relationId); + Relation relation; + BlockNumber nblocks; + if (!cstoreTable) { ereport(ERROR, (errmsg("relation is not a cstore table"))); } - cstoreOptions = CStoreGetOptions(relationId); - dataFilename = cstoreOptions->filename; - - dataFileStatResult = stat(dataFilename, &dataFileStatBuffer); - if (dataFileStatResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", dataFilename))); - } - - tableSize += dataFileStatBuffer.st_size; - - PG_RETURN_INT64(tableSize); + relation = cstore_fdw_open(relationId, AccessShareLock); + RelationOpenSmgr(relation); + nblocks = 
smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); + heap_close(relation, AccessShareLock); + PG_RETURN_INT64(nblocks * BLCKSZ); } @@ -1197,7 +1132,6 @@ cstore_fdw_validator(PG_FUNCTION_ARGS) Oid optionContextId = PG_GETARG_OID(1); List *optionList = untransformRelOptions(optionArray); ListCell *optionCell = NULL; - char *filename = NULL; char *compressionTypeString = NULL; char *stripeRowCountString = NULL; char *blockRowCountString = NULL; @@ -1232,11 +1166,7 @@ cstore_fdw_validator(PG_FUNCTION_ARGS) optionNamesString->data))); } - if (strncmp(optionName, OPTION_NAME_FILENAME, NAMEDATALEN) == 0) - { - filename = defGetString(optionDef); - } - else if (strncmp(optionName, OPTION_NAME_COMPRESSION_TYPE, NAMEDATALEN) == 0) + if (strncmp(optionName, OPTION_NAME_COMPRESSION_TYPE, NAMEDATALEN) == 0) { compressionTypeString = defGetString(optionDef); } @@ -1252,7 +1182,7 @@ cstore_fdw_validator(PG_FUNCTION_ARGS) if (optionContextId == ForeignTableRelationId) { - ValidateForeignTableOptions(filename, compressionTypeString, + ValidateForeignTableOptions(compressionTypeString, stripeRowCountString, blockRowCountString); } @@ -1271,11 +1201,6 @@ cstore_fdw_validator(PG_FUNCTION_ARGS) Datum cstore_clean_table_resources(PG_FUNCTION_ARGS) { - Oid relationId = PG_GETARG_OID(0); - StringInfo filePath = makeStringInfo(); - struct stat fileStat; - int statResult = -1; - /* * TODO: Event triggers do not offer the relfilenode of the * dropped table, and by the time the sql_drop event trigger @@ -1285,19 +1210,6 @@ cstore_clean_table_resources(PG_FUNCTION_ARGS) * leak storage. */ - appendStringInfo(filePath, "%s/%s/%d/%d", DataDir, CSTORE_FDW_NAME, - (int) MyDatabaseId, (int) relationId); - - /* - * Check to see if the file exist first. This is the only way to - * find out if the table being dropped is a cstore table. 
- */ - statResult = stat(filePath->data, &fileStat); - if (statResult == 0) - { - DeleteCStoreTableFiles(filePath->data); - } - PG_RETURN_VOID(); } @@ -1359,7 +1271,6 @@ static CStoreOptions * CStoreGetOptions(Oid foreignTableId) { CStoreOptions *cstoreOptions = NULL; - char *filename = NULL; CompressionType compressionType = DEFAULT_COMPRESSION_TYPE; int32 stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; int32 blockRowCount = DEFAULT_BLOCK_ROW_COUNT; @@ -1367,7 +1278,6 @@ CStoreGetOptions(Oid foreignTableId) char *stripeRowCountString = NULL; char *blockRowCountString = NULL; - filename = CStoreGetOptionValue(foreignTableId, OPTION_NAME_FILENAME); compressionTypeString = CStoreGetOptionValue(foreignTableId, OPTION_NAME_COMPRESSION_TYPE); stripeRowCountString = CStoreGetOptionValue(foreignTableId, @@ -1375,7 +1285,7 @@ CStoreGetOptions(Oid foreignTableId) blockRowCountString = CStoreGetOptionValue(foreignTableId, OPTION_NAME_BLOCK_ROW_COUNT); - ValidateForeignTableOptions(filename, compressionTypeString, + ValidateForeignTableOptions(compressionTypeString, stripeRowCountString, blockRowCountString); /* parse provided options */ @@ -1392,14 +1302,7 @@ CStoreGetOptions(Oid foreignTableId) blockRowCount = pg_atoi(blockRowCountString, sizeof(int32), 0); } - /* set default filename if it is not provided */ - if (filename == NULL) - { - filename = CStoreDefaultFilePath(foreignTableId); - } - cstoreOptions = palloc0(sizeof(CStoreOptions)); - cstoreOptions->filename = filename; cstoreOptions->compressionType = compressionType; cstoreOptions->stripeRowCount = stripeRowCount; cstoreOptions->blockRowCount = blockRowCount; @@ -1450,12 +1353,9 @@ CStoreGetOptionValue(Oid foreignTableId, const char *optionName) * considered invalid. 
*/ static void -ValidateForeignTableOptions(char *filename, char *compressionTypeString, +ValidateForeignTableOptions(char *compressionTypeString, char *stripeRowCountString, char *blockRowCountString) { - /* we currently do not have any checks for filename */ - (void) filename; - /* check if the provided compression type is valid */ if (compressionTypeString != NULL) { @@ -1500,36 +1400,6 @@ ValidateForeignTableOptions(char *filename, char *compressionTypeString, } -/* - * CStoreDefaultFilePath constructs the default file path to use for a cstore_fdw - * table. The path is of the form $PGDATA/cstore_fdw/{databaseOid}/{relfilenode}. - */ -static char * -CStoreDefaultFilePath(Oid foreignTableId) -{ - StringInfo cstoreFilePath = NULL; - Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); - RelFileNode relationFileNode = relation->rd_node; - Oid databaseOid = relationFileNode.dbNode; - Oid relationFileOid = relationFileNode.relNode; - - relation_close(relation, AccessShareLock); - - /* PG12 onward does not create relfilenode for foreign tables */ - if (databaseOid == InvalidOid) - { - databaseOid = MyDatabaseId; - relationFileOid = foreignTableId; - } - - cstoreFilePath = makeStringInfo(); - appendStringInfo(cstoreFilePath, "%s/%s/%u/%u", DataDir, CSTORE_FDW_NAME, - databaseOid, relationFileOid); - - return cstoreFilePath->data; -} - - /* * CStoreGetForeignRelSize obtains relation size estimates for a foreign table and * puts its estimate for row count into baserel->rows. 
@@ -1537,14 +1407,14 @@ CStoreDefaultFilePath(Oid foreignTableId) static void CStoreGetForeignRelSize(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { - CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); - double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, - cstoreOptions->filename); + Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); + double tupleCountEstimate = TupleCountEstimate(relation, baserel); double rowSelectivity = clauselist_selectivity(root, baserel->baserestrictinfo, 0, JOIN_INNER, NULL); double outputRowCount = clamp_row_est(tupleCountEstimate * rowSelectivity); baserel->rows = outputRowCount; + heap_close(relation, AccessShareLock); } @@ -1558,7 +1428,6 @@ static void CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId) { Path *foreignScanPath = NULL; - CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableId); Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); /* @@ -1579,15 +1448,14 @@ CStoreGetForeignPaths(PlannerInfo *root, RelOptInfo *baserel, Oid foreignTableId */ List *queryColumnList = ColumnList(baserel, foreignTableId); uint32 queryColumnCount = list_length(queryColumnList); - BlockNumber relationPageCount = PageCount(cstoreOptions->filename); + BlockNumber relationPageCount = PageCount(relation); uint32 relationColumnCount = RelationGetNumberOfAttributes(relation); double queryColumnRatio = (double) queryColumnCount / relationColumnCount; double queryPageCount = relationPageCount * queryColumnRatio; double totalDiskAccessCost = seq_page_cost * queryPageCount; - double tupleCountEstimate = TupleCountEstimate(foreignTableId, baserel, - cstoreOptions->filename); + double tupleCountEstimate = TupleCountEstimate(relation, baserel); /* * We estimate costs almost the same way as cost_seqscan(), thus assuming @@ -1692,7 +1560,7 @@ CStoreGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreignTableI * file. 
*/ static double -TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename) +TupleCountEstimate(Relation relation, RelOptInfo *baserel) { double tupleCountEstimate = 0.0; @@ -1705,13 +1573,13 @@ TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename) * that by the current file size. */ double tupleDensity = baserel->tuples / (double) baserel->pages; - BlockNumber pageCount = PageCount(filename); + BlockNumber pageCount = PageCount(relation); tupleCountEstimate = clamp_row_est(tupleDensity * (double) pageCount); } else { - tupleCountEstimate = (double) CStoreTableRowCount(relid, filename); + tupleCountEstimate = (double) CStoreTableRowCount(relation); } return tupleCountEstimate; @@ -1720,25 +1588,14 @@ TupleCountEstimate(Oid relid, RelOptInfo *baserel, const char *filename) /* PageCount calculates and returns the number of pages in a file. */ static BlockNumber -PageCount(const char *filename) +PageCount(Relation relation) { - BlockNumber pageCount = 0; - struct stat statBuffer; + BlockNumber nblocks; - /* if file doesn't exist at plan time, use default estimate for its size */ - int statResult = stat(filename, &statBuffer); - if (statResult < 0) - { - statBuffer.st_size = 10 * BLCKSZ; - } + RelationOpenSmgr(relation); + nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); - pageCount = (statBuffer.st_size + (BLCKSZ - 1)) / BLCKSZ; - if (pageCount < 1) - { - pageCount = 1; - } - - return pageCount; + return (nblocks > 0) ? 
nblocks : 1; } @@ -1856,25 +1713,18 @@ ColumnList(RelOptInfo *baserel, Oid foreignTableId) static void CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState) { - Relation relation = scanState->ss.ss_currentRelation; - CStoreOptions *cstoreOptions; - Oid foreignTableId; + Relation relation = scanState->ss.ss_currentRelation; cstore_fdw_initrel(relation); - foreignTableId = RelationGetRelid(relation); - cstoreOptions = CStoreGetOptions(foreignTableId); /* supress file size if we're not showing cost details */ if (explainState->costs) { - struct stat statBuffer; - - int statResult = stat(cstoreOptions->filename, &statBuffer); - if (statResult == 0) - { - ExplainPropertyLong("CStore File Size", (long) statBuffer.st_size, - explainState); - } + long nblocks; + RelationOpenSmgr(relation); + nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); + ExplainPropertyLong("CStore File Size", (long) (nblocks * BLCKSZ), + explainState); } } @@ -1909,8 +1759,9 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) whereClauseList = foreignScan->scan.plan.qual; columnList = (List *) linitial(foreignPrivateList); - readState = CStoreBeginRead(foreignTableId, cstoreOptions->filename, + readState = CStoreBeginRead(foreignTableId, tupleDescriptor, columnList, whereClauseList); + readState->relation = cstore_fdw_open(foreignTableId, AccessShareLock); scanState->fdw_state = (void *) readState; } @@ -1956,6 +1807,7 @@ CStoreEndForeignScan(ForeignScanState *scanState) TableReadState *readState = (TableReadState *) scanState->fdw_state; if (readState != NULL) { + heap_close(readState->relation, AccessShareLock); CStoreEndRead(readState); } } @@ -1979,22 +1831,9 @@ CStoreAnalyzeForeignTable(Relation relation, AcquireSampleRowsFunc *acquireSampleRowsFunc, BlockNumber *totalPageCount) { - Oid foreignTableId = RelationGetRelid(relation); - CStoreOptions *cstoreOptions; - struct stat statBuffer; - cstore_fdw_initrel(relation); - cstoreOptions = 
CStoreGetOptions(foreignTableId); - - int statResult = stat(cstoreOptions->filename, &statBuffer); - if (statResult < 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", - cstoreOptions->filename))); - } - - (*totalPageCount) = PageCount(cstoreOptions->filename); + RelationOpenSmgr(relation); + (*totalPageCount) = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); (*acquireSampleRowsFunc) = CStoreAcquireSampleRows; return true; @@ -2267,7 +2106,6 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); writeState = CStoreBeginWrite(foreignTableOid, - cstoreOptions->filename, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, @@ -2372,7 +2210,7 @@ cstore_fdw_initrel(Relation rel) { #if PG_VERSION_NUM >= 120000 if (rel->rd_rel->relfilenode == InvalidOid) - InitializeRelFileNode(rel); + InitializeRelFileNode(rel, false); /* * Copied code from RelationInitPhysicalAddr(), which doesn't diff --git a/cstore_reader.c b/cstore_reader.c index ddef3395a..4cbe2a44f 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -38,7 +38,7 @@ #include "cstore_version_compat.h" /* static function declarations */ -static StripeBuffers * LoadFilteredStripeBuffers(FILE *tableFile, +static StripeBuffers * LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, StripeFooter *stripeFooter, TupleDesc tupleDescriptor, @@ -48,12 +48,12 @@ static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColum uint64 blockIndex, uint64 blockRowIndex, ColumnBlockData **blockDataArray, Datum *columnValues, bool *columnNulls); -static ColumnBuffers * LoadColumnBuffers(FILE *tableFile, +static ColumnBuffers * LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, Form_pg_attribute attributeForm); -static 
StripeSkipList * LoadStripeSkipList(FILE *tableFile, +static StripeSkipList * LoadStripeSkipList(Relation relation, StripeMetadata *stripeMetadata, StripeFooter *stripeFooter, uint32 columnCount, @@ -82,10 +82,10 @@ static void DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex TupleDesc tupleDescriptor); static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm); -static StringInfo ReadFromFile(FILE *file, uint64 offset, uint32 size); +static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount); -static uint64 StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata); +static uint64 StripeRowCount(Relation relation, StripeMetadata *stripeMetadata); static int RelationColumnCount(Oid relid); @@ -94,12 +94,11 @@ static int RelationColumnCount(Oid relid); * read handle that's used during reading rows and finishing the read operation. */ TableReadState * -CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, +CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { TableReadState *readState = NULL; TableMetadata *tableMetadata = NULL; - FILE *tableFile = NULL; MemoryContext stripeReadContext = NULL; uint32 columnCount = 0; bool *projectedColumnMask = NULL; @@ -107,14 +106,6 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, tableMetadata = ReadTableMetadata(relationId); - tableFile = AllocateFile(filename, PG_BINARY_R); - if (tableFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for reading: %m", - filename))); - } - /* * We allocate all stripe specific data in the stripeReadContext, and reset * this memory context before loading a new stripe. 
This is to avoid memory @@ -131,7 +122,6 @@ CStoreBeginRead(Oid relationId, const char *filename, TupleDesc tupleDescriptor, readState = palloc0(sizeof(TableReadState)); readState->relationId = relationId; - readState->tableFile = tableFile; readState->tableMetadata = tableMetadata; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; @@ -187,7 +177,8 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu stripeFooter = ReadStripeFooter(readState->relationId, stripeMetadata->id, readState->tupleDescriptor->natts); - stripeBuffers = LoadFilteredStripeBuffers(readState->tableFile, stripeMetadata, + stripeBuffers = LoadFilteredStripeBuffers(readState->relation, + stripeMetadata, stripeFooter, readState->tupleDescriptor, readState->projectedColumnList, @@ -263,7 +254,6 @@ CStoreEndRead(TableReadState *readState) int columnCount = readState->tupleDescriptor->natts; MemoryContextDelete(readState->stripeReadContext); - FreeFile(readState->tableFile); list_free_deep(readState->tableMetadata->stripeMetadataList); FreeColumnBlockDataArray(readState->blockDataArray, columnCount); pfree(readState->tableMetadata); @@ -326,30 +316,20 @@ FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount) /* CStoreTableRowCount returns the exact row count of a table using skiplists */ uint64 -CStoreTableRowCount(Oid relid, const char *filename) +CStoreTableRowCount(Relation relation) { TableMetadata *tableMetadata = NULL; - FILE *tableFile; ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - tableMetadata = ReadTableMetadata(relid); - - tableFile = AllocateFile(filename, PG_BINARY_R); - if (tableFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for reading: %m", filename))); - } + tableMetadata = ReadTableMetadata(relation->rd_id); foreach(stripeMetadataCell, tableMetadata->stripeMetadataList) { StripeMetadata *stripeMetadata = 
(StripeMetadata *) lfirst(stripeMetadataCell); - totalRowCount += StripeRowCount(relid, tableFile, stripeMetadata); + totalRowCount += StripeRowCount(relation, stripeMetadata); } - FreeFile(tableFile); - return totalRowCount; } @@ -359,15 +339,15 @@ CStoreTableRowCount(Oid relid, const char *filename) * skip list, and returns number of rows for given stripe. */ static uint64 -StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata) +StripeRowCount(Relation relation, StripeMetadata *stripeMetadata) { uint64 rowCount = 0; StringInfo firstColumnSkipListBuffer = NULL; - StripeFooter *stripeFooter = ReadStripeFooter(relid, stripeMetadata->id, - RelationColumnCount(relid)); + StripeFooter *stripeFooter = ReadStripeFooter(relation->rd_id, stripeMetadata->id, + RelationColumnCount(relation->rd_id)); - firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, + firstColumnSkipListBuffer = ReadFromSmgr(relation, stripeMetadata->fileOffset, stripeFooter->skipListSizeArray[0]); rowCount = DeserializeRowCount(firstColumnSkipListBuffer); @@ -381,7 +361,7 @@ StripeRowCount(Oid relid, FILE *tableFile, StripeMetadata *stripeMetadata) * and only loads columns that are projected in the query. 
*/ static StripeBuffers * -LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, +LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, StripeFooter *stripeFooter, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { @@ -393,7 +373,7 @@ LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); - StripeSkipList *stripeSkipList = LoadStripeSkipList(tableFile, stripeMetadata, + StripeSkipList *stripeSkipList = LoadStripeSkipList(relation, stripeMetadata, stripeFooter, columnCount, projectedColumnMask, tupleDescriptor); @@ -423,7 +403,7 @@ LoadFilteredStripeBuffers(FILE *tableFile, StripeMetadata *stripeMetadata, Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); uint32 blockCount = selectedBlockSkipList->blockCount; - ColumnBuffers *columnBuffers = LoadColumnBuffers(tableFile, blockSkipNode, + ColumnBuffers *columnBuffers = LoadColumnBuffers(relation, blockSkipNode, blockCount, existsFileOffset, valueFileOffset, @@ -482,7 +462,7 @@ ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, * and lengths are retrieved from the column block skip node array. 
*/ static ColumnBuffers * -LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, +LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, Form_pg_attribute attributeForm) { @@ -505,7 +485,7 @@ LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, { ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; uint64 existsOffset = existsFileOffset + blockSkipNode->existsBlockOffset; - StringInfo rawExistsBuffer = ReadFromFile(tableFile, existsOffset, + StringInfo rawExistsBuffer = ReadFromSmgr(relation, existsOffset, blockSkipNode->existsLength); blockBuffersArray[blockIndex]->existsBuffer = rawExistsBuffer; @@ -517,7 +497,7 @@ LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; CompressionType compressionType = blockSkipNode->valueCompressionType; uint64 valueOffset = valueFileOffset + blockSkipNode->valueBlockOffset; - StringInfo rawValueBuffer = ReadFromFile(tableFile, valueOffset, + StringInfo rawValueBuffer = ReadFromSmgr(relation, valueOffset, blockSkipNode->valueLength); blockBuffersArray[blockIndex]->valueBuffer = rawValueBuffer; @@ -533,7 +513,8 @@ LoadColumnBuffers(FILE *tableFile, ColumnBlockSkipNode *blockSkipNodeArray, /* Reads the skip list for the given stripe. 
*/ static StripeSkipList * -LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, +LoadStripeSkipList(Relation relation, + StripeMetadata *stripeMetadata, StripeFooter *stripeFooter, uint32 columnCount, bool *projectedColumnMask, TupleDesc tupleDescriptor) @@ -547,7 +528,7 @@ LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, uint32 stripeColumnCount = stripeFooter->columnCount; /* deserialize block count */ - firstColumnSkipListBuffer = ReadFromFile(tableFile, stripeMetadata->fileOffset, + firstColumnSkipListBuffer = ReadFromSmgr(relation, stripeMetadata->fileOffset, stripeFooter->skipListSizeArray[0]); stripeBlockCount = DeserializeBlockCount(firstColumnSkipListBuffer); @@ -570,7 +551,7 @@ LoadStripeSkipList(FILE *tableFile, StripeMetadata *stripeMetadata, Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); StringInfo columnSkipListBuffer = - ReadFromFile(tableFile, currentColumnSkipListFileOffset, + ReadFromSmgr(relation, currentColumnSkipListFileOffset, columnSkipListSize); ColumnBlockSkipNode *columnSkipList = DeserializeColumnSkipList(columnSkipListBuffer, attributeForm->attbyval, @@ -1178,49 +1159,37 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor return defaultValue; } - -/* Reads the given segment from the given file. 
*/ static StringInfo -ReadFromFile(FILE *file, uint64 offset, uint32 size) +ReadFromSmgr(Relation rel, uint64 offset, uint32 size) { - int fseekResult = 0; - int freadResult = 0; - int fileError = 0; + StringInfo resultBuffer = makeStringInfo(); + uint64 read = 0; - StringInfo resultBuffer = makeStringInfo(); enlargeStringInfo(resultBuffer, size); resultBuffer->len = size; - if (size == 0) + while (read < size) { - return resultBuffer; - } + Buffer buffer; + Page page; + PageHeader phdr; + uint32 to_read; + SmgrAddr addr = logical_to_smgr(offset + read); + uint32 pageoffset = addr.offset + SizeOfPageHeaderData; - errno = 0; - fseekResult = fseeko(file, offset, SEEK_SET); - if (fseekResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek in file: %m"))); - } + buffer = ReadBuffer(rel, addr.blockno); + page = BufferGetPage(buffer); + phdr = (PageHeader)page; - freadResult = fread(resultBuffer->data, size, 1, file); - if (freadResult != 1) - { - ereport(ERROR, (errmsg("could not read enough data from file"))); - } - - fileError = ferror(file); - if (fileError != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read file: %m"))); + to_read = Min(size - read, phdr->pd_upper - pageoffset); + memcpy(resultBuffer->data + read, page + pageoffset, to_read); + ReleaseBuffer(buffer); + read += to_read; } return resultBuffer; } - /* * ResetUncompressedBlockData iterates over deserialized column block data * and sets valueBuffer field to empty buffer. 
This field is allocated in stripe diff --git a/cstore_writer.c b/cstore_writer.c index 240c13fc2..76e3aa070 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -16,12 +16,12 @@ #include "postgres.h" -#include - #include "access/nbtree.h" #include "catalog/pg_am.h" #include "storage/fd.h" +#include "storage/smgr.h" #include "utils/memutils.h" +#include "utils/rel.h" #include "cstore.h" #include "cstore_metadata_serialization.h" @@ -51,8 +51,6 @@ static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); static void AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata); -static void WriteToFile(FILE *file, void *data, uint32 dataLength); -static void SyncAndCloseFile(FILE *file); static StringInfo CopyStringInfo(StringInfo sourceString); @@ -65,12 +63,11 @@ static StringInfo CopyStringInfo(StringInfo sourceString); */ TableWriteState * CStoreBeginWrite(Oid relationId, - const char *filename, CompressionType compressionType, + CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, TupleDesc tupleDescriptor) { TableWriteState *writeState = NULL; - FILE *tableFile = NULL; TableMetadata *tableMetadata = NULL; FmgrInfo **comparisonFunctionArray = NULL; MemoryContext stripeWriteContext = NULL; @@ -81,14 +78,6 @@ CStoreBeginWrite(Oid relationId, ColumnBlockData **blockData = NULL; uint64 currentStripeId = 0; - tableFile = AllocateFile(filename, "a+"); - if (tableFile == NULL) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not open file \"%s\" for writing: %m", - filename))); - } - tableMetadata = ReadTableMetadata(relationId); /* @@ -99,7 +88,6 @@ CStoreBeginWrite(Oid relationId, { StripeMetadata *lastStripe = NULL; uint64 lastStripeSize = 0; - int fseekResult = 0; lastStripe = llast(tableMetadata->stripeMetadataList); lastStripeSize += lastStripe->skipListLength; @@ -108,14 +96,6 @@ CStoreBeginWrite(Oid 
relationId, currentFileOffset = lastStripe->fileOffset + lastStripeSize; currentStripeId = lastStripe->id + 1; - - errno = 0; - fseekResult = fseeko(tableFile, currentFileOffset, SEEK_SET); - if (fseekResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not seek in file \"%s\": %m", filename))); - } } /* get comparison function pointers for each of the columns */ @@ -154,7 +134,6 @@ CStoreBeginWrite(Oid relationId, writeState = palloc0(sizeof(TableWriteState)); writeState->relationId = relationId; - writeState->tableFile = tableFile; writeState->tableMetadata = tableMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; @@ -312,8 +291,6 @@ CStoreEndWrite(TableWriteState *writeState) AppendStripeMetadata(writeState->tableMetadata, stripeMetadata); } - SyncAndCloseFile(writeState->tableFile); - MemoryContextDelete(writeState->stripeWriteContext); list_free_deep(writeState->tableMetadata->stripeMetadataList); pfree(writeState->comparisonFunctionArray); @@ -391,6 +368,56 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, return stripeSkipList; } +static void +WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) +{ + uint64 logicalOffset = writeState->currentFileOffset; + uint64 remaining = dataLength; + Relation rel = writeState->relation; + Buffer buffer; + + while (remaining > 0) + { + SmgrAddr addr = logical_to_smgr(logicalOffset); + BlockNumber nblocks; + Page page; + PageHeader phdr; + uint64 to_write; + + RelationOpenSmgr(rel); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + + while (addr.blockno >= nblocks) + { + Buffer buffer = ReadBuffer(rel, P_NEW); + ReleaseBuffer(buffer); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + } + + RelationCloseSmgr(rel); + + buffer = ReadBuffer(rel, addr.blockno); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + page = BufferGetPage(buffer); + phdr = (PageHeader) page; + if (PageIsNew(page)) + 
PageInit(page, BLCKSZ, 0); + + /* always appending */ + Assert(phdr->pd_lower == addr.offset + SizeOfPageHeaderData); + + to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining); + memcpy(page + phdr->pd_lower, data, to_write); + phdr->pd_lower += to_write; + + UnlockReleaseBuffer(buffer); + + data += to_write; + remaining -= to_write; + logicalOffset += to_write; + } +} /* * FlushStripe flushes current stripe data into the file. The function first ensures @@ -409,7 +436,6 @@ FlushStripe(TableWriteState *writeState) uint32 columnIndex = 0; uint32 blockIndex = 0; TableMetadata *tableMetadata = writeState->tableMetadata; - FILE *tableFile = writeState->tableFile; StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray; @@ -419,6 +445,7 @@ FlushStripe(TableWriteState *writeState) uint32 blockRowCount = tableMetadata->blockRowCount; uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; + uint64 initialFileOffset = writeState->currentFileOffset; /* * check if the last block needs serialization , the last block was not serialized @@ -479,7 +506,8 @@ FlushStripe(TableWriteState *writeState) for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { StringInfo skipListBuffer = skipListBufferArray[columnIndex]; - WriteToFile(tableFile, skipListBuffer->data, skipListBuffer->len); + WriteToSmgr(writeState, skipListBuffer->data, skipListBuffer->len); + writeState->currentFileOffset += skipListBuffer->len; } /* then, we flush the data buffers */ @@ -494,7 +522,8 @@ FlushStripe(TableWriteState *writeState) columnBuffers->blockBuffersArray[blockIndex]; StringInfo existsBuffer = blockBuffers->existsBuffer; - WriteToFile(tableFile, existsBuffer->data, existsBuffer->len); + WriteToSmgr(writeState, existsBuffer->data, existsBuffer->len); + 
writeState->currentFileOffset += existsBuffer->len; } for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) @@ -503,7 +532,8 @@ FlushStripe(TableWriteState *writeState) columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = blockBuffers->valueBuffer; - WriteToFile(tableFile, valueBuffer->data, valueBuffer->len); + WriteToSmgr(writeState, valueBuffer->data, valueBuffer->len); + writeState->currentFileOffset += valueBuffer->len; } } @@ -520,16 +550,12 @@ FlushStripe(TableWriteState *writeState) dataLength += stripeFooter->valueSizeArray[columnIndex]; } - stripeMetadata.fileOffset = writeState->currentFileOffset; + stripeMetadata.fileOffset = initialFileOffset; stripeMetadata.skipListLength = skipListLength; stripeMetadata.dataLength = dataLength; stripeMetadata.footerLength = 0; stripeMetadata.id = writeState->currentStripeId; - /* advance current file offset */ - writeState->currentFileOffset += skipListLength; - writeState->currentFileOffset += dataLength; - return stripeMetadata; } @@ -834,76 +860,6 @@ AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata stripeMetadataCopy); } - -/* Writes the given data to the given file pointer and checks for errors. */ -static void -WriteToFile(FILE *file, void *data, uint32 dataLength) -{ - int writeResult = 0; - int errorResult = 0; - - if (dataLength == 0) - { - return; - } - - errno = 0; - writeResult = fwrite(data, dataLength, 1, file); - if (writeResult != 1) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not write file: %m"))); - } - - errorResult = ferror(file); - if (errorResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("error in file: %m"))); - } -} - - -/* Flushes, syncs, and closes the given file pointer and checks for errors. 
*/ -static void -SyncAndCloseFile(FILE *file) -{ - int flushResult = 0; - int syncResult = 0; - int errorResult = 0; - int freeResult = 0; - - errno = 0; - flushResult = fflush(file); - if (flushResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not flush file: %m"))); - } - - syncResult = pg_fsync(fileno(file)); - if (syncResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not sync file: %m"))); - } - - errorResult = ferror(file); - if (errorResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("error in file: %m"))); - } - - freeResult = FreeFile(file); - if (freeResult != 0) - { - ereport(ERROR, (errcode_for_file_access(), - errmsg("could not close file: %m"))); - } -} - - /* * CopyStringInfo creates a deep copy of given source string allocating only needed * amount of memory. diff --git a/expected/drop.out b/expected/drop.out index dc5678da7..926f69337 100644 --- a/expected/drop.out +++ b/expected/drop.out @@ -12,17 +12,6 @@ -- 'postgres' directory is excluded from comparison to have the same result. -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset --- Check that files for the automatically managed table exist in the --- cstore_fdw/{databaseoid} directory. -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - count -------- - 2 -(1 row) - -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; @@ -31,17 +20,6 @@ CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to foreign table test_schema.test_table --- Check that the files have been deleted and the directory is empty after the --- DROP table command. 
-SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - count -------- - 0 -(1 row) - SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop @@ -49,49 +27,14 @@ CREATE EXTENSION cstore_fdw; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; --- should see 2 files, data and footer file for single table -SELECT count(*) FROM pg_ls_dir('cstore_fdw/' || :databaseoid); - count -------- - 2 -(1 row) - --- should see 2 directories 1 for each database, excluding postgres database -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - count -------- - 2 -(1 row) - DROP EXTENSION cstore_fdw CASCADE; NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to server cstore_server drop cascades to foreign table test_table --- should only see 1 directory here -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - count -------- - 1 -(1 row) - -- test database drop CREATE EXTENSION cstore_fdw; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; --- should see 2 directories 1 for each database -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - count -------- - 2 -(1 row) - \c :datname DROP DATABASE db_to_drop; --- should only see 1 directory for the default database -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - count -------- - 1 -(1 row) - diff --git a/expected/truncate.out b/expected/truncate.out index 14119c804..c92c15559 100644 --- a/expected/truncate.out +++ 
b/expected/truncate.out @@ -9,17 +9,6 @@ SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; t (1 row) --- Check that files for the automatically managed table exist in the --- cstore_fdw/{databaseoid} directory. -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - count -------- - 0 -(1 row) - -- CREATE a cstore_fdw table, fill with some data -- CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; @@ -75,16 +64,6 @@ SELECT cstore_table_size('cstore_truncate_test_compressed'); 0 (1 row) --- make sure data files still present -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - count -------- - 3 -(1 row) - INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; @@ -250,13 +229,3 @@ SELECT count(*) FROM truncate_schema.truncate_tbl; DROP SCHEMA truncate_schema CASCADE; NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl DROP USER truncate_user; --- verify files are removed -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - count -------- - 0 -(1 row) - diff --git a/input/block_filtering.source b/input/block_filtering.source index 4451262d4..dc3170f0d 100644 --- a/input/block_filtering.source +++ b/input/block_filtering.source @@ -30,8 +30,7 @@ $$ LANGUAGE PLPGSQL; -- Create and load data CREATE FOREIGN TABLE test_block_filtering (a int) SERVER 
cstore_server - OPTIONS(filename '@abs_srcdir@/data/block_filtering.cstore', - block_row_count '1000', stripe_row_count '2000'); + OPTIONS(block_row_count '1000', stripe_row_count '2000'); COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; @@ -60,8 +59,7 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET -- Verify that we are fine with collations which use a different alphabet order CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/collation_block_filtering.cstore'); + SERVER cstore_server; COPY collation_block_filtering_test FROM STDIN; A Å diff --git a/input/copyto.source b/input/copyto.source index 96403a3f4..a4b753a8d 100644 --- a/input/copyto.source +++ b/input/copyto.source @@ -3,8 +3,7 @@ -- CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/test_contestant.cstore'); + SERVER cstore_server; -- load table data from file COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; diff --git a/input/create.source b/input/create.source index fbd27dc50..ba52137c1 100644 --- a/input/create.source +++ b/input/create.source @@ -12,30 +12,24 @@ CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests CREATE FOREIGN TABLE test_validator_invalid_option () SERVER cstore_server - OPTIONS(filename 'data.cstore', bad_option_name '1'); -- ERROR + OPTIONS(bad_option_name '1'); -- ERROR CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () SERVER cstore_server - OPTIONS(filename 'data.cstore', stripe_row_count '0'); -- ERROR + OPTIONS(stripe_row_count '0'); -- ERROR CREATE FOREIGN TABLE test_validator_invalid_block_row_count () SERVER cstore_server - OPTIONS(filename 'data.cstore', block_row_count '0'); -- ERROR + OPTIONS(block_row_count 
'0'); -- ERROR CREATE FOREIGN TABLE test_validator_invalid_compression_type () SERVER cstore_server - OPTIONS(filename 'data.cstore', compression 'invalid_compression'); -- ERROR - --- Invalid file path test -CREATE FOREIGN TABLE test_invalid_file_path () - SERVER cstore_server - OPTIONS(filename 'bad_directory_path/bad_file_path'); --ERROR + OPTIONS(compression 'invalid_compression'); -- ERROR -- Create uncompressed table CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/contestant.cstore'); + SERVER cstore_server; -- Create compressed table with automatically determined file path diff --git a/input/data_types.source b/input/data_types.source index c3398c67b..ec83c4d8c 100644 --- a/input/data_types.source +++ b/input/data_types.source @@ -11,8 +11,7 @@ SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/array_types.cstore'); + text_array text[]) SERVER cstore_server; COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; @@ -22,8 +21,7 @@ SELECT * FROM test_array_types; -- Test date/time types CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/datetime_types.cstore'); + interval interval) SERVER cstore_server; COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; @@ -35,8 +33,7 @@ CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server - OPTIONS(filename 
'@abs_srcdir@/data/enum_and_composite_types.cstore'); + composite composite_type) SERVER cstore_server; COPY test_enum_and_composite_types FROM '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; @@ -46,8 +43,7 @@ SELECT * FROM test_enum_and_composite_types; -- Test range types CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/range_types.cstore'); + numrange numrange, tsrange tsrange) SERVER cstore_server; COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; @@ -56,8 +52,7 @@ SELECT * FROM test_range_types; -- Test other types CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/other_types.cstore'); + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; @@ -66,8 +61,7 @@ SELECT * FROM test_other_types; -- Test null values CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/null_values.cstore'); + SERVER cstore_server; COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; diff --git a/output/block_filtering.source b/output/block_filtering.source index 21e1eb772..2f664a78a 100644 --- a/output/block_filtering.source +++ b/output/block_filtering.source @@ -26,8 +26,7 @@ $$ LANGUAGE PLPGSQL; -- Create and load data CREATE FOREIGN TABLE test_block_filtering (a int) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/block_filtering.cstore', - block_row_count '1000', stripe_row_count '2000'); + OPTIONS(block_row_count '1000', stripe_row_count '2000'); COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; -- Verify that filtered_row_count is 
less than 1000 for the following queries SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); @@ -107,8 +106,7 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET -- Verify that we are fine with collations which use a different alphabet order CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/collation_block_filtering.cstore'); + SERVER cstore_server; COPY collation_block_filtering_test FROM STDIN; SELECT * FROM collation_block_filtering_test WHERE A > 'B'; a diff --git a/output/copyto.source b/output/copyto.source index 6024dd205..a8d841f18 100644 --- a/output/copyto.source +++ b/output/copyto.source @@ -3,8 +3,7 @@ -- CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/test_contestant.cstore'); + SERVER cstore_server; -- load table data from file COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; -- export using COPY table TO ... 
diff --git a/output/create.source b/output/create.source index 937afa2a0..961c0494d 100644 --- a/output/create.source +++ b/output/create.source @@ -7,34 +7,28 @@ CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests CREATE FOREIGN TABLE test_validator_invalid_option () SERVER cstore_server - OPTIONS(filename 'data.cstore', bad_option_name '1'); -- ERROR + OPTIONS(bad_option_name '1'); -- ERROR ERROR: invalid option "bad_option_name" -HINT: Valid options in this context are: filename, compression, stripe_row_count, block_row_count +HINT: Valid options in this context are: compression, stripe_row_count, block_row_count CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () SERVER cstore_server - OPTIONS(filename 'data.cstore', stripe_row_count '0'); -- ERROR + OPTIONS(stripe_row_count '0'); -- ERROR ERROR: invalid stripe row count HINT: Stripe row count must be an integer between 1000 and 10000000 CREATE FOREIGN TABLE test_validator_invalid_block_row_count () SERVER cstore_server - OPTIONS(filename 'data.cstore', block_row_count '0'); -- ERROR + OPTIONS(block_row_count '0'); -- ERROR ERROR: invalid block row count HINT: Block row count must be an integer between 1000 and 100000 CREATE FOREIGN TABLE test_validator_invalid_compression_type () SERVER cstore_server - OPTIONS(filename 'data.cstore', compression 'invalid_compression'); -- ERROR + OPTIONS(compression 'invalid_compression'); -- ERROR ERROR: invalid compression type HINT: Valid options are: none, pglz --- Invalid file path test -CREATE FOREIGN TABLE test_invalid_file_path () - SERVER cstore_server - OPTIONS(filename 'bad_directory_path/bad_file_path'); --ERROR -ERROR: could not open file "bad_directory_path/bad_file_path" for writing: No such file or directory -- Create uncompressed table CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(filename 
'@abs_srcdir@/data/contestant.cstore'); + SERVER cstore_server; -- Create compressed table with automatically determined file path CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) diff --git a/output/data_types.source b/output/data_types.source index efa03a663..23fdcfa29 100644 --- a/output/data_types.source +++ b/output/data_types.source @@ -7,8 +7,7 @@ SET timezone to 'GMT'; SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/array_types.cstore'); + text_array text[]) SERVER cstore_server; COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; SELECT * FROM test_array_types; int_array | bigint_array | text_array @@ -21,8 +20,7 @@ SELECT * FROM test_array_types; -- Test date/time types CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/datetime_types.cstore'); + interval interval) SERVER cstore_server; COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; SELECT * FROM test_datetime_types; timestamp | timestamp_with_timezone | date | time | interval @@ -35,8 +33,7 @@ SELECT * FROM test_datetime_types; CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/enum_and_composite_types.cstore'); + composite composite_type) SERVER cstore_server; COPY test_enum_and_composite_types FROM '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; SELECT * FROM test_enum_and_composite_types; @@ -48,8 +45,7 @@ SELECT * FROM 
test_enum_and_composite_types; -- Test range types CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/range_types.cstore'); + numrange numrange, tsrange tsrange) SERVER cstore_server; COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; SELECT * FROM test_range_types; int4range | int8range | numrange | tsrange @@ -60,8 +56,7 @@ SELECT * FROM test_range_types; -- Test other types CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/other_types.cstore'); + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; SELECT * FROM test_other_types; bool | bytea | money | inet | bitstring | uuid | json @@ -72,8 +67,7 @@ SELECT * FROM test_other_types; -- Test null values CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server - OPTIONS(filename '@abs_srcdir@/data/null_values.cstore'); + SERVER cstore_server; COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; SELECT * FROM test_null_values; a | b | c diff --git a/sql/drop.sql b/sql/drop.sql index a0852a279..c64b5c99b 100644 --- a/sql/drop.sql +++ b/sql/drop.sql @@ -15,13 +15,6 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset --- Check that files for the automatically managed table exist in the --- cstore_fdw/{databaseoid} directory. 
-SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; @@ -31,13 +24,6 @@ CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; DROP SCHEMA test_schema CASCADE; --- Check that the files have been deleted and the directory is empty after the --- DROP table command. -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - SELECT current_database() datname \gset CREATE DATABASE db_to_drop; @@ -47,17 +33,9 @@ CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; --- should see 2 files, data and footer file for single table -SELECT count(*) FROM pg_ls_dir('cstore_fdw/' || :databaseoid); - --- should see 2 directories 1 for each database, excluding postgres database -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; DROP EXTENSION cstore_fdw CASCADE; --- should only see 1 directory here -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - -- test database drop CREATE EXTENSION cstore_fdw; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; @@ -65,12 +43,6 @@ SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; --- should see 2 directories 1 for each database -SELECT count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; - \c :datname DROP DATABASE db_to_drop; - --- should only see 1 directory for the default database -SELECT 
count(*) FROM pg_ls_dir('cstore_fdw') WHERE pg_ls_dir != :postgres_oid::text; diff --git a/sql/truncate.sql b/sql/truncate.sql index 0aac2bd34..a1849045e 100644 --- a/sql/truncate.sql +++ b/sql/truncate.sql @@ -6,13 +6,6 @@ SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; --- Check that files for the automatically managed table exist in the --- cstore_fdw/{databaseoid} directory. -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - -- CREATE a cstore_fdw table, fill with some data -- CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; @@ -39,12 +32,6 @@ SELECT count(*) FROM cstore_truncate_test_compressed; SELECT cstore_table_size('cstore_truncate_test_compressed'); --- make sure data files still present -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; - INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; @@ -127,9 +114,3 @@ SELECT count(*) FROM truncate_schema.truncate_tbl; -- cleanup DROP SCHEMA truncate_schema CASCADE; DROP USER truncate_user; - --- verify files are removed -SELECT count(*) FROM ( - SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( - SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() - ) AS q1) AS q2; From 573555747f7c0637167d81dcea0f1cd551794040 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 11 Sep 2020 16:28:57 -0700 Subject: [PATCH 016/124] address review comments --- cstore.h | 6 +++++- 
cstore_reader.c | 5 ++--- cstore_writer.c | 3 ++- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cstore.h b/cstore.h index 9679fea9c..d0f959032 100644 --- a/cstore.h +++ b/cstore.h @@ -300,6 +300,10 @@ typedef struct SmgrAddr uint32 offset; } SmgrAddr; +/* + * Map logical offsets (as tracked in the metadata) to a physical page and + * offset where the data is kept. + */ static inline SmgrAddr logical_to_smgr(uint64 logicalOffset) { @@ -307,7 +311,7 @@ logical_to_smgr(uint64 logicalOffset) SmgrAddr addr; addr.blockno = logicalOffset / bytes_per_page; - addr.offset = logicalOffset % bytes_per_page; + addr.offset = SizeOfPageHeaderData + (logicalOffset % bytes_per_page); return addr; } diff --git a/cstore_reader.c b/cstore_reader.c index 4cbe2a44f..654d74697 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -1175,14 +1175,13 @@ ReadFromSmgr(Relation rel, uint64 offset, uint32 size) PageHeader phdr; uint32 to_read; SmgrAddr addr = logical_to_smgr(offset + read); - uint32 pageoffset = addr.offset + SizeOfPageHeaderData; buffer = ReadBuffer(rel, addr.blockno); page = BufferGetPage(buffer); phdr = (PageHeader)page; - to_read = Min(size - read, phdr->pd_upper - pageoffset); - memcpy(resultBuffer->data + read, page + pageoffset, to_read); + to_read = Min(size - read, phdr->pd_upper - addr.offset); + memcpy(resultBuffer->data + read, page + addr.offset, to_read); ReleaseBuffer(buffer); read += to_read; } diff --git a/cstore_writer.c b/cstore_writer.c index 76e3aa070..319136dc9 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -405,12 +405,13 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) PageInit(page, BLCKSZ, 0); /* always appending */ - Assert(phdr->pd_lower == addr.offset + SizeOfPageHeaderData); + Assert(phdr->pd_lower == addr.offset); to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining); memcpy(page + phdr->pd_lower, data, to_write); phdr->pd_lower += to_write; + MarkBufferDirty(buffer); 
UnlockReleaseBuffer(buffer); data += to_write; From fb110446bebdc73af8f8dd8b8b73f28551844f4c Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 14 Sep 2020 13:13:36 -0700 Subject: [PATCH 017/124] Fix compilation in pg 11 --- cstore_metadata_tables.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 3843e4cd6..5b6151ef6 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -14,9 +14,8 @@ #include #include "access/heapam.h" +#include "access/htup_details.h" #include "access/nbtree.h" -#include "access/table.h" -#include "access/tableam.h" #include "access/xact.h" #include "catalog/indexing.h" #include "catalog/pg_namespace.h" @@ -30,7 +29,6 @@ #include "miscadmin.h" #include "nodes/execnodes.h" #include "lib/stringinfo.h" -#include "optimizer/optimizer.h" #include "port.h" #include "storage/fd.h" #include "utils/fmgroids.h" @@ -263,7 +261,7 @@ DeleteTableMetadataRowIfExists(Oid relid) BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = table_open(cstoreTablesOid, AccessShareLock); + cstoreTables = heap_open(cstoreTablesOid, AccessShareLock); index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); @@ -278,7 +276,7 @@ DeleteTableMetadataRowIfExists(Oid relid) systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); - table_close(cstoreTables, NoLock); + heap_close(cstoreTables, NoLock); } @@ -412,9 +410,15 @@ InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls) { TupleDesc tupleDescriptor = RelationGetDescr(state->rel); HeapTuple tuple = heap_form_tuple(tupleDescriptor, values, nulls); + +#if PG_VERSION_NUM >= 120000 TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor, &TTSOpsHeapTuple); ExecStoreHeapTuple(tuple, slot, 
false); +#else + TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor); + ExecStoreTuple(tuple, slot, InvalidBuffer, false); +#endif /* use ExecSimpleRelationInsert to enforce constraints */ ExecSimpleRelationInsert(state->estate, slot); @@ -432,7 +436,7 @@ DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple) ResultRelInfo *resultRelInfo = estate->es_result_relation_info; ItemPointer tid = &(heapTuple->t_self); - simple_table_tuple_delete(state->rel, tid, estate->es_snapshot); + simple_heap_delete(state->rel, tid); /* execute AFTER ROW DELETE Triggers to enforce constraints */ ExecARDeleteTriggers(estate, resultRelInfo, tid, NULL, NULL); @@ -476,8 +480,10 @@ create_estate_for_relation(Relation rel) rte->rtekind = RTE_RELATION; rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; +#if PG_VERSION_NUM >= 120000 rte->rellockmode = AccessShareLock; ExecInitRangeTable(estate, list_make1(rte)); +#endif resultRelInfo = makeNode(ResultRelInfo); InitResultRelInfo(resultRelInfo, rel, 1, NULL, 0); @@ -488,6 +494,12 @@ create_estate_for_relation(Relation rel) estate->es_output_cid = GetCurrentCommandId(true); +#if PG_VERSION_NUM < 120000 + /* Triggers might need a slot */ + if (resultRelInfo->ri_TrigDesc) + estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL); +#endif + /* Prepare to catch AFTER triggers. 
*/ AfterTriggerBeginQuery(); From 2737686fd0d3c3a490cbf0c87ae56b69b403dde6 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 14 Sep 2020 14:54:22 -0700 Subject: [PATCH 018/124] Move skipnodes to metadata tables --- Makefile | 9 +- cstore.h | 10 +- cstore.proto | 24 --- cstore_fdw--1.7.sql | 29 ++- cstore_metadata_serialization.c | 302 -------------------------------- cstore_metadata_serialization.h | 31 ---- cstore_metadata_tables.c | 300 +++++++++++++++++++++++++++++-- cstore_reader.c | 146 +-------------- cstore_writer.c | 81 ++------- 9 files changed, 342 insertions(+), 590 deletions(-) delete mode 100644 cstore.proto delete mode 100644 cstore_metadata_serialization.c delete mode 100644 cstore_metadata_serialization.h diff --git a/Makefile b/Makefile index 10d7fcc14..b8277f3a2 100644 --- a/Makefile +++ b/Makefile @@ -6,10 +6,8 @@ MODULE_big = cstore_fdw PG_CPPFLAGS = -std=c11 -SHLIB_LINK = -lprotobuf-c -OBJS = cstore.pb-c.o cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ - cstore_metadata_serialization.o cstore_compression.o mod.o \ - cstore_metadata_tables.o +OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ + cstore_compression.o mod.o cstore_metadata_tables.o EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ @@ -51,9 +49,6 @@ ifeq (,$(findstring $(MAJORVERSION), 9.3 9.4 9.5 9.6 10 11 12)) $(error PostgreSQL 9.3 to 12 is required to compile this extension) endif -cstore.pb-c.c: cstore.proto - protoc-c --c_out=. 
cstore.proto - installcheck: remove_cstore_files remove_cstore_files: diff --git a/cstore.h b/cstore.h index d0f959032..c7f849cc9 100644 --- a/cstore.h +++ b/cstore.h @@ -81,9 +81,9 @@ typedef struct CStoreOptions typedef struct StripeMetadata { uint64 fileOffset; - uint64 skipListLength; uint64 dataLength; - uint64 footerLength; + uint32 blockCount; + uint64 rowCount; uint64 id; } StripeMetadata; @@ -191,7 +191,6 @@ typedef struct StripeBuffers typedef struct StripeFooter { uint32 columnCount; - uint64 *skipListSizeArray; uint64 *existsSizeArray; uint64 *valueSizeArray; } StripeFooter; @@ -293,6 +292,11 @@ extern StripeFooter * ReadStripeFooter(Oid relid, uint64 stripe, int relationCol extern void InitCStoreTableMetadata(Oid relid, int blockRowCount); extern void InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe); extern TableMetadata * ReadTableMetadata(Oid relid); +extern void SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, + TupleDesc tupleDescriptor); +extern StripeSkipList * ReadStripeSkipList(Oid relid, uint64 stripe, + TupleDesc tupleDescriptor, + uint32 blockCount); typedef struct SmgrAddr { diff --git a/cstore.proto b/cstore.proto deleted file mode 100644 index a7525b633..000000000 --- a/cstore.proto +++ /dev/null @@ -1,24 +0,0 @@ -syntax = "proto2"; - -package protobuf; - -enum CompressionType { - // Values should match with the corresponding struct in cstore_fdw.h - NONE = 0; - PG_LZ = 1; -}; - -message ColumnBlockSkipNode { - optional uint64 rowCount = 1; - optional bytes minimumValue = 2; - optional bytes maximumValue = 3; - optional uint64 valueBlockOffset = 4; - optional uint64 valueLength = 5; - optional CompressionType valueCompressionType = 6; - optional uint64 existsBlockOffset = 7; - optional uint64 existsLength = 8; -} - -message ColumnBlockSkipList { - repeated ColumnBlockSkipNode blockSkipNodeArray = 1; -} diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 7a0c9c7b8..1c19fda50 100644 --- 
a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -68,23 +68,48 @@ CREATE TABLE cstore.cstore_tables ( PRIMARY KEY (relid) ) WITH (user_catalog_table = true); +COMMENT ON TABLE cstore.cstore_tables IS 'CStore table wide metadata'; + CREATE TABLE cstore.cstore_stripes ( relid oid NOT NULL, stripe bigint NOT NULL, file_offset bigint NOT NULL, - skiplist_length bigint NOT NULL, data_length bigint NOT NULL, + block_count int NOT NULL, + row_count bigint NOT NULL, PRIMARY KEY (relid, stripe), FOREIGN KEY (relid) REFERENCES cstore.cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); +COMMENT ON TABLE cstore.cstore_tables IS 'CStore per stripe metadata'; + CREATE TABLE cstore.cstore_stripe_attr ( relid oid NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, exists_size bigint NOT NULL, value_size bigint NOT NULL, - skiplist_size bigint NOT NULL, PRIMARY KEY (relid, stripe, attr), FOREIGN KEY (relid, stripe) REFERENCES cstore.cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); + +COMMENT ON TABLE cstore.cstore_tables IS 'CStore per stripe/column combination metadata'; + +CREATE TABLE cstore.cstore_skipnodes ( + relid oid NOT NULL, + stripe bigint NOT NULL, + attr int NOT NULL, + block int NOT NULL, + row_count bigint NOT NULL, + minimum_value bytea, + maximum_value bytea, + value_stream_offset bigint NOT NULL, + value_stream_length bigint NOT NULL, + exists_stream_offset bigint NOT NULL, + exists_stream_length bigint NOT NULL, + value_compression_type int NOT NULL, + PRIMARY KEY (relid, stripe, attr, block), + FOREIGN KEY (relid, stripe, attr) REFERENCES cstore.cstore_stripe_attr(relid, stripe, attr) ON DELETE CASCADE INITIALLY DEFERRED +) WITH (user_catalog_table = true); + +COMMENT ON TABLE cstore.cstore_tables IS 'CStore per block metadata'; diff --git a/cstore_metadata_serialization.c b/cstore_metadata_serialization.c deleted file mode 100644 index 2b06d4a15..000000000 --- 
a/cstore_metadata_serialization.c +++ /dev/null @@ -1,302 +0,0 @@ -/*------------------------------------------------------------------------- - * - * cstore_metadata_serialization.c - * - * This file contains function definitions for serializing/deserializing cstore - * metadata. - * - * Copyright (c) 2016, Citus Data, Inc. - * - * $Id$ - * - *------------------------------------------------------------------------- - */ - - -#include "postgres.h" - -#include "access/tupmacs.h" - -#include "cstore.h" -#include "cstore_metadata_serialization.h" -#include "cstore.pb-c.h" - -/* local functions forward declarations */ -static ProtobufCBinaryData DatumToProtobufBinary(Datum datum, bool typeByValue, - int typeLength); -static Datum ProtobufBinaryToDatum(ProtobufCBinaryData protobufBinary, - bool typeByValue, int typeLength); - - -/* - * SerializeColumnSkipList serializes a column skip list, where the colum skip - * list includes all block skip nodes for that column. The function then returns - * the result as a string info. 
- */ -StringInfo -SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, - bool typeByValue, int typeLength) -{ - StringInfo blockSkipListBuffer = NULL; - Protobuf__ColumnBlockSkipList protobufBlockSkipList = - PROTOBUF__COLUMN_BLOCK_SKIP_LIST__INIT; - Protobuf__ColumnBlockSkipNode **protobufBlockSkipNodeArray = NULL; - uint32 blockIndex = 0; - uint8 *blockSkipListData = NULL; - uint32 blockSkipListSize = 0; - - protobufBlockSkipNodeArray = palloc0(blockCount * - sizeof(Protobuf__ColumnBlockSkipNode *)); - for (blockIndex = 0; blockIndex < blockCount; blockIndex++) - { - ColumnBlockSkipNode blockSkipNode = blockSkipNodeArray[blockIndex]; - Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = NULL; - ProtobufCBinaryData binaryMinimumValue = { 0, 0 }; - ProtobufCBinaryData binaryMaximumValue = { 0, 0 }; - - if (blockSkipNode.hasMinMax) - { - binaryMinimumValue = DatumToProtobufBinary(blockSkipNode.minimumValue, - typeByValue, typeLength); - binaryMaximumValue = DatumToProtobufBinary(blockSkipNode.maximumValue, - typeByValue, typeLength); - } - - protobufBlockSkipNode = palloc0(sizeof(Protobuf__ColumnBlockSkipNode)); - protobuf__column_block_skip_node__init(protobufBlockSkipNode); - protobufBlockSkipNode->has_rowcount = true; - protobufBlockSkipNode->rowcount = blockSkipNode.rowCount; - protobufBlockSkipNode->has_minimumvalue = blockSkipNode.hasMinMax; - protobufBlockSkipNode->minimumvalue = binaryMinimumValue; - protobufBlockSkipNode->has_maximumvalue = blockSkipNode.hasMinMax; - protobufBlockSkipNode->maximumvalue = binaryMaximumValue; - protobufBlockSkipNode->has_valueblockoffset = true; - protobufBlockSkipNode->valueblockoffset = blockSkipNode.valueBlockOffset; - protobufBlockSkipNode->has_valuelength = true; - protobufBlockSkipNode->valuelength = blockSkipNode.valueLength; - protobufBlockSkipNode->has_existsblockoffset = true; - protobufBlockSkipNode->existsblockoffset = blockSkipNode.existsBlockOffset; - 
protobufBlockSkipNode->has_existslength = true; - protobufBlockSkipNode->existslength = blockSkipNode.existsLength; - protobufBlockSkipNode->has_valuecompressiontype = true; - protobufBlockSkipNode->valuecompressiontype = - (Protobuf__CompressionType) blockSkipNode.valueCompressionType; - - protobufBlockSkipNodeArray[blockIndex] = protobufBlockSkipNode; - } - - protobufBlockSkipList.n_blockskipnodearray = blockCount; - protobufBlockSkipList.blockskipnodearray = protobufBlockSkipNodeArray; - - blockSkipListSize = - protobuf__column_block_skip_list__get_packed_size(&protobufBlockSkipList); - blockSkipListData = palloc0(blockSkipListSize); - protobuf__column_block_skip_list__pack(&protobufBlockSkipList, blockSkipListData); - - blockSkipListBuffer = palloc0(sizeof(StringInfoData)); - blockSkipListBuffer->len = blockSkipListSize; - blockSkipListBuffer->maxlen = blockSkipListSize; - blockSkipListBuffer->data = (char *) blockSkipListData; - - return blockSkipListBuffer; -} - - -/* - * DeserializeBlockCount deserializes the given column skip list buffer and - * returns the number of blocks in column skip list. - */ -uint32 -DeserializeBlockCount(StringInfo buffer) -{ - uint32 blockCount = 0; - Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL; - - protobufBlockSkipList = - protobuf__column_block_skip_list__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufBlockSkipList == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid skip list buffer"))); - } - - blockCount = protobufBlockSkipList->n_blockskipnodearray; - - protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL); - - return blockCount; -} - - -/* - * DeserializeRowCount deserializes the given column skip list buffer and - * returns the total number of rows in block skip list. 
- */ -uint32 -DeserializeRowCount(StringInfo buffer) -{ - uint32 rowCount = 0; - Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL; - uint32 blockIndex = 0; - uint32 blockCount = 0; - - protobufBlockSkipList = - protobuf__column_block_skip_list__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufBlockSkipList == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid skip list buffer"))); - } - - blockCount = (uint32) protobufBlockSkipList->n_blockskipnodearray; - for (blockIndex = 0; blockIndex < blockCount; blockIndex++) - { - Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = - protobufBlockSkipList->blockskipnodearray[blockIndex]; - rowCount += protobufBlockSkipNode->rowcount; - } - - protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL); - - return rowCount; -} - - -/* - * DeserializeColumnSkipList deserializes the given buffer and returns the result as - * a ColumnBlockSkipNode array. If the number of unpacked block skip nodes are not - * equal to the given block count function errors out. 
- */ -ColumnBlockSkipNode * -DeserializeColumnSkipList(StringInfo buffer, bool typeByValue, int typeLength, - uint32 blockCount) -{ - ColumnBlockSkipNode *blockSkipNodeArray = NULL; - uint32 blockIndex = 0; - Protobuf__ColumnBlockSkipList *protobufBlockSkipList = NULL; - - protobufBlockSkipList = - protobuf__column_block_skip_list__unpack(NULL, buffer->len, - (uint8 *) buffer->data); - if (protobufBlockSkipList == NULL) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("invalid skip list buffer"))); - } - - if (protobufBlockSkipList->n_blockskipnodearray != blockCount) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("block skip node count and block count don't match"))); - } - - blockSkipNodeArray = palloc0(blockCount * sizeof(ColumnBlockSkipNode)); - - for (blockIndex = 0; blockIndex < blockCount; blockIndex++) - { - Protobuf__ColumnBlockSkipNode *protobufBlockSkipNode = NULL; - ColumnBlockSkipNode *blockSkipNode = NULL; - bool hasMinMax = false; - Datum minimumValue = 0; - Datum maximumValue = 0; - - protobufBlockSkipNode = protobufBlockSkipList->blockskipnodearray[blockIndex]; - if (!protobufBlockSkipNode->has_rowcount || - !protobufBlockSkipNode->has_existsblockoffset || - !protobufBlockSkipNode->has_valueblockoffset || - !protobufBlockSkipNode->has_existslength || - !protobufBlockSkipNode->has_valuelength || - !protobufBlockSkipNode->has_valuecompressiontype) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("missing required block skip node metadata"))); - } - - if (protobufBlockSkipNode->has_minimumvalue != - protobufBlockSkipNode->has_maximumvalue) - { - ereport(ERROR, (errmsg("could not unpack column store"), - errdetail("has minimum and has maximum fields " - "don't match"))); - } - - hasMinMax = protobufBlockSkipNode->has_minimumvalue; - if (hasMinMax) - { - minimumValue = ProtobufBinaryToDatum(protobufBlockSkipNode->minimumvalue, - typeByValue, typeLength); - maximumValue = 
ProtobufBinaryToDatum(protobufBlockSkipNode->maximumvalue, - typeByValue, typeLength); - } - - blockSkipNode = &blockSkipNodeArray[blockIndex]; - blockSkipNode->rowCount = protobufBlockSkipNode->rowcount; - blockSkipNode->hasMinMax = hasMinMax; - blockSkipNode->minimumValue = minimumValue; - blockSkipNode->maximumValue = maximumValue; - blockSkipNode->existsBlockOffset = protobufBlockSkipNode->existsblockoffset; - blockSkipNode->valueBlockOffset = protobufBlockSkipNode->valueblockoffset; - blockSkipNode->existsLength = protobufBlockSkipNode->existslength; - blockSkipNode->valueLength = protobufBlockSkipNode->valuelength; - blockSkipNode->valueCompressionType = - (CompressionType) protobufBlockSkipNode->valuecompressiontype; - } - - protobuf__column_block_skip_list__free_unpacked(protobufBlockSkipList, NULL); - - return blockSkipNodeArray; -} - - -/* Converts a datum to a ProtobufCBinaryData. */ -static ProtobufCBinaryData -DatumToProtobufBinary(Datum datum, bool datumTypeByValue, int datumTypeLength) -{ - ProtobufCBinaryData protobufBinary = { 0, 0 }; - - int datumLength = att_addlength_datum(0, datumTypeLength, datum); - char *datumBuffer = palloc0(datumLength); - - if (datumTypeLength > 0) - { - if (datumTypeByValue) - { - store_att_byval(datumBuffer, datum, datumTypeLength); - } - else - { - memcpy(datumBuffer, DatumGetPointer(datum), datumTypeLength); - } - } - else - { - memcpy(datumBuffer, DatumGetPointer(datum), datumLength); - } - - protobufBinary.data = (uint8 *) datumBuffer; - protobufBinary.len = datumLength; - - return protobufBinary; -} - - -/* Converts the given ProtobufCBinaryData to a Datum. */ -static Datum -ProtobufBinaryToDatum(ProtobufCBinaryData protobufBinary, bool datumTypeByValue, - int datumTypeLength) -{ - Datum datum = 0; - - /* - * We copy the protobuf data so the result of this function lives even - * after the unpacked protobuf struct is freed. 
- */ - char *binaryDataCopy = palloc0(protobufBinary.len); - memcpy(binaryDataCopy, protobufBinary.data, protobufBinary.len); - - datum = fetch_att(binaryDataCopy, datumTypeByValue, datumTypeLength); - - return datum; -} diff --git a/cstore_metadata_serialization.h b/cstore_metadata_serialization.h deleted file mode 100644 index efd27000a..000000000 --- a/cstore_metadata_serialization.h +++ /dev/null @@ -1,31 +0,0 @@ -/*------------------------------------------------------------------------- - * - * cstore_metadata_serialization.h - * - * Type and function declarations to serialize/deserialize cstore metadata. - * - * Copyright (c) 2016, Citus Data, Inc. - * - * $Id$ - * - *------------------------------------------------------------------------- - */ - -#ifndef CSTORE_SERIALIZATION_H -#define CSTORE_SERIALIZATION_H - -/* Function declarations for metadata serialization */ -extern StringInfo SerializeColumnSkipList(ColumnBlockSkipNode *blockSkipNodeArray, - uint32 blockCount, bool typeByValue, - int typeLength); - -/* Function declarations for metadata deserialization */ -extern void DeserializePostScript(StringInfo buffer, uint64 *tableFooterLength); -extern uint32 DeserializeBlockCount(StringInfo buffer); -extern uint32 DeserializeRowCount(StringInfo buffer); -extern ColumnBlockSkipNode * DeserializeColumnSkipList(StringInfo buffer, - bool typeByValue, int typeLength, - uint32 blockCount); - - -#endif /* CSTORE_SERIALIZATION_H */ diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 5b6151ef6..af0eb96c4 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -31,13 +31,12 @@ #include "lib/stringinfo.h" #include "port.h" #include "storage/fd.h" +#include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/rel.h" -#include "cstore_metadata_serialization.h" - typedef struct { Relation rel; @@ -50,6 +49,8 @@ static Oid CStoreStripesRelationId(void); static Oid 
CStoreStripesIndexRelationId(void); static Oid CStoreTablesRelationId(void); static Oid CStoreTablesIndexRelationId(void); +static Oid CStoreSkipNodesRelationId(void); +static Oid CStoreSkipNodesIndexRelationId(void); static Oid CStoreNamespaceId(void); static int TableBlockRowCount(Oid relid); static void DeleteTableMetadataRowIfExists(Oid relid); @@ -59,15 +60,16 @@ static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, static void DeleteTupleAndEnforceConstraints(ModifyState *state, HeapTuple heapTuple); static void FinishModifyRelation(ModifyState *state); static EState * create_estate_for_relation(Relation rel); +static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); +static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* constants for cstore_stripe_attr */ -#define Natts_cstore_stripe_attr 6 +#define Natts_cstore_stripe_attr 5 #define Anum_cstore_stripe_attr_relid 1 #define Anum_cstore_stripe_attr_stripe 2 #define Anum_cstore_stripe_attr_attr 3 #define Anum_cstore_stripe_attr_exists_size 4 #define Anum_cstore_stripe_attr_value_size 5 -#define Anum_cstore_stripe_attr_skiplist_size 6 /* constants for cstore_table */ #define Natts_cstore_tables 4 @@ -77,12 +79,29 @@ static EState * create_estate_for_relation(Relation rel); #define Anum_cstore_tables_version_minor 4 /* constants for cstore_stripe */ -#define Natts_cstore_stripes 5 +#define Natts_cstore_stripes 6 #define Anum_cstore_stripes_relid 1 #define Anum_cstore_stripes_stripe 2 #define Anum_cstore_stripes_file_offset 3 -#define Anum_cstore_stripes_skiplist_length 4 -#define Anum_cstore_stripes_data_length 5 +#define Anum_cstore_stripes_data_length 4 +#define Anum_cstore_stripes_block_count 5 +#define Anum_cstore_stripes_row_count 6 + +/* constants for cstore_skipnodes */ +#define Natts_cstore_skipnodes 12 +#define Anum_cstore_skipnodes_relid 1 +#define Anum_cstore_skipnodes_stripe 2 +#define Anum_cstore_skipnodes_attr 3 +#define 
Anum_cstore_skipnodes_block 4 +#define Anum_cstore_skipnodes_row_count 5 +#define Anum_cstore_skipnodes_minimum_value 6 +#define Anum_cstore_skipnodes_maximum_value 7 +#define Anum_cstore_skipnodes_value_stream_offset 8 +#define Anum_cstore_skipnodes_value_stream_length 9 +#define Anum_cstore_skipnodes_exists_stream_offset 10 +#define Anum_cstore_skipnodes_exists_stream_length 11 +#define Anum_cstore_skipnodes_value_compression_type 12 + /* * InitCStoreTableMetadata adds a record for the given relation in cstore_table. @@ -117,6 +136,185 @@ InitCStoreTableMetadata(Oid relid, int blockRowCount) } +/* + * SaveStripeSkipList saves StripeSkipList for a given stripe as rows + * of cstore_skipnodes. + */ +void +SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, + TupleDesc tupleDescriptor) +{ + uint32 columnIndex = 0; + uint32 blockIndex = 0; + Oid cstoreSkipNodesOid = InvalidOid; + Relation cstoreSkipNodes = NULL; + ModifyState *modifyState = NULL; + uint32 columnCount = stripeSkipList->columnCount; + + cstoreSkipNodesOid = CStoreSkipNodesRelationId(); + cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock); + modifyState = StartModifyRelation(cstoreSkipNodes); + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) + { + ColumnBlockSkipNode *skipNode = + &stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex]; + + Datum values[Natts_cstore_skipnodes] = { + ObjectIdGetDatum(relid), + Int64GetDatum(stripe), + Int32GetDatum(columnIndex + 1), + Int32GetDatum(blockIndex), + Int64GetDatum(skipNode->rowCount), + 0, /* to be filled below */ + 0, /* to be filled below */ + Int64GetDatum(skipNode->valueBlockOffset), + Int64GetDatum(skipNode->valueLength), + Int64GetDatum(skipNode->existsBlockOffset), + Int64GetDatum(skipNode->existsLength), + Int32GetDatum(skipNode->valueCompressionType) + }; + + bool nulls[Natts_cstore_skipnodes] = { false }; 
+ + if (skipNode->hasMinMax) + { + values[Anum_cstore_skipnodes_minimum_value - 1] = + PointerGetDatum(DatumToBytea(skipNode->minimumValue, + &tupleDescriptor->attrs[columnIndex])); + values[Anum_cstore_skipnodes_maximum_value - 1] = + PointerGetDatum(DatumToBytea(skipNode->maximumValue, + &tupleDescriptor->attrs[columnIndex])); + } + else + { + nulls[Anum_cstore_skipnodes_minimum_value - 1] = true; + nulls[Anum_cstore_skipnodes_maximum_value - 1] = true; + } + + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + } + } + + FinishModifyRelation(modifyState); + heap_close(cstoreSkipNodes, NoLock); + + CommandCounterIncrement(); +} + + +/* + * ReadStripeSkipList fetches StripeSkipList for a given stripe. + */ +StripeSkipList * +ReadStripeSkipList(Oid relid, uint64 stripe, TupleDesc tupleDescriptor, + uint32 blockCount) +{ + StripeSkipList *skipList = NULL; + uint32 columnIndex = 0; + Oid cstoreSkipNodesOid = InvalidOid; + Relation cstoreSkipNodes = NULL; + Relation index = NULL; + HeapTuple heapTuple = NULL; + uint32 columnCount = tupleDescriptor->natts; + ScanKeyData scanKey[2]; + SysScanDesc scanDescriptor = NULL; + + cstoreSkipNodesOid = CStoreSkipNodesRelationId(); + cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock); + index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock); + + ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relid, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe)); + + scanDescriptor = systable_beginscan_ordered(cstoreSkipNodes, index, NULL, 2, scanKey); + + skipList = palloc0(sizeof(StripeSkipList)); + skipList->blockCount = blockCount; + skipList->columnCount = columnCount; + skipList->blockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + skipList->blockSkipNodeArray[columnIndex] = + 
palloc0(blockCount * sizeof(ColumnBlockSkipNode)); + } + + while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) + { + uint32 attr = 0; + uint32 blockIndex = 0; + ColumnBlockSkipNode *skipNode = NULL; + + Datum datumArray[Natts_cstore_skipnodes]; + bool isNullArray[Natts_cstore_skipnodes]; + + heap_deform_tuple(heapTuple, RelationGetDescr(cstoreSkipNodes), datumArray, + isNullArray); + + attr = DatumGetInt32(datumArray[Anum_cstore_skipnodes_attr - 1]); + blockIndex = DatumGetInt32(datumArray[Anum_cstore_skipnodes_block - 1]); + + if (attr <= 0 || attr > columnCount) + { + ereport(ERROR, (errmsg("invalid stripe skipnode entry"), + errdetail("Attribute number out of range: %u", attr))); + } + + if (blockIndex < 0 || blockIndex >= blockCount) + { + ereport(ERROR, (errmsg("invalid stripe skipnode entry"), + errdetail("Block number out of range: %u", blockIndex))); + } + + columnIndex = attr - 1; + + skipNode = &skipList->blockSkipNodeArray[columnIndex][blockIndex]; + skipNode->rowCount = DatumGetInt64(datumArray[Anum_cstore_skipnodes_row_count - + 1]); + skipNode->valueBlockOffset = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_value_stream_offset - 1]); + skipNode->valueLength = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_value_stream_length - 1]); + skipNode->existsBlockOffset = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_exists_stream_offset - 1]); + skipNode->existsLength = + DatumGetInt64(datumArray[Anum_cstore_skipnodes_exists_stream_length - 1]); + skipNode->valueCompressionType = + DatumGetInt32(datumArray[Anum_cstore_skipnodes_value_compression_type - 1]); + + if (isNullArray[Anum_cstore_skipnodes_minimum_value - 1] || + isNullArray[Anum_cstore_skipnodes_maximum_value - 1]) + { + skipNode->hasMinMax = false; + } + else + { + bytea *minValue = DatumGetByteaP( + datumArray[Anum_cstore_skipnodes_minimum_value - 1]); + bytea *maxValue = DatumGetByteaP( + datumArray[Anum_cstore_skipnodes_maximum_value - 1]); + + skipNode->minimumValue 
= + ByteaToDatum(minValue, &tupleDescriptor->attrs[columnIndex]); + skipNode->maximumValue = + ByteaToDatum(maxValue, &tupleDescriptor->attrs[columnIndex]); + + skipNode->hasMinMax = true; + } + } + + systable_endscan_ordered(scanDescriptor); + index_close(index, NoLock); + heap_close(cstoreSkipNodes, NoLock); + + return skipList; +} + + /* * InsertStripeMetadataRow adds a row to cstore_stripes. */ @@ -128,8 +326,9 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) ObjectIdGetDatum(relid), Int64GetDatum(stripe->id), Int64GetDatum(stripe->fileOffset), - Int64GetDatum(stripe->skipListLength), - Int64GetDatum(stripe->dataLength) + Int64GetDatum(stripe->dataLength), + Int32GetDatum(stripe->blockCount), + Int64GetDatum(stripe->rowCount) }; Oid cstoreStripesOid = CStoreStripesRelationId(); @@ -187,8 +386,10 @@ ReadTableMetadata(Oid relid) datumArray[Anum_cstore_stripes_file_offset - 1]); stripeMetadata->dataLength = DatumGetInt64( datumArray[Anum_cstore_stripes_data_length - 1]); - stripeMetadata->skipListLength = DatumGetInt64( - datumArray[Anum_cstore_stripes_skiplist_length - 1]); + stripeMetadata->blockCount = DatumGetInt32( + datumArray[Anum_cstore_stripes_block_count - 1]); + stripeMetadata->rowCount = DatumGetInt64( + datumArray[Anum_cstore_stripes_row_count - 1]); tableMetadata->stripeMetadataList = lappend(tableMetadata->stripeMetadataList, stripeMetadata); @@ -299,8 +500,7 @@ SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer) Int64GetDatum(stripe), Int16GetDatum(attr), Int64GetDatum(footer->existsSizeArray[attr - 1]), - Int64GetDatum(footer->valueSizeArray[attr - 1]), - Int64GetDatum(footer->skipListSizeArray[attr - 1]) + Int64GetDatum(footer->valueSizeArray[attr - 1]) }; InsertTupleAndEnforceConstraints(modifyState, values, nulls); @@ -339,7 +539,6 @@ ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) footer = palloc0(sizeof(StripeFooter)); footer->existsSizeArray = palloc0(relationColumnCount * sizeof(int64)); 
footer->valueSizeArray = palloc0(relationColumnCount * sizeof(int64)); - footer->skipListSizeArray = palloc0(relationColumnCount * sizeof(int64)); /* * Stripe can have less columns than the relation if ALTER TABLE happens @@ -369,8 +568,6 @@ ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) DatumGetInt64(datumArray[Anum_cstore_stripe_attr_exists_size - 1]); footer->valueSizeArray[attr - 1] = DatumGetInt64(datumArray[Anum_cstore_stripe_attr_value_size - 1]); - footer->skipListSizeArray[attr - 1] = - DatumGetInt64(datumArray[Anum_cstore_stripe_attr_skiplist_size - 1]); } systable_endscan_ordered(scanDescriptor); @@ -507,6 +704,55 @@ create_estate_for_relation(Relation rel) } +/* + * DatumToBytea serializes a datum into a bytea value. + */ +static bytea * +DatumToBytea(Datum value, Form_pg_attribute attrForm) +{ + int datumLength = att_addlength_datum(0, attrForm->attlen, value); + bytea *result = palloc0(datumLength + VARHDRSZ); + + SET_VARSIZE(result, datumLength + VARHDRSZ); + + if (attrForm->attlen > 0) + { + if (attrForm->attbyval) + { + store_att_byval(VARDATA(result), value, attrForm->attlen); + } + else + { + memcpy(VARDATA(result), DatumGetPointer(value), attrForm->attlen); + } + } + else + { + memcpy(VARDATA(result), DatumGetPointer(value), datumLength); + } + + return result; +} + + +/* + * ByteaToDatum deserializes a value which was previously serialized using + * DatumToBytea. + */ +static Datum +ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm) +{ + /* + * We copy the data so the result of this function lives even + * after the byteaDatum is freed. + */ + char *binaryDataCopy = palloc0(VARSIZE_ANY_EXHDR(bytes)); + memcpy(binaryDataCopy, VARDATA_ANY(bytes), VARSIZE_ANY_EXHDR(bytes)); + + return fetch_att(binaryDataCopy, attrForm->attbyval, attrForm->attlen); +} + + /* * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr. * TODO: should we cache this similar to citus? 
@@ -573,6 +819,28 @@ CStoreTablesIndexRelationId(void) } +/* + * CStoreSkipNodesRelationId returns relation id of cstore_skipnodes. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreSkipNodesRelationId(void) +{ + return get_relname_relid("cstore_skipnodes", CStoreNamespaceId()); +} + + +/* + * CStoreSkipNodesIndexRelationId returns relation id of cstore_skipnodes_pkey. + * TODO: should we cache this similar to citus? + */ +static Oid +CStoreSkipNodesIndexRelationId(void) +{ + return get_relname_relid("cstore_skipnodes_pkey", CStoreNamespaceId()); +} + + /* * CStoreNamespaceId returns namespace id of the schema we store cstore * related tables. diff --git a/cstore_reader.c b/cstore_reader.c index 654d74697..929c65c04 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -34,7 +34,6 @@ #include "utils/rel.h" #include "cstore.h" -#include "cstore_metadata_serialization.h" #include "cstore_version_compat.h" /* static function declarations */ @@ -53,12 +52,6 @@ static ColumnBuffers * LoadColumnBuffers(Relation relation, uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, Form_pg_attribute attributeForm); -static StripeSkipList * LoadStripeSkipList(Relation relation, - StripeMetadata *stripeMetadata, - StripeFooter *stripeFooter, - uint32 columnCount, - bool *projectedColumnMask, - TupleDesc tupleDescriptor); static bool * SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList, List *whereClauseList); static List * BuildRestrictInfoList(List *whereClauseList); @@ -85,8 +78,6 @@ static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount); -static uint64 StripeRowCount(Relation relation, StripeMetadata *stripeMetadata); -static int RelationColumnCount(Oid relid); /* @@ -327,34 +318,13 @@ CStoreTableRowCount(Relation relation) 
foreach(stripeMetadataCell, tableMetadata->stripeMetadataList) { StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); - totalRowCount += StripeRowCount(relation, stripeMetadata); + totalRowCount += stripeMetadata->rowCount; } return totalRowCount; } -/* - * StripeRowCount reads serialized stripe footer, the first column's - * skip list, and returns number of rows for given stripe. - */ -static uint64 -StripeRowCount(Relation relation, StripeMetadata *stripeMetadata) -{ - uint64 rowCount = 0; - StringInfo firstColumnSkipListBuffer = NULL; - - StripeFooter *stripeFooter = ReadStripeFooter(relation->rd_id, stripeMetadata->id, - RelationColumnCount(relation->rd_id)); - - firstColumnSkipListBuffer = ReadFromSmgr(relation, stripeMetadata->fileOffset, - stripeFooter->skipListSizeArray[0]); - rowCount = DeserializeRowCount(firstColumnSkipListBuffer); - - return rowCount; -} - - /* * LoadFilteredStripeBuffers reads serialized stripe data from the given file. * The function skips over blocks whose rows are refuted by restriction qualifiers, @@ -373,10 +343,10 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); - StripeSkipList *stripeSkipList = LoadStripeSkipList(relation, stripeMetadata, - stripeFooter, columnCount, - projectedColumnMask, - tupleDescriptor); + StripeSkipList *stripeSkipList = ReadStripeSkipList(RelationGetRelid(relation), + stripeMetadata->id, + tupleDescriptor, + stripeMetadata->blockCount); bool *selectedBlockMask = SelectedBlockMask(stripeSkipList, projectedColumnList, whereClauseList); @@ -387,7 +357,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, /* load column data for projected columns */ columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); - currentColumnFileOffset = stripeMetadata->fileOffset + stripeMetadata->skipListLength; + currentColumnFileOffset = 
stripeMetadata->fileOffset; for (columnIndex = 0; columnIndex < stripeFooter->columnCount; columnIndex++) { @@ -511,98 +481,6 @@ LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, } -/* Reads the skip list for the given stripe. */ -static StripeSkipList * -LoadStripeSkipList(Relation relation, - StripeMetadata *stripeMetadata, - StripeFooter *stripeFooter, uint32 columnCount, - bool *projectedColumnMask, - TupleDesc tupleDescriptor) -{ - StripeSkipList *stripeSkipList = NULL; - ColumnBlockSkipNode **blockSkipNodeArray = NULL; - StringInfo firstColumnSkipListBuffer = NULL; - uint64 currentColumnSkipListFileOffset = 0; - uint32 columnIndex = 0; - uint32 stripeBlockCount = 0; - uint32 stripeColumnCount = stripeFooter->columnCount; - - /* deserialize block count */ - firstColumnSkipListBuffer = ReadFromSmgr(relation, stripeMetadata->fileOffset, - stripeFooter->skipListSizeArray[0]); - stripeBlockCount = DeserializeBlockCount(firstColumnSkipListBuffer); - - /* deserialize column skip lists */ - blockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); - currentColumnSkipListFileOffset = stripeMetadata->fileOffset; - - for (columnIndex = 0; columnIndex < stripeColumnCount; columnIndex++) - { - uint64 columnSkipListSize = stripeFooter->skipListSizeArray[columnIndex]; - bool firstColumn = columnIndex == 0; - - /* - * Only selected columns' column skip lists are read. However, the first - * column's skip list is read regardless of being selected. It is used by - * StripeSkipListRowCount later. 
- */ - if (projectedColumnMask[columnIndex] || firstColumn) - { - Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); - - StringInfo columnSkipListBuffer = - ReadFromSmgr(relation, currentColumnSkipListFileOffset, - columnSkipListSize); - ColumnBlockSkipNode *columnSkipList = - DeserializeColumnSkipList(columnSkipListBuffer, attributeForm->attbyval, - attributeForm->attlen, stripeBlockCount); - blockSkipNodeArray[columnIndex] = columnSkipList; - } - - currentColumnSkipListFileOffset += columnSkipListSize; - } - - /* table contains additional columns added after this stripe is created */ - for (columnIndex = stripeColumnCount; columnIndex < columnCount; columnIndex++) - { - ColumnBlockSkipNode *columnSkipList = NULL; - uint32 blockIndex = 0; - bool firstColumn = columnIndex == 0; - - /* no need to create ColumnBlockSkipList if the column is not selected */ - if (!projectedColumnMask[columnIndex] && !firstColumn) - { - blockSkipNodeArray[columnIndex] = NULL; - continue; - } - - /* create empty ColumnBlockSkipNode for missing columns*/ - columnSkipList = palloc0(stripeBlockCount * sizeof(ColumnBlockSkipNode)); - - for (blockIndex = 0; blockIndex < stripeBlockCount; blockIndex++) - { - columnSkipList[blockIndex].rowCount = 0; - columnSkipList[blockIndex].hasMinMax = false; - columnSkipList[blockIndex].minimumValue = 0; - columnSkipList[blockIndex].maximumValue = 0; - columnSkipList[blockIndex].existsBlockOffset = 0; - columnSkipList[blockIndex].valueBlockOffset = 0; - columnSkipList[blockIndex].existsLength = 0; - columnSkipList[blockIndex].valueLength = 0; - columnSkipList[blockIndex].valueCompressionType = COMPRESSION_NONE; - } - blockSkipNodeArray[columnIndex] = columnSkipList; - } - - stripeSkipList = palloc0(sizeof(StripeSkipList)); - stripeSkipList->blockSkipNodeArray = blockSkipNodeArray; - stripeSkipList->columnCount = columnCount; - stripeSkipList->blockCount = stripeBlockCount; - - return stripeSkipList; -} - - /* * 
SelectedBlockMask walks over each column's blocks and checks if a block can * be filtered without reading its data. The filtering happens when all rows in @@ -1207,15 +1085,3 @@ ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount) } } } - - -static int -RelationColumnCount(Oid relid) -{ - Relation rel = RelationIdGetRelation(relid); - TupleDesc tupleDesc = RelationGetDescr(rel); - int columnCount = tupleDesc->natts; - RelationClose(rel); - - return columnCount; -} diff --git a/cstore_writer.c b/cstore_writer.c index 319136dc9..8a5f498e2 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -24,7 +24,6 @@ #include "utils/rel.h" #include "cstore.h" -#include "cstore_metadata_serialization.h" #include "cstore_version_compat.h" static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, @@ -34,10 +33,7 @@ static StripeSkipList * CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, uint32 columnCount); static StripeMetadata FlushStripe(TableWriteState *writeState); -static StringInfo * CreateSkipListBufferArray(StripeSkipList *stripeSkipList, - TupleDesc tupleDescriptor); -static StripeFooter * CreateStripeFooter(StripeSkipList *stripeSkipList, - StringInfo *skipListBufferArray); +static StripeFooter * CreateStripeFooter(StripeSkipList *stripeSkipList); static StringInfo SerializeBoolArray(bool *boolArray, uint32 boolArrayLength); static void SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue, int datumTypeLength, @@ -90,9 +86,7 @@ CStoreBeginWrite(Oid relationId, uint64 lastStripeSize = 0; lastStripe = llast(tableMetadata->stripeMetadataList); - lastStripeSize += lastStripe->skipListLength; lastStripeSize += lastStripe->dataLength; - lastStripeSize += lastStripe->footerLength; currentFileOffset = lastStripe->fileOffset + lastStripeSize; currentStripeId = lastStripe->id + 1; @@ -429,10 +423,8 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) static 
StripeMetadata FlushStripe(TableWriteState *writeState) { - StripeMetadata stripeMetadata = { 0, 0, 0, 0 }; - uint64 skipListLength = 0; + StripeMetadata stripeMetadata = { 0 }; uint64 dataLength = 0; - StringInfo *skipListBufferArray = NULL; StripeFooter *stripeFooter = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; @@ -486,32 +478,21 @@ FlushStripe(TableWriteState *writeState) } /* create skip list and footer buffers */ - skipListBufferArray = CreateSkipListBufferArray(stripeSkipList, tupleDescriptor); - stripeFooter = CreateStripeFooter(stripeSkipList, skipListBufferArray); + SaveStripeSkipList(writeState->relationId, writeState->currentStripeId, + stripeSkipList, tupleDescriptor); + stripeFooter = CreateStripeFooter(stripeSkipList); /* - * Each stripe has three sections: - * (1) Skip list, which contains statistics for each column block, and can - * be used to skip reading row blocks that are refuted by WHERE clause list, - * (2) Data section, in which we store data for each column continuously. + * Each stripe has only one section: + * Data section, in which we store data for each column continuously. * We store data for each for each column in blocks. For each block, we * store two buffers: "exists" buffer, and "value" buffer. "exists" buffer * tells which values are not NULL. "value" buffer contains values for * present values. For each column, we first store all "exists" buffers, * and then all "value" buffers. - * (3) Stripe footer, which contains the skip list buffer size, exists buffer - * size, and value buffer size for each of the columns. - * - * We start by flushing the skip list buffers. 
*/ - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - StringInfo skipListBuffer = skipListBufferArray[columnIndex]; - WriteToSmgr(writeState, skipListBuffer->data, skipListBuffer->len); - writeState->currentFileOffset += skipListBuffer->len; - } - /* then, we flush the data buffers */ + /* flush the data buffers */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; @@ -546,60 +527,32 @@ FlushStripe(TableWriteState *writeState) /* set stripe metadata */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { - skipListLength += stripeFooter->skipListSizeArray[columnIndex]; dataLength += stripeFooter->existsSizeArray[columnIndex]; dataLength += stripeFooter->valueSizeArray[columnIndex]; } + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + stripeMetadata.rowCount += + stripeSkipList->blockSkipNodeArray[0][blockIndex].rowCount; + } + stripeMetadata.fileOffset = initialFileOffset; - stripeMetadata.skipListLength = skipListLength; stripeMetadata.dataLength = dataLength; - stripeMetadata.footerLength = 0; stripeMetadata.id = writeState->currentStripeId; + stripeMetadata.blockCount = blockCount; return stripeMetadata; } -/* - * CreateSkipListBufferArray serializes the skip list for each column of the - * given stripe and returns the result as an array. 
- */ -static StringInfo * -CreateSkipListBufferArray(StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor) -{ - StringInfo *skipListBufferArray = NULL; - uint32 columnIndex = 0; - uint32 columnCount = stripeSkipList->columnCount; - - skipListBufferArray = palloc0(columnCount * sizeof(StringInfo)); - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - StringInfo skipListBuffer = NULL; - ColumnBlockSkipNode *blockSkipNodeArray = - stripeSkipList->blockSkipNodeArray[columnIndex]; - Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); - - skipListBuffer = SerializeColumnSkipList(blockSkipNodeArray, - stripeSkipList->blockCount, - attributeForm->attbyval, - attributeForm->attlen); - - skipListBufferArray[columnIndex] = skipListBuffer; - } - - return skipListBufferArray; -} - - /* Creates and returns the footer for given stripe. */ static StripeFooter * -CreateStripeFooter(StripeSkipList *stripeSkipList, StringInfo *skipListBufferArray) +CreateStripeFooter(StripeSkipList *stripeSkipList) { StripeFooter *stripeFooter = NULL; uint32 columnIndex = 0; uint32 columnCount = stripeSkipList->columnCount; - uint64 *skipListSizeArray = palloc0(columnCount * sizeof(uint64)); uint64 *existsSizeArray = palloc0(columnCount * sizeof(uint64)); uint64 *valueSizeArray = palloc0(columnCount * sizeof(uint64)); @@ -614,12 +567,10 @@ CreateStripeFooter(StripeSkipList *stripeSkipList, StringInfo *skipListBufferArr existsSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].existsLength; valueSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].valueLength; } - skipListSizeArray[columnIndex] = skipListBufferArray[columnIndex]->len; } stripeFooter = palloc0(sizeof(StripeFooter)); stripeFooter->columnCount = columnCount; - stripeFooter->skipListSizeArray = skipListSizeArray; stripeFooter->existsSizeArray = existsSizeArray; stripeFooter->valueSizeArray = valueSizeArray; From 139da88ad97d8fb7aa4772a5bf774dbd1bd3994b Mon Sep 17 00:00:00 2001 
From: Hadi Moshayedi Date: Mon, 14 Sep 2020 15:08:50 -0700 Subject: [PATCH 019/124] Remove some unnecessary code & fix compiler warnings --- cstore_fdw.c | 62 +++++++++++----------------------------------------- 1 file changed, 13 insertions(+), 49 deletions(-) diff --git a/cstore_fdw.c b/cstore_fdw.c index cd8dcf4ef..63951c5f3 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -96,7 +96,7 @@ typedef struct CStoreValidOption #define COMPRESSION_STRING_DELIMITED_LIST "none, pglz" /* Array of options that are valid for cstore_fdw */ -static const uint32 ValidOptionCount = 4; +static const uint32 ValidOptionCount = 3; static const CStoreValidOption ValidOptionArray[] = { /* foreign table options */ @@ -319,54 +319,22 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } else if (nodeTag(parseTree) == T_DropStmt) { - DropStmt *dropStmt = (DropStmt *) parseTree; + List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); + ListCell *lc = NULL; - if (dropStmt->removeType == OBJECT_EXTENSION) + /* drop smgr storage */ + foreach(lc, dropRelids) { - bool removeCStoreDirectory = false; - ListCell *objectCell = NULL; + Oid relid = lfirst_oid(lc); + Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); - foreach(objectCell, dropStmt->objects) - { - Node *object = (Node *) lfirst(objectCell); - char *objectName = NULL; - -#if PG_VERSION_NUM >= 100000 - Assert(IsA(object, String)); - objectName = strVal(object); -#else - Assert(IsA(object, List)); - objectName = strVal(linitial((List *) object)); -#endif - - if (strncmp(CSTORE_FDW_NAME, objectName, NAMEDATALEN) == 0) - { - removeCStoreDirectory = true; - } - } - - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + RelationOpenSmgr(relation); + RelationDropStorage(relation); + heap_close(relation, AccessExclusiveLock); } - else - { - List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); - ListCell *lc = NULL; - /* drop smgr storage 
*/ - foreach(lc, dropRelids) - { - Oid relid = lfirst_oid(lc); - Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); - - RelationOpenSmgr(relation); - RelationDropStorage(relation); - heap_close(relation, AccessExclusiveLock); - } - - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); - } + CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, + destReceiver, completionTag); } else if (nodeTag(parseTree) == T_TruncateStmt) { @@ -857,11 +825,9 @@ TruncateCStoreTables(List *cstoreRelationList) { Relation relation = (Relation) lfirst(relationCell); Oid relationId = relation->rd_id; - CStoreOptions *cstoreOptions = NULL; Assert(CStoreTable(relationId)); - cstoreOptions = CStoreGetOptions(relationId); InitializeRelFileNode(relation, true); InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); } @@ -1735,7 +1701,6 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) { TableReadState *readState = NULL; Oid foreignTableId = InvalidOid; - CStoreOptions *cstoreOptions = NULL; Relation currentRelation = scanState->ss.ss_currentRelation; TupleDesc tupleDescriptor = RelationGetDescr(currentRelation); List *columnList = NIL; @@ -1752,7 +1717,6 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) } foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation); - cstoreOptions = CStoreGetOptions(foreignTableId); foreignScan = (ForeignScan *) scanState->ss.ps.plan; foreignPrivateList = (List *) foreignScan->fdw_private; @@ -1873,6 +1837,7 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, ForeignScan *foreignScan = NULL; char *relationName = NULL; int executorFlags = 0; + uint32 columnIndex = 0; TupleDesc tupleDescriptor = RelationGetDescr(relation); uint32 columnCount = tupleDescriptor->natts; @@ -1880,7 +1845,6 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, cstore_fdw_initrel(relation); /* create list of 
columns of the relation */ - uint32 columnIndex = 0; for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); From a94bbcc7ef1e3d533b0f237b4e37c1c1abaf5927 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Fri, 11 Sep 2020 18:12:47 +0200 Subject: [PATCH 020/124] write wal entries when writing to the buffers --- cstore_writer.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/cstore_writer.c b/cstore_writer.c index 8a5f498e2..5e44812bd 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -18,6 +18,7 @@ #include "access/nbtree.h" #include "catalog/pg_am.h" +#include "miscadmin.h" #include "storage/fd.h" #include "storage/smgr.h" #include "utils/memutils.h" @@ -401,11 +402,30 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) /* always appending */ Assert(phdr->pd_lower == addr.offset); + START_CRIT_SECTION(); + to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining); memcpy(page + phdr->pd_lower, data, to_write); phdr->pd_lower += to_write; MarkBufferDirty(buffer); + + if (RelationNeedsWAL(rel)) + { + XLogBeginInsert(); + + /* + * Since cstore will mostly write whole pages we force the transmission of the + * whole image in the buffer + */ + XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE); + + XLogRecPtr recptr = XLogInsert(RM_GENERIC_ID, 0); + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); + UnlockReleaseBuffer(buffer); data += to_write; From d69bff7621e83edccf336e734031048341c5a804 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Tue, 15 Sep 2020 10:05:27 -0700 Subject: [PATCH 021/124] Use schema config in control file --- cstore_fdw--1.7.sql | 28 +++++++++++++--------------- cstore_fdw.control | 3 ++- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 1c19fda50..19801f1f8 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -3,8 +3,6 @@ -- complain 
if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "CREATE EXTENSION cstore_fdw" to load this file. \quit -CREATE SCHEMA cstore; - CREATE FUNCTION cstore_fdw_handler() RETURNS fdw_handler AS 'MODULE_PATHNAME' @@ -28,7 +26,7 @@ CREATE EVENT TRIGGER cstore_ddl_event_end ON ddl_command_end EXECUTE PROCEDURE cstore_ddl_event_end_trigger(); -CREATE FUNCTION cstore_table_size(relation regclass) +CREATE FUNCTION public.cstore_table_size(relation regclass) RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C STRICT; @@ -50,7 +48,7 @@ BEGIN CONTINUE; END IF; - PERFORM public.cstore_clean_table_resources(v_obj.objid); + PERFORM cstore.cstore_clean_table_resources(v_obj.objid); END LOOP; END; @@ -60,7 +58,7 @@ CREATE EVENT TRIGGER cstore_drop_event ON SQL_DROP EXECUTE PROCEDURE cstore_drop_trigger(); -CREATE TABLE cstore.cstore_tables ( +CREATE TABLE cstore_tables ( relid oid NOT NULL, block_row_count int NOT NULL, version_major bigint NOT NULL, @@ -68,9 +66,9 @@ CREATE TABLE cstore.cstore_tables ( PRIMARY KEY (relid) ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore.cstore_tables IS 'CStore table wide metadata'; +COMMENT ON TABLE cstore_tables IS 'CStore table wide metadata'; -CREATE TABLE cstore.cstore_stripes ( +CREATE TABLE cstore_stripes ( relid oid NOT NULL, stripe bigint NOT NULL, file_offset bigint NOT NULL, @@ -78,24 +76,24 @@ CREATE TABLE cstore.cstore_stripes ( block_count int NOT NULL, row_count bigint NOT NULL, PRIMARY KEY (relid, stripe), - FOREIGN KEY (relid) REFERENCES cstore.cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore.cstore_tables IS 'CStore per stripe metadata'; +COMMENT ON TABLE cstore_tables IS 'CStore per stripe metadata'; -CREATE TABLE cstore.cstore_stripe_attr ( +CREATE TABLE cstore_stripe_attr ( relid oid NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, 
exists_size bigint NOT NULL, value_size bigint NOT NULL, PRIMARY KEY (relid, stripe, attr), - FOREIGN KEY (relid, stripe) REFERENCES cstore.cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore.cstore_tables IS 'CStore per stripe/column combination metadata'; +COMMENT ON TABLE cstore_tables IS 'CStore per stripe/column combination metadata'; -CREATE TABLE cstore.cstore_skipnodes ( +CREATE TABLE cstore_skipnodes ( relid oid NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, @@ -109,7 +107,7 @@ CREATE TABLE cstore.cstore_skipnodes ( exists_stream_length bigint NOT NULL, value_compression_type int NOT NULL, PRIMARY KEY (relid, stripe, attr, block), - FOREIGN KEY (relid, stripe, attr) REFERENCES cstore.cstore_stripe_attr(relid, stripe, attr) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid, stripe, attr) REFERENCES cstore_stripe_attr(relid, stripe, attr) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore.cstore_tables IS 'CStore per block metadata'; +COMMENT ON TABLE cstore_tables IS 'CStore per block metadata'; diff --git a/cstore_fdw.control b/cstore_fdw.control index a95b8509f..6f781dcbb 100644 --- a/cstore_fdw.control +++ b/cstore_fdw.control @@ -2,4 +2,5 @@ comment = 'foreign-data wrapper for flat cstore access' default_version = '1.7' module_pathname = '$libdir/cstore_fdw' -relocatable = true +relocatable = false +schema = cstore From f7f59933f868ce1803d35cb3501b0fd0a109371f Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 12:39:51 -0700 Subject: [PATCH 022/124] fix v11 tests --- cstore_fdw.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/cstore_fdw.c b/cstore_fdw.c index 63951c5f3..36f576cee 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -129,7 
+129,7 @@ static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); static List * DroppedCStoreRelidList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); -static void InitializeRelFileNode(Relation relation, bool force); +static void FdwNewRelFileNode(Relation relation); static void TruncateCStoreTables(List *cstoreRelationList); static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); @@ -828,26 +828,22 @@ TruncateCStoreTables(List *cstoreRelationList) Assert(CStoreTable(relationId)); - InitializeRelFileNode(relation, true); + FdwNewRelFileNode(relation); InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); } } /* - * Version 11 and earlier already create a relfilenode for foreign + * Version 11 and earlier already assign a relfilenode for foreign * tables. Version 12 and later do not, so we need to create one manually. */ static void -InitializeRelFileNode(Relation relation, bool force) +FdwNewRelFileNode(Relation relation) { -#if PG_VERSION_NUM >= 120000 Relation pg_class; HeapTuple tuple; Form_pg_class classform; - /* - * Get a writable copy of the pg_class tuple for the given relation. 
- */ pg_class = heap_open(RelationRelationId, RowExclusiveLock); tuple = SearchSysCacheCopy1(RELOID, @@ -857,14 +853,13 @@ InitializeRelFileNode(Relation relation, bool force) RelationGetRelid(relation)); classform = (Form_pg_class) GETSTRUCT(tuple); - if (!OidIsValid(classform->relfilenode) || force) + if (true) { char persistence = relation->rd_rel->relpersistence; Relation tmprel; Oid tablespace; Oid filenode; RelFileNode newrnode; - SMgrRelation srel; /* * Upgrade to AccessExclusiveLock, and hold until the end of the @@ -874,6 +869,9 @@ InitializeRelFileNode(Relation relation, bool force) tmprel = heap_open(relation->rd_id, AccessExclusiveLock); heap_close(tmprel, NoLock); + if (OidIsValid(relation->rd_rel->relfilenode)) + RelationDropStorage(relation); + if (OidIsValid(relation->rd_rel->reltablespace)) tablespace = relation->rd_rel->reltablespace; else @@ -885,9 +883,6 @@ InitializeRelFileNode(Relation relation, bool force) newrnode.dbNode = MyDatabaseId; newrnode.relNode = filenode; - srel = RelationCreateStorage(newrnode, persistence); - smgrclose(srel); - classform->relfilenode = filenode; classform->relpages = 0; /* it's empty until further notice */ classform->reltuples = 0; @@ -901,7 +896,25 @@ InitializeRelFileNode(Relation relation, bool force) heap_freetuple(tuple); heap_close(pg_class, RowExclusiveLock); +} + +static void +FdwCreateStorage(Relation relation) +{ + Assert(OidIsValid(relation->rd_rel->relfilenode)); + RelationOpenSmgr(relation); + if (!smgrexists(relation->rd_smgr, MAIN_FORKNUM)) + { +#if PG_VERSION_NUM >= 120000 + SMgrRelation srel; + srel = RelationCreateStorage(relation->rd_node, + relation->rd_rel->relpersistence); + smgrclose(srel); +#else + RelationCreateStorage(relation->rd_node, + relation->rd_rel->relpersistence); #endif + } } @@ -2174,7 +2187,7 @@ cstore_fdw_initrel(Relation rel) { #if PG_VERSION_NUM >= 120000 if (rel->rd_rel->relfilenode == InvalidOid) - InitializeRelFileNode(rel, false); + FdwNewRelFileNode(rel); /* * Copied 
code from RelationInitPhysicalAddr(), which doesn't @@ -2188,6 +2201,7 @@ cstore_fdw_initrel(Relation rel) rel->rd_node.dbNode = MyDatabaseId; rel->rd_node.relNode = rel->rd_rel->relfilenode; #endif + FdwCreateStorage(rel); } static Relation From fe7ab6df84ec7f982477fb1526db4fc67624a96e Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 09:10:34 -0700 Subject: [PATCH 023/124] Rename tests to be FDW-specific. --- Makefile | 4 ++-- expected/{alter.out => fdw_alter.out} | 0 expected/{analyze.out => fdw_analyze.out} | 0 expected/{drop.out => fdw_drop.out} | 0 expected/{functions.out => fdw_functions.out} | 0 expected/{insert.out => fdw_insert.out} | 0 expected/{query.out => fdw_query.out} | 0 expected/{truncate.out => fdw_truncate.out} | 0 expected/{truncate_0.out => fdw_truncate_0.out} | 0 input/{block_filtering.source => fdw_block_filtering.source} | 0 input/{copyto.source => fdw_copyto.source} | 0 input/{create.source => fdw_create.source} | 0 input/{data_types.source => fdw_data_types.source} | 0 input/{load.source => fdw_load.source} | 0 output/{block_filtering.source => fdw_block_filtering.source} | 0 output/{copyto.source => fdw_copyto.source} | 0 output/{create.source => fdw_create.source} | 0 output/{data_types.source => fdw_data_types.source} | 0 output/{load.source => fdw_load.source} | 0 sql/{alter.sql => fdw_alter.sql} | 0 sql/{analyze.sql => fdw_analyze.sql} | 0 sql/{drop.sql => fdw_drop.sql} | 0 sql/{functions.sql => fdw_functions.sql} | 0 sql/{insert.sql => fdw_insert.sql} | 0 sql/{query.sql => fdw_query.sql} | 0 sql/{truncate.sql => fdw_truncate.sql} | 0 26 files changed, 2 insertions(+), 2 deletions(-) rename expected/{alter.out => fdw_alter.out} (100%) rename expected/{analyze.out => fdw_analyze.out} (100%) rename expected/{drop.out => fdw_drop.out} (100%) rename expected/{functions.out => fdw_functions.out} (100%) rename expected/{insert.out => fdw_insert.out} (100%) rename expected/{query.out => fdw_query.out} (100%) rename 
expected/{truncate.out => fdw_truncate.out} (100%) rename expected/{truncate_0.out => fdw_truncate_0.out} (100%) rename input/{block_filtering.source => fdw_block_filtering.source} (100%) rename input/{copyto.source => fdw_copyto.source} (100%) rename input/{create.source => fdw_create.source} (100%) rename input/{data_types.source => fdw_data_types.source} (100%) rename input/{load.source => fdw_load.source} (100%) rename output/{block_filtering.source => fdw_block_filtering.source} (100%) rename output/{copyto.source => fdw_copyto.source} (100%) rename output/{create.source => fdw_create.source} (100%) rename output/{data_types.source => fdw_data_types.source} (100%) rename output/{load.source => fdw_load.source} (100%) rename sql/{alter.sql => fdw_alter.sql} (100%) rename sql/{analyze.sql => fdw_analyze.sql} (100%) rename sql/{drop.sql => fdw_drop.sql} (100%) rename sql/{functions.sql => fdw_functions.sql} (100%) rename sql/{insert.sql => fdw_insert.sql} (100%) rename sql/{query.sql => fdw_query.sql} (100%) rename sql/{truncate.sql => fdw_truncate.sql} (100%) diff --git a/Makefile b/Makefile index b8277f3a2..a266edd9a 100644 --- a/Makefile +++ b/Makefile @@ -14,8 +14,8 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql -REGRESS = create load query analyze data_types functions block_filtering drop \ - insert copyto alter truncate +REGRESS = fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ + fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ diff --git a/expected/alter.out b/expected/fdw_alter.out similarity index 100% rename from expected/alter.out rename to 
expected/fdw_alter.out diff --git a/expected/analyze.out b/expected/fdw_analyze.out similarity index 100% rename from expected/analyze.out rename to expected/fdw_analyze.out diff --git a/expected/drop.out b/expected/fdw_drop.out similarity index 100% rename from expected/drop.out rename to expected/fdw_drop.out diff --git a/expected/functions.out b/expected/fdw_functions.out similarity index 100% rename from expected/functions.out rename to expected/fdw_functions.out diff --git a/expected/insert.out b/expected/fdw_insert.out similarity index 100% rename from expected/insert.out rename to expected/fdw_insert.out diff --git a/expected/query.out b/expected/fdw_query.out similarity index 100% rename from expected/query.out rename to expected/fdw_query.out diff --git a/expected/truncate.out b/expected/fdw_truncate.out similarity index 100% rename from expected/truncate.out rename to expected/fdw_truncate.out diff --git a/expected/truncate_0.out b/expected/fdw_truncate_0.out similarity index 100% rename from expected/truncate_0.out rename to expected/fdw_truncate_0.out diff --git a/input/block_filtering.source b/input/fdw_block_filtering.source similarity index 100% rename from input/block_filtering.source rename to input/fdw_block_filtering.source diff --git a/input/copyto.source b/input/fdw_copyto.source similarity index 100% rename from input/copyto.source rename to input/fdw_copyto.source diff --git a/input/create.source b/input/fdw_create.source similarity index 100% rename from input/create.source rename to input/fdw_create.source diff --git a/input/data_types.source b/input/fdw_data_types.source similarity index 100% rename from input/data_types.source rename to input/fdw_data_types.source diff --git a/input/load.source b/input/fdw_load.source similarity index 100% rename from input/load.source rename to input/fdw_load.source diff --git a/output/block_filtering.source b/output/fdw_block_filtering.source similarity index 100% rename from 
output/block_filtering.source rename to output/fdw_block_filtering.source diff --git a/output/copyto.source b/output/fdw_copyto.source similarity index 100% rename from output/copyto.source rename to output/fdw_copyto.source diff --git a/output/create.source b/output/fdw_create.source similarity index 100% rename from output/create.source rename to output/fdw_create.source diff --git a/output/data_types.source b/output/fdw_data_types.source similarity index 100% rename from output/data_types.source rename to output/fdw_data_types.source diff --git a/output/load.source b/output/fdw_load.source similarity index 100% rename from output/load.source rename to output/fdw_load.source diff --git a/sql/alter.sql b/sql/fdw_alter.sql similarity index 100% rename from sql/alter.sql rename to sql/fdw_alter.sql diff --git a/sql/analyze.sql b/sql/fdw_analyze.sql similarity index 100% rename from sql/analyze.sql rename to sql/fdw_analyze.sql diff --git a/sql/drop.sql b/sql/fdw_drop.sql similarity index 100% rename from sql/drop.sql rename to sql/fdw_drop.sql diff --git a/sql/functions.sql b/sql/fdw_functions.sql similarity index 100% rename from sql/functions.sql rename to sql/fdw_functions.sql diff --git a/sql/insert.sql b/sql/fdw_insert.sql similarity index 100% rename from sql/insert.sql rename to sql/fdw_insert.sql diff --git a/sql/query.sql b/sql/fdw_query.sql similarity index 100% rename from sql/query.sql rename to sql/fdw_query.sql diff --git a/sql/truncate.sql b/sql/fdw_truncate.sql similarity index 100% rename from sql/truncate.sql rename to sql/fdw_truncate.sql From 09208986ba4465df1c7af62f1fdbba8250712e71 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Wed, 16 Sep 2020 15:20:17 +0200 Subject: [PATCH 024/124] remove travis --- .travis.yml | 42 ------------------------------------------ 1 file changed, 42 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f83f7206e..000000000 --- a/.travis.yml +++ /dev/null @@ 
-1,42 +0,0 @@ -sudo: required -dist: bionic -language: c -cache: - apt: true - directories: - - /home/travis/postgresql -env: - global: - - enable_coverage=yes - - PG_PRELOAD=cstore_fdw - matrix: - - PGVERSION=9.3 - - PGVERSION=9.4 - - PGVERSION=9.5 - - PGVERSION=9.6 - - PGVERSION=10 - - PGVERSION=11 - - PGVERSION=12 - -before_install: - - git clone -b v0.7.13 --depth 1 https://github.com/citusdata/tools.git - - sudo make -C tools install - - setup_apt - - nuke_pg -install: - - sudo apt-get install protobuf-c-compiler - - sudo apt-get install libprotobuf-c0-dev - - sudo locale-gen da_DK - - sudo locale-gen da_DK.utf8 - - sudo pip install cpp-coveralls - - install_pg - - install_custom_pg -before_script: - - chmod 777 . - - chmod 777 data - - chmod 666 data/* - - config_and_start_cluster -script: pg_travis_test -after_success: - - sudo chmod 666 *.gcda - - coveralls --exclude cstore.pb-c.c --exclude cstore.pb-c.h From 20a8bca426b80be6e9db7c1d57938d81602425aa Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Wed, 16 Sep 2020 15:21:24 +0200 Subject: [PATCH 025/124] add integration files for circle ci This is based on the circle ci integration we have for citus, albeit highly simplified. --- .circleci/build.sh | 16 +++++++ .circleci/config.yml | 101 ++++++++++++++++++++++++++++++++++++++++++ .circleci/run_test.sh | 27 +++++++++++ .gitignore | 5 +++ 4 files changed, 149 insertions(+) create mode 100755 .circleci/build.sh create mode 100644 .circleci/config.yml create mode 100755 .circleci/run_test.sh diff --git a/.circleci/build.sh b/.circleci/build.sh new file mode 100755 index 000000000..6a9f14c74 --- /dev/null +++ b/.circleci/build.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +set -euxo pipefail +IFS=$'\n\t' + +status=0 + +basedir="$(pwd)" +installdir="${basedir}/install-${PG_MAJOR}" + +make install DESTDIR="${installdir}" +pushd "${installdir}" +find . 
-type f -print > "${basedir}/files.lst" +cat "${basedir}/files.lst" +tar cvf "${basedir}/install-${PG_MAJOR}.tar" $(cat "${basedir}/files.lst") +popd diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 000000000..9f2532c1d --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,101 @@ +version: 2.1 +orbs: + codecov: codecov/codecov@1.1.1 + +jobs: + check-style: + docker: + - image: 'citus/stylechecker:latest' + steps: + - checkout + - run: + name: 'Check Style' + command: | + citus_indent --check + - run: + name: 'Check if whitespace fixing changed anything, install editorconfig if it did' + command: | + git diff --exit-code + + build-11: + docker: + - image: 'citus/extbuilder:11.9' + steps: + - checkout + - run: + name: 'Configure, Build, and Install' + command: | + PG_MAJOR=11 .circleci/build.sh + - persist_to_workspace: + root: . + paths: + - install-11.tar + + build-12: + docker: + - image: 'citus/extbuilder:12.4' + steps: + - checkout + - run: + name: 'Configure, Build, and Install' + command: | + PG_MAJOR=12 .circleci/build.sh + - persist_to_workspace: + root: . + paths: + - install-12.tar + + test-11_checkinstall: + docker: + - image: 'citus/exttester:11.9' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - run: + name: 'Prepare Container & Install Extension' + command: | + chown -R circleci:circleci /home/circleci + tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / + - run: + name: 'Run Test' + command: | + gosu circleci .circleci/run_test.sh installcheck + - codecov/upload: + flags: 'test_11,installcheck' + + test-12_checkinstall: + docker: + - image: 'citus/exttester:12.4' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . 
+ - run: + name: 'Prepare Container & Install Extension' + command: | + chown -R circleci:circleci /home/circleci + tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / + - run: + name: 'Run Test' + command: | + gosu circleci .circleci/run_test.sh installcheck + - codecov/upload: + flags: 'test_12,installcheck' + +workflows: + version: 2 + build_and_test: + jobs: + + - check-style + + - build-11 + - build-12 + + - test-11_checkinstall: + requires: [build-11] + - test-12_checkinstall: + requires: [build-12] diff --git a/.circleci/run_test.sh b/.circleci/run_test.sh new file mode 100755 index 000000000..f9e183b56 --- /dev/null +++ b/.circleci/run_test.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +set -euxo pipefail +IFS=$'\n\t' + +status=0 + +export PGPORT=${PGPORT:-55432} + +function cleanup { + pg_ctl -D /tmp/postgres stop + rm -rf /tmp/postgres +} +trap cleanup EXIT + +rm -rf /tmp/postgres +initdb -E unicode /tmp/postgres +echo "shared_preload_libraries = 'cstore_fdw'" >> /tmp/postgres/postgresql.conf +pg_ctl -D /tmp/postgres -o "-p ${PGPORT}" -l /tmp/postgres_logfile start || status=$? +if [ -z $status ]; then cat /tmp/postgres_logfile; fi + +make "${@}" || status=$? 
+diffs="regression.diffs" + +if test -f "${diffs}"; then cat "${diffs}"; fi + +exit $status diff --git a/.gitignore b/.gitignore index 21c5e32ea..0c643e590 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,8 @@ .vscode *.pb-c.* + +# ignore files that could be created by circleci automation +files.lst +install-*.tar +install-*/ From 1e93e15a8d8f0f9fddc73c0f0c7880ba9cce6191 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Wed, 16 Sep 2020 15:21:57 +0200 Subject: [PATCH 026/124] fix indentation via citus_indent --- cstore.h | 4 +++- cstore_fdw.c | 46 ++++++++++++++++++++++++++++------------ cstore_metadata_tables.c | 3 +++ cstore_reader.c | 18 +++++++++------- cstore_version_compat.h | 2 +- cstore_writer.c | 21 +++++++++++------- 6 files changed, 62 insertions(+), 32 deletions(-) diff --git a/cstore.h b/cstore.h index c7f849cc9..87b552bbf 100644 --- a/cstore.h +++ b/cstore.h @@ -204,6 +204,7 @@ typedef struct TableReadState TableMetadata *tableMetadata; TupleDesc tupleDescriptor; Relation relation; + /* * List of Var pointers for columns in the query. 
We use this both for * getting vector of projected columns, and also when we want to build @@ -301,7 +302,7 @@ extern StripeSkipList * ReadStripeSkipList(Oid relid, uint64 stripe, typedef struct SmgrAddr { BlockNumber blockno; - uint32 offset; + uint32 offset; } SmgrAddr; /* @@ -320,4 +321,5 @@ logical_to_smgr(uint64 logicalOffset) return addr; } + #endif /* CSTORE_H */ diff --git a/cstore_fdw.c b/cstore_fdw.c index 36f576cee..512dee5a3 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -319,14 +319,14 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } else if (nodeTag(parseTree) == T_DropStmt) { - List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); - ListCell *lc = NULL; + List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); + ListCell *lc = NULL; /* drop smgr storage */ foreach(lc, dropRelids) { - Oid relid = lfirst_oid(lc); - Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); + Oid relid = lfirst_oid(lc); + Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); RelationOpenSmgr(relation); RelationDropStorage(relation); @@ -334,7 +334,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + destReceiver, completionTag); } else if (nodeTag(parseTree) == T_TruncateStmt) { @@ -833,6 +833,7 @@ TruncateCStoreTables(List *cstoreRelationList) } } + /* * Version 11 and earlier already assign a relfilenode for foreign * tables. Version 12 and later do not, so we need to create one manually. 
@@ -840,26 +841,28 @@ TruncateCStoreTables(List *cstoreRelationList) static void FdwNewRelFileNode(Relation relation) { - Relation pg_class; - HeapTuple tuple; - Form_pg_class classform; + Relation pg_class; + HeapTuple tuple; + Form_pg_class classform; pg_class = heap_open(RelationRelationId, RowExclusiveLock); tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(RelationGetRelid(relation))); if (!HeapTupleIsValid(tuple)) + { elog(ERROR, "could not find tuple for relation %u", RelationGetRelid(relation)); + } classform = (Form_pg_class) GETSTRUCT(tuple); if (true) { - char persistence = relation->rd_rel->relpersistence; - Relation tmprel; - Oid tablespace; - Oid filenode; - RelFileNode newrnode; + char persistence = relation->rd_rel->relpersistence; + Relation tmprel; + Oid tablespace; + Oid filenode; + RelFileNode newrnode; /* * Upgrade to AccessExclusiveLock, and hold until the end of the @@ -870,12 +873,18 @@ FdwNewRelFileNode(Relation relation) heap_close(tmprel, NoLock); if (OidIsValid(relation->rd_rel->relfilenode)) + { RelationDropStorage(relation); + } if (OidIsValid(relation->rd_rel->reltablespace)) + { tablespace = relation->rd_rel->reltablespace; + } else + { tablespace = MyDatabaseTableSpace; + } filenode = GetNewRelFileNode(tablespace, NULL, persistence); @@ -898,6 +907,7 @@ FdwNewRelFileNode(Relation relation) heap_close(pg_class, RowExclusiveLock); } + static void FdwCreateStorage(Relation relation) { @@ -1692,7 +1702,7 @@ ColumnList(RelOptInfo *baserel, Oid foreignTableId) static void CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState) { - Relation relation = scanState->ss.ss_currentRelation; + Relation relation = scanState->ss.ss_currentRelation; cstore_fdw_initrel(relation); @@ -2187,16 +2197,22 @@ cstore_fdw_initrel(Relation rel) { #if PG_VERSION_NUM >= 120000 if (rel->rd_rel->relfilenode == InvalidOid) + { FdwNewRelFileNode(rel); + } /* * Copied code from RelationInitPhysicalAddr(), which doesn't * work on foreign 
tables. */ if (OidIsValid(rel->rd_rel->reltablespace)) + { rel->rd_node.spcNode = rel->rd_rel->reltablespace; + } else + { rel->rd_node.spcNode = MyDatabaseTableSpace; + } rel->rd_node.dbNode = MyDatabaseId; rel->rd_node.relNode = rel->rd_rel->relfilenode; @@ -2204,6 +2220,7 @@ cstore_fdw_initrel(Relation rel) FdwCreateStorage(rel); } + static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode) { @@ -2214,6 +2231,7 @@ cstore_fdw_open(Oid relationId, LOCKMODE lockmode) return rel; } + static Relation cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode) { diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index af0eb96c4..8a67a3a9e 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -692,9 +692,12 @@ create_estate_for_relation(Relation rel) estate->es_output_cid = GetCurrentCommandId(true); #if PG_VERSION_NUM < 120000 + /* Triggers might need a slot */ if (resultRelInfo->ri_TrigDesc) + { estate->es_trig_tuple_slot = ExecInitExtraTupleSlot(estate, NULL); + } #endif /* Prepare to catch AFTER triggers. 
*/ diff --git a/cstore_reader.c b/cstore_reader.c index 929c65c04..fecb45605 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -1037,26 +1037,27 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor return defaultValue; } + static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size) { - StringInfo resultBuffer = makeStringInfo(); - uint64 read = 0; + StringInfo resultBuffer = makeStringInfo(); + uint64 read = 0; enlargeStringInfo(resultBuffer, size); resultBuffer->len = size; while (read < size) { - Buffer buffer; - Page page; - PageHeader phdr; - uint32 to_read; - SmgrAddr addr = logical_to_smgr(offset + read); + Buffer buffer; + Page page; + PageHeader phdr; + uint32 to_read; + SmgrAddr addr = logical_to_smgr(offset + read); buffer = ReadBuffer(rel, addr.blockno); page = BufferGetPage(buffer); - phdr = (PageHeader)page; + phdr = (PageHeader) page; to_read = Min(size - read, phdr->pd_upper - addr.offset); memcpy(resultBuffer->data + read, page + addr.offset, to_read); @@ -1067,6 +1068,7 @@ ReadFromSmgr(Relation rel, uint64 offset, uint32 size) return resultBuffer; } + /* * ResetUncompressedBlockData iterates over deserialized column block data * and sets valueBuffer field to empty buffer. This field is allocated in stripe diff --git a/cstore_version_compat.h b/cstore_version_compat.h index 95521c1aa..3d1a60f93 100644 --- a/cstore_version_compat.h +++ b/cstore_version_compat.h @@ -2,7 +2,7 @@ * * cstore_version_compat.h * - * Compatibility macros for writing code agnostic to PostgreSQL versions + * Compatibility macros for writing code agnostic to PostgreSQL versions * * Copyright (c) 2018, Citus Data, Inc. 
* diff --git a/cstore_writer.c b/cstore_writer.c index 5e44812bd..55a314ec4 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -363,21 +363,22 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, return stripeSkipList; } + static void WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) { - uint64 logicalOffset = writeState->currentFileOffset; - uint64 remaining = dataLength; - Relation rel = writeState->relation; - Buffer buffer; + uint64 logicalOffset = writeState->currentFileOffset; + uint64 remaining = dataLength; + Relation rel = writeState->relation; + Buffer buffer; while (remaining > 0) { - SmgrAddr addr = logical_to_smgr(logicalOffset); + SmgrAddr addr = logical_to_smgr(logicalOffset); BlockNumber nblocks; - Page page; - PageHeader phdr; - uint64 to_write; + Page page; + PageHeader phdr; + uint64 to_write; RelationOpenSmgr(rel); nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); @@ -397,7 +398,9 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) page = BufferGetPage(buffer); phdr = (PageHeader) page; if (PageIsNew(page)) + { PageInit(page, BLCKSZ, 0); + } /* always appending */ Assert(phdr->pd_lower == addr.offset); @@ -434,6 +437,7 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) } } + /* * FlushStripe flushes current stripe data into the file. The function first ensures * the last data block for each column is properly serialized and compressed. Then, @@ -832,6 +836,7 @@ AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata stripeMetadataCopy); } + /* * CopyStringInfo creates a deep copy of given source string allocating only needed * amount of memory. 
From 48e9c17b5015705acd2baac35bc8164f908cf3e4 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 14 Aug 2020 16:43:13 -0700 Subject: [PATCH 027/124] stubs for table access method --- Makefile | 2 +- cstore_tableam.c | 404 +++++++++++++++++++++++++++++++++++++++++++++++ cstore_tableam.h | 6 + 3 files changed, 411 insertions(+), 1 deletion(-) create mode 100644 cstore_tableam.c create mode 100644 cstore_tableam.h diff --git a/Makefile b/Makefile index a266edd9a..f7943e61b 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ MODULE_big = cstore_fdw PG_CPPFLAGS = -std=c11 OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ - cstore_compression.o mod.o cstore_metadata_tables.o + cstore_compression.o mod.o cstore_metadata_tables.o cstore_tableam.o EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ diff --git a/cstore_tableam.c b/cstore_tableam.c new file mode 100644 index 000000000..e64243ce2 --- /dev/null +++ b/cstore_tableam.c @@ -0,0 +1,404 @@ +#include "postgres.h" + +#include "cstore_tableam.h" +#include + +#include "miscadmin.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/multixact.h" +#include "access/rewriteheap.h" +#include "access/tableam.h" +#include "access/tsmapi.h" +#include "access/tuptoaster.h" +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/storage.h" +#include "catalog/storage_xlog.h" +#include "commands/progress.h" +#include "executor/executor.h" +#include "optimizer/plancat.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/bufpage.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "storage/procarray.h" +#include "storage/smgr.h" +#include "utils/builtins.h" +#include "utils/rel.h" + + +static const TupleTableSlotOps * +cstore_slot_callbacks(Relation relation) +{ + return &TTSOpsVirtual; +} + +static 
TableScanDesc +cstore_beginscan(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + uint32 flags) +{ + elog(ERROR, "cstore_beginscan not implemented"); +} + +static void +cstore_endscan(TableScanDesc sscan) +{ + elog(ERROR, "cstore_endscan not implemented"); +} + +static void +cstore_rescan(TableScanDesc sscan, ScanKey key, bool set_params, + bool allow_strat, bool allow_sync, bool allow_pagemode) +{ + elog(ERROR, "cstore_rescan not implemented"); +} + +static bool +cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) +{ + elog(ERROR, "cstore_getnextslot not implemented"); +} + +static Size +cstore_parallelscan_estimate(Relation rel) +{ + elog(ERROR, "cstore_parallelscan_estimate not implemented"); +} + +static Size +cstore_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan) +{ + elog(ERROR, "cstore_parallelscan_initialize not implemented"); +} + +static void +cstore_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) +{ + elog(ERROR, "cstore_parallelscan_reinitialize not implemented"); +} + +static IndexFetchTableData * +cstore_index_fetch_begin(Relation rel) +{ + elog(ERROR, "cstore_index_fetch_begin not implemented"); +} + +static void +cstore_index_fetch_reset(IndexFetchTableData *scan) +{ + elog(ERROR, "cstore_index_fetch_reset not implemented"); +} + +static void +cstore_index_fetch_end(IndexFetchTableData *scan) +{ + elog(ERROR, "cstore_index_fetch_end not implemented"); +} + +static bool +cstore_index_fetch_tuple(struct IndexFetchTableData *scan, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot, + bool *call_again, bool *all_dead) +{ + elog(ERROR, "cstore_index_fetch_tuple not implemented"); +} + +static bool +cstore_fetch_row_version(Relation relation, + ItemPointer tid, + Snapshot snapshot, + TupleTableSlot *slot) +{ + elog(ERROR, "cstore_fetch_row_version not implemented"); +} + +static void 
+cstore_get_latest_tid(TableScanDesc sscan, + ItemPointer tid) +{ + elog(ERROR, "cstore_get_latest_tid not implemented"); +} + +static bool +cstore_tuple_tid_valid(TableScanDesc scan, ItemPointer tid) +{ + elog(ERROR, "cstore_tuple_tid_valid not implemented"); +} + +static bool +cstore_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, + Snapshot snapshot) +{ + return true; +} + +static TransactionId +cstore_compute_xid_horizon_for_tuples(Relation rel, + ItemPointerData *tids, + int nitems) +{ + elog(ERROR, "cstore_compute_xid_horizon_for_tuples not implemented"); +} + +static void +cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, + int options, BulkInsertState bistate) +{ + elog(ERROR, "cstore_tuple_insert not implemented"); +} + +static void +cstore_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, + CommandId cid, int options, + BulkInsertState bistate, uint32 specToken) +{ + elog(ERROR, "cstore_tuple_insert_speculative not implemented"); +} + +static void +cstore_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, + uint32 specToken, bool succeeded) +{ + elog(ERROR, "cstore_tuple_complete_speculative not implemented"); +} + +static void +cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, + CommandId cid, int options, BulkInsertState bistate) +{ + elog(ERROR, "cstore_multi_insert not implemented"); +} + +static TM_Result +cstore_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, + Snapshot snapshot, Snapshot crosscheck, bool wait, + TM_FailureData *tmfd, bool changingPart) +{ + elog(ERROR, "cstore_tuple_delete not implemented"); +} + +static TM_Result +cstore_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, + CommandId cid, Snapshot snapshot, Snapshot crosscheck, + bool wait, TM_FailureData *tmfd, + LockTupleMode *lockmode, bool *update_indexes) +{ + elog(ERROR, "cstore_tuple_update not implemented"); +} + +static TM_Result 
+cstore_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, + TupleTableSlot *slot, CommandId cid, LockTupleMode mode, + LockWaitPolicy wait_policy, uint8 flags, + TM_FailureData *tmfd) +{ + elog(ERROR, "cstore_tuple_lock not implemented"); +} + +static void +cstore_finish_bulk_insert(Relation relation, int options) +{ + elog(ERROR, "cstore_finish_bulk_insert not implemented"); +} + +static void +cstore_relation_set_new_filenode(Relation rel, + const RelFileNode *newrnode, + char persistence, + TransactionId *freezeXid, + MultiXactId *minmulti) +{ + elog(ERROR, "cstore_relation_set_new_filenode not implemented"); +} + +static void +cstore_relation_nontransactional_truncate(Relation rel) +{ + elog(ERROR, "cstore_relation_nontransactional_truncate not implemented"); +} + +static void +cstore_relation_copy_data(Relation rel, const RelFileNode *newrnode) +{ + elog(ERROR, "cstore_relation_copy_data not implemented"); +} + +static void +cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, + Relation OldIndex, bool use_sort, + TransactionId OldestXmin, + TransactionId *xid_cutoff, + MultiXactId *multi_cutoff, + double *num_tuples, + double *tups_vacuumed, + double *tups_recently_dead) +{ + elog(ERROR, "cstore_relation_copy_for_cluster not implemented"); +} + +static bool +cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, + BufferAccessStrategy bstrategy) +{ + elog(ERROR, "cstore_scan_analyze_next_block not implemented"); +} + +static bool +cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, + double *liverows, double *deadrows, + TupleTableSlot *slot) +{ + elog(ERROR, "cstore_scan_analyze_next_tuple not implemented"); +} + +static double +cstore_index_build_range_scan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + bool allow_sync, + bool anyvisible, + bool progress, + BlockNumber start_blockno, + BlockNumber numblocks, + IndexBuildCallback callback, + void 
*callback_state, + TableScanDesc scan) +{ + elog(ERROR, "cstore_index_build_range_scan not implemented"); +} + +static void +cstore_index_validate_scan(Relation heapRelation, + Relation indexRelation, + IndexInfo *indexInfo, + Snapshot snapshot, + ValidateIndexState *state) +{ + elog(ERROR, "cstore_index_validate_scan not implemented"); +} + +static uint64 +cstore_relation_size(Relation rel, ForkNumber forkNumber) +{ + elog(ERROR, "cstore_relation_size not implemented"); +} + +static bool +cstore_relation_needs_toast_table(Relation rel) +{ + elog(ERROR, "cstore_relation_needs_toast_table not implemented"); +} + +static void +cstore_estimate_rel_size(Relation rel, int32 *attr_widths, + BlockNumber *pages, double *tuples, + double *allvisfrac) +{ + elog(ERROR, "cstore_estimate_rel_size not implemented"); +} + +static bool +cstore_scan_bitmap_next_block(TableScanDesc scan, + TBMIterateResult *tbmres) +{ + elog(ERROR, "cstore_scan_bitmap_next_block not implemented"); +} + +static bool +cstore_scan_bitmap_next_tuple(TableScanDesc scan, + TBMIterateResult *tbmres, + TupleTableSlot *slot) +{ + elog(ERROR, "cstore_scan_bitmap_next_tuple not implemented"); +} + +static bool +cstore_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate) +{ + elog(ERROR, "cstore_scan_sample_next_block not implemented"); +} + +static bool +cstore_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, + TupleTableSlot *slot) +{ + elog(ERROR, "cstore_scan_sample_next_tuple not implemented"); +} + +static const TableAmRoutine cstore_am_methods = { + .type = T_TableAmRoutine, + + .slot_callbacks = cstore_slot_callbacks, + + .scan_begin = cstore_beginscan, + .scan_end = cstore_endscan, + .scan_rescan = cstore_rescan, + .scan_getnextslot = cstore_getnextslot, + + .parallelscan_estimate = cstore_parallelscan_estimate, + .parallelscan_initialize = cstore_parallelscan_initialize, + .parallelscan_reinitialize = cstore_parallelscan_reinitialize, + + .index_fetch_begin = 
cstore_index_fetch_begin, + .index_fetch_reset = cstore_index_fetch_reset, + .index_fetch_end = cstore_index_fetch_end, + .index_fetch_tuple = cstore_index_fetch_tuple, + + .tuple_fetch_row_version = cstore_fetch_row_version, + .tuple_get_latest_tid = cstore_get_latest_tid, + .tuple_tid_valid = cstore_tuple_tid_valid, + .tuple_satisfies_snapshot = cstore_tuple_satisfies_snapshot, + .compute_xid_horizon_for_tuples = cstore_compute_xid_horizon_for_tuples, + + .tuple_insert = cstore_tuple_insert, + .tuple_insert_speculative = cstore_tuple_insert_speculative, + .tuple_complete_speculative = cstore_tuple_complete_speculative, + .multi_insert = cstore_multi_insert, + .tuple_delete = cstore_tuple_delete, + .tuple_update = cstore_tuple_update, + .tuple_lock = cstore_tuple_lock, + .finish_bulk_insert = cstore_finish_bulk_insert, + + .relation_set_new_filenode = cstore_relation_set_new_filenode, + .relation_nontransactional_truncate = cstore_relation_nontransactional_truncate, + .relation_copy_data = cstore_relation_copy_data, + .relation_copy_for_cluster = cstore_relation_copy_for_cluster, + .relation_vacuum = heap_vacuum_rel, + .scan_analyze_next_block = cstore_scan_analyze_next_block, + .scan_analyze_next_tuple = cstore_scan_analyze_next_tuple, + .index_build_range_scan = cstore_index_build_range_scan, + .index_validate_scan = cstore_index_validate_scan, + + .relation_size = cstore_relation_size, + .relation_needs_toast_table = cstore_relation_needs_toast_table, + + .relation_estimate_size = cstore_estimate_rel_size, + + .scan_bitmap_next_block = cstore_scan_bitmap_next_block, + .scan_bitmap_next_tuple = cstore_scan_bitmap_next_tuple, + .scan_sample_next_block = cstore_scan_sample_next_block, + .scan_sample_next_tuple = cstore_scan_sample_next_tuple +}; + + +const TableAmRoutine * +GetCstoreTableAmRoutine(void) +{ + return &cstore_am_methods; +} + +PG_FUNCTION_INFO_V1(cstore_tableam_handler); +Datum +cstore_tableam_handler(PG_FUNCTION_ARGS) +{ + 
PG_RETURN_POINTER(&cstore_am_methods); +} diff --git a/cstore_tableam.h b/cstore_tableam.h new file mode 100644 index 000000000..67dbe0d87 --- /dev/null +++ b/cstore_tableam.h @@ -0,0 +1,6 @@ +#include "postgres.h" +#include "fmgr.h" +#include "access/tableam.h" + +const TableAmRoutine *GetCstoreTableAmRoutine(void); +Datum cstore_tableam_handler(PG_FUNCTION_ARGS); From b6ca8fcd70e3c33addb2e897ad93975b7040cb6c Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 11 Sep 2020 16:50:06 -0700 Subject: [PATCH 028/124] extension control --- Makefile | 2 +- cstore_fdw--1.7--1.8.sql | 9 +++++++++ cstore_fdw.control | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) create mode 100644 cstore_fdw--1.7--1.8.sql diff --git a/Makefile b/Makefile index f7943e61b..97f4d9e64 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ - cstore_fdw--1.0--1.1.sql + cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql REGRESS = fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate diff --git a/cstore_fdw--1.7--1.8.sql b/cstore_fdw--1.7--1.8.sql new file mode 100644 index 000000000..b1519d73e --- /dev/null +++ b/cstore_fdw--1.7--1.8.sql @@ -0,0 +1,9 @@ +/* cstore_fdw/cstore_fdw--1.7--1.8.sql */ + +CREATE FUNCTION cstore_tableam_handler(internal) +RETURNS table_am_handler +LANGUAGE C +AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; + +CREATE ACCESS METHOD cstore_tableam +TYPE TABLE HANDLER cstore_tableam_handler; diff --git a/cstore_fdw.control b/cstore_fdw.control index 6f781dcbb..57fd0808a 100644 --- a/cstore_fdw.control +++ b/cstore_fdw.control @@ -1,6 +1,6 @@ # cstore_fdw extension comment = 'foreign-data wrapper for flat cstore 
access' -default_version = '1.7' +default_version = '1.8' module_pathname = '$libdir/cstore_fdw' relocatable = false schema = cstore From b06f48a2a79c38b18132374f8f797743e5a15c45 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 11 Sep 2020 16:51:09 -0700 Subject: [PATCH 029/124] tableAM updates --- cstore_tableam.c | 171 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 161 insertions(+), 10 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index e64243ce2..d67ac10b6 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -1,6 +1,5 @@ #include "postgres.h" -#include "cstore_tableam.h" #include #include "miscadmin.h" @@ -19,6 +18,7 @@ #include "catalog/storage_xlog.h" #include "commands/progress.h" #include "executor/executor.h" +#include "nodes/makefuncs.h" #include "optimizer/plancat.h" #include "pgstat.h" #include "storage/bufmgr.h" @@ -31,6 +31,57 @@ #include "utils/builtins.h" #include "utils/rel.h" +#include "cstore_tableam.h" +#include "cstore_fdw.h" + +typedef struct CStoreScanDescData +{ + TableScanDescData cs_base; + TableReadState *cs_readState; +} CStoreScanDescData; + +typedef struct CStoreScanDescData *CStoreScanDesc; + +static TableWriteState *CStoreWriteState = NULL; + +static void +cstore_init_write_state(Relation relation) +{ + //TODO: upgrade lock to serialize writes + + if (CStoreWriteState != NULL) + { + // TODO: consider whether it's possible for a new write to start + // before an old one is flushed + Assert(CStoreWriteState->relation->rd_id == relation->rd_id); + } + + if (CStoreWriteState == NULL) + { + CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(relation->rd_id); + TupleDesc tupdesc = RelationGetDescr(relation); + + elog(NOTICE, "initializing write state for relation %d", relation->rd_id); + CStoreWriteState = CStoreBeginWrite(cstoreFdwOptions->filename, + cstoreFdwOptions->compressionType, + cstoreFdwOptions->stripeRowCount, + cstoreFdwOptions->blockRowCount, + tupdesc); + + 
CStoreWriteState->relation = relation; + } +} + +void +cstore_free_write_state() +{ + if (CStoreWriteState != NULL) + { + elog(NOTICE, "flushing write state for relation %d", CStoreWriteState->relation->rd_id); + CStoreEndWrite(CStoreWriteState); + CStoreWriteState = NULL; + } +} static const TupleTableSlotOps * cstore_slot_callbacks(Relation relation) @@ -44,13 +95,48 @@ cstore_beginscan(Relation relation, Snapshot snapshot, ParallelTableScanDesc parallel_scan, uint32 flags) { - elog(ERROR, "cstore_beginscan not implemented"); + TupleDesc tupdesc = relation->rd_att; + CStoreFdwOptions *cstoreFdwOptions = NULL; + TableReadState *readState = NULL; + CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); + List *columnList = NIL; + + cstoreFdwOptions = CStoreGetOptions(relation->rd_id); + + scan->cs_base.rs_rd = relation; + scan->cs_base.rs_snapshot = snapshot; + scan->cs_base.rs_nkeys = nkeys; + scan->cs_base.rs_key = key; + scan->cs_base.rs_flags = flags; + scan->cs_base.rs_parallel = parallel_scan; + + for (int i = 0; i < tupdesc->natts; i++) + { + Index varno = 0; + AttrNumber varattno = i+1; + Oid vartype = tupdesc->attrs[i].atttypid; + int32 vartypmod = 0; + Oid varcollid = 0; + Index varlevelsup = 0; + Var *var = makeVar(varno, varattno, vartype, vartypmod, + varcollid, varlevelsup); + + columnList = lappend(columnList, var); + } + + readState = CStoreBeginRead(cstoreFdwOptions->filename, tupdesc, + columnList, NULL); + + scan->cs_readState = readState; + + return ((TableScanDesc) scan); } static void cstore_endscan(TableScanDesc sscan) { - elog(ERROR, "cstore_endscan not implemented"); + CStoreScanDesc scan = (CStoreScanDesc) sscan; + CStoreEndRead(scan->cs_readState); } static void @@ -63,7 +149,22 @@ cstore_rescan(TableScanDesc sscan, ScanKey key, bool set_params, static bool cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) { - elog(ERROR, "cstore_getnextslot not implemented"); + CStoreScanDesc scan = 
(CStoreScanDesc) sscan; + TupleDesc tupdesc = slot->tts_tupleDescriptor; + int natts = tupdesc->natts; + bool nextRowFound; + + ExecClearTuple(slot); + memset(slot->tts_values, 0, sizeof(Datum) * natts); + memset(slot->tts_isnull, true, sizeof(bool) * natts); + + nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, slot->tts_isnull); + + if (!nextRowFound) + return false; + + ExecStoreVirtualTuple(slot); + return true; } static Size @@ -153,7 +254,23 @@ static void cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) { - elog(ERROR, "cstore_tuple_insert not implemented"); + HeapTuple heapTuple; + + cstore_init_write_state(relation); + + heapTuple = GetSlotHeapTuple(slot); + if (HeapTupleHasExternal(heapTuple)) + { + /* detoast any toasted attributes */ + HeapTuple newTuple = toast_flatten_tuple(heapTuple, + slot->tts_tupleDescriptor); + + ExecForceStoreHeapTuple(newTuple, slot, true); + } + + slot_getallattrs(slot); + + CStoreWriteRow(CStoreWriteState, slot->tts_values, slot->tts_isnull); } static void @@ -175,7 +292,26 @@ static void cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) { - elog(ERROR, "cstore_multi_insert not implemented"); + cstore_init_write_state(relation); + + for (int i = 0; i < ntuples; i++) + { + TupleTableSlot *tupleSlot = slots[i]; + HeapTuple heapTuple = GetSlotHeapTuple(tupleSlot); + + if (HeapTupleHasExternal(heapTuple)) + { + /* detoast any toasted attributes */ + HeapTuple newTuple = toast_flatten_tuple(heapTuple, + tupleSlot->tts_tupleDescriptor); + + ExecForceStoreHeapTuple(newTuple, tupleSlot, true); + } + + slot_getallattrs(tupleSlot); + + CStoreWriteRow(CStoreWriteState, tupleSlot->tts_values, tupleSlot->tts_isnull); + } } static TM_Result @@ -207,7 +343,11 @@ cstore_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, static void 
cstore_finish_bulk_insert(Relation relation, int options) { - elog(ERROR, "cstore_finish_bulk_insert not implemented"); + //TODO: flush relation like for heap? + // free write state or only in ExecutorEnd_hook? + + // for COPY + cstore_free_write_state(); } static void @@ -217,7 +357,15 @@ cstore_relation_set_new_filenode(Relation rel, TransactionId *freezeXid, MultiXactId *minmulti) { - elog(ERROR, "cstore_relation_set_new_filenode not implemented"); + SMgrRelation srel; + + Assert(persistence == RELPERSISTENCE_PERMANENT); + *freezeXid = RecentXmin; + *minmulti = GetOldestMultiXactId(); + srel = RelationCreateStorage(*newrnode, persistence); + CreateCStoreDatabaseDirectory(MyDatabaseId); + InitializeCStoreTableFile(rel->rd_id, rel); + smgrclose(srel); } static void @@ -295,7 +443,7 @@ cstore_relation_size(Relation rel, ForkNumber forkNumber) static bool cstore_relation_needs_toast_table(Relation rel) { - elog(ERROR, "cstore_relation_needs_toast_table not implemented"); + return false; } static void @@ -303,7 +451,10 @@ cstore_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac) { - elog(ERROR, "cstore_estimate_rel_size not implemented"); + *attr_widths = 12; + *tuples = 100; + *pages = 10; + *allvisfrac = 1.0; } static bool From aa422f2da020604c80361571eb31bfd08dac9c90 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 09:01:38 -0700 Subject: [PATCH 030/124] fixup rebase --- cstore.h | 2 -- cstore_tableam.c | 46 ++++++++++++++++++++++++++++------------------ cstore_tableam.h | 1 + 3 files changed, 29 insertions(+), 20 deletions(-) diff --git a/cstore.h b/cstore.h index 87b552bbf..9a1764972 100644 --- a/cstore.h +++ b/cstore.h @@ -252,8 +252,6 @@ typedef struct TableWriteState extern CompressionType ParseCompressionType(const char *compressionTypeString); extern void InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions); -extern void 
CreateCStoreDatabaseDirectory(Oid databaseOid); -extern void RemoveCStoreDatabaseDirectory(Oid databaseOid); /* Function declarations for writing to a cstore file */ extern TableWriteState * CStoreBeginWrite(Oid relationId, diff --git a/cstore_tableam.c b/cstore_tableam.c index d67ac10b6..f93971c59 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -31,8 +31,8 @@ #include "utils/builtins.h" #include "utils/rel.h" +#include "cstore.h" #include "cstore_tableam.h" -#include "cstore_fdw.h" typedef struct CStoreScanDescData { @@ -44,6 +44,16 @@ typedef struct CStoreScanDescData *CStoreScanDesc; static TableWriteState *CStoreWriteState = NULL; +static CStoreOptions * +CStoreGetDefaultOptions(void) +{ + CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions)); + cstoreOptions->compressionType = DEFAULT_COMPRESSION_TYPE; + cstoreOptions->stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; + cstoreOptions->blockRowCount = DEFAULT_BLOCK_ROW_COUNT; + return cstoreOptions; +} + static void cstore_init_write_state(Relation relation) { @@ -58,14 +68,14 @@ cstore_init_write_state(Relation relation) if (CStoreWriteState == NULL) { - CStoreFdwOptions *cstoreFdwOptions = CStoreGetOptions(relation->rd_id); + CStoreOptions *cstoreOptions = CStoreGetDefaultOptions(); TupleDesc tupdesc = RelationGetDescr(relation); elog(NOTICE, "initializing write state for relation %d", relation->rd_id); - CStoreWriteState = CStoreBeginWrite(cstoreFdwOptions->filename, - cstoreFdwOptions->compressionType, - cstoreFdwOptions->stripeRowCount, - cstoreFdwOptions->blockRowCount, + CStoreWriteState = CStoreBeginWrite(relation->rd_id, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, tupdesc); CStoreWriteState->relation = relation; @@ -95,13 +105,14 @@ cstore_beginscan(Relation relation, Snapshot snapshot, ParallelTableScanDesc parallel_scan, uint32 flags) { - TupleDesc tupdesc = relation->rd_att; - CStoreFdwOptions *cstoreFdwOptions = NULL; - TableReadState 
*readState = NULL; - CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); - List *columnList = NIL; + Oid relid = relation->rd_id; + TupleDesc tupdesc = relation->rd_att; + CStoreOptions *cstoreOptions = NULL; + TableReadState *readState = NULL; + CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); + List *columnList = NIL; - cstoreFdwOptions = CStoreGetOptions(relation->rd_id); + cstoreOptions = CStoreGetDefaultOptions(); scan->cs_base.rs_rd = relation; scan->cs_base.rs_snapshot = snapshot; @@ -124,8 +135,8 @@ cstore_beginscan(Relation relation, Snapshot snapshot, columnList = lappend(columnList, var); } - readState = CStoreBeginRead(cstoreFdwOptions->filename, tupdesc, - columnList, NULL); + readState = CStoreBeginRead(relid, tupdesc, columnList, NULL); + readState->relation = relation; scan->cs_readState = readState; @@ -258,7 +269,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, cstore_init_write_state(relation); - heapTuple = GetSlotHeapTuple(slot); + heapTuple = ExecCopySlotHeapTuple(slot); if (HeapTupleHasExternal(heapTuple)) { /* detoast any toasted attributes */ @@ -297,7 +308,7 @@ cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, for (int i = 0; i < ntuples; i++) { TupleTableSlot *tupleSlot = slots[i]; - HeapTuple heapTuple = GetSlotHeapTuple(tupleSlot); + HeapTuple heapTuple = ExecCopySlotHeapTuple(tupleSlot); if (HeapTupleHasExternal(heapTuple)) { @@ -363,8 +374,7 @@ cstore_relation_set_new_filenode(Relation rel, *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - CreateCStoreDatabaseDirectory(MyDatabaseId); - InitializeCStoreTableFile(rel->rd_id, rel); + InitializeCStoreTableFile(rel->rd_id, rel, CStoreGetDefaultOptions()); smgrclose(srel); } diff --git a/cstore_tableam.h b/cstore_tableam.h index 67dbe0d87..f81c13155 100644 --- a/cstore_tableam.h +++ b/cstore_tableam.h @@ -4,3 +4,4 @@ const TableAmRoutine 
*GetCstoreTableAmRoutine(void); Datum cstore_tableam_handler(PG_FUNCTION_ARGS); +extern void cstore_free_write_state(void); From f886fb33e5f639803c2f8afa8850f5e7dc332241 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 13:01:09 -0700 Subject: [PATCH 031/124] add AM tests --- Makefile | 4 +- expected/am_alter.out | 178 +++++++++++++++++++++ expected/am_analyze.out | 19 +++ expected/am_block_filtering.out | 116 ++++++++++++++ expected/am_copyto.out | 23 +++ expected/am_create.out | 44 ++++++ expected/am_data_types.out | 78 +++++++++ expected/am_drop.out | 40 +++++ expected/am_functions.out | 18 +++ expected/am_insert.out | 88 +++++++++++ expected/am_load.out | 39 +++++ expected/am_query.out | 105 +++++++++++++ expected/am_truncate.out | 231 +++++++++++++++++++++++++++ expected/am_truncate_0.out | 262 +++++++++++++++++++++++++++++++ input/am_block_filtering.source | 69 ++++++++ input/am_copyto.source | 17 ++ input/am_create.source | 43 +++++ input/am_data_types.source | 68 ++++++++ input/am_load.source | 44 ++++++ output/am_block_filtering.source | 116 ++++++++++++++ output/am_copyto.source | 23 +++ output/am_create.source | 44 ++++++ output/am_data_types.source | 78 +++++++++ output/am_load.source | 39 +++++ sql/am_alter.sql | 85 ++++++++++ sql/am_analyze.sql | 11 ++ sql/am_block_filtering.sql | 69 ++++++++ sql/am_copyto.sql | 17 ++ sql/am_create.sql | 43 +++++ sql/am_data_types.sql | 68 ++++++++ sql/am_drop.sql | 48 ++++++ sql/am_functions.sql | 20 +++ sql/am_insert.sql | 56 +++++++ sql/am_load.sql | 44 ++++++ sql/am_query.sql | 34 ++++ sql/am_truncate.sql | 116 ++++++++++++++ 36 files changed, 2396 insertions(+), 1 deletion(-) create mode 100644 expected/am_alter.out create mode 100644 expected/am_analyze.out create mode 100644 expected/am_block_filtering.out create mode 100644 expected/am_copyto.out create mode 100644 expected/am_create.out create mode 100644 expected/am_data_types.out create mode 100644 expected/am_drop.out create mode 100644 
expected/am_functions.out create mode 100644 expected/am_insert.out create mode 100644 expected/am_load.out create mode 100644 expected/am_query.out create mode 100644 expected/am_truncate.out create mode 100644 expected/am_truncate_0.out create mode 100644 input/am_block_filtering.source create mode 100644 input/am_copyto.source create mode 100644 input/am_create.source create mode 100644 input/am_data_types.source create mode 100644 input/am_load.source create mode 100644 output/am_block_filtering.source create mode 100644 output/am_copyto.source create mode 100644 output/am_create.source create mode 100644 output/am_data_types.source create mode 100644 output/am_load.source create mode 100644 sql/am_alter.sql create mode 100644 sql/am_analyze.sql create mode 100644 sql/am_block_filtering.sql create mode 100644 sql/am_copyto.sql create mode 100644 sql/am_create.sql create mode 100644 sql/am_data_types.sql create mode 100644 sql/am_drop.sql create mode 100644 sql/am_functions.sql create mode 100644 sql/am_insert.sql create mode 100644 sql/am_load.sql create mode 100644 sql/am_query.sql create mode 100644 sql/am_truncate.sql diff --git a/Makefile b/Makefile index 97f4d9e64..2fc550ca9 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,9 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql -REGRESS = fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ +REGRESS = am_create am_load am_query am_analyze am_data_types am_functions \ + am_block_filtering am_drop am_insert am_copyto am_alter am_truncate \ + fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ 
diff --git a/expected/am_alter.out b/expected/am_alter.out new file mode 100644 index 000000000..659e2723e --- /dev/null +++ b/expected/am_alter.out @@ -0,0 +1,178 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; +-- drop a column +ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; +-- test analyze +ANALYZE test_alter_table; +-- verify select queries run as expected +SELECT * FROM test_alter_table; + b | c +---+--- + 2 | 3 + 5 | 6 + 8 | 9 +(3 rows) + +SELECT a FROM test_alter_table; +ERROR: column "a" does not exist +LINE 1: SELECT a FROM test_alter_table; + ^ +SELECT b FROM test_alter_table; + b +--- + 2 + 5 + 8 +(3 rows) + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +ERROR: INSERT has more expressions than target columns +LINE 1: INSERT INTO test_alter_table (SELECT 3, 5, 8); + ^ +INSERT INTO test_alter_table (SELECT 5, 8); +-- add a column with no defaults +ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | +(4 rows) + +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + b | c | d +---+---+--- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | + 3 | 5 | 8 +(5 rows) + +-- add a fixed-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 +(5 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + b | c | d | e +---+---+---+--- + 2 | 3 | | 3 + 5 | 6 | | 3 + 8 | 9 | | 3 + 5 | 8 | | 3 + 3 | 5 | 8 | 3 + 1 | 2 | 4 | 8 +(6 rows) + +-- add a variable-length column with default value 
+ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME +(6 rows) + +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + b | c | d | e | f +---+---+---+---+--------- + 2 | 3 | | 3 | TEXT ME + 5 | 6 | | 3 | TEXT ME + 8 | 9 | | 3 | TEXT ME + 5 | 8 | | 3 | TEXT ME + 3 | 5 | 8 | 3 | TEXT ME + 1 | 2 | 4 | 8 | TEXT ME + 1 | 2 | 4 | 8 | ABCDEF +(7 rows) + +-- drop couple of columns +ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; +ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; + b | d | f +---+---+--------- + 2 | | TEXT ME + 5 | | TEXT ME + 8 | | TEXT ME + 5 | | TEXT ME + 3 | 8 | TEXT ME + 1 | 4 | TEXT ME + 1 | 4 | ABCDEF +(7 rows) + +SELECT count(*) from test_alter_table; + count +------- + 7 +(1 row) + +SELECT count(t.*) from test_alter_table t; + count +------- + 7 +(1 row) + +-- unsupported default values +ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for column "g" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ERROR: unsupported default value for column "h" +HINT: Expression is either mutable or does not evaluate to constant value +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + b | d | f | g | h +---+---+---------+---+--- + 2 | | TEXT ME | | + 5 | | TEXT ME | | + 8 | | TEXT ME | | + 5 | | TEXT ME | | + 3 | 8 | TEXT ME | | 
+ 1 | 4 | TEXT ME | | + 1 | 4 | ABCDEF | | +(7 rows) + +-- unsupported type change +ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; +-- this is valid type change +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; +-- this is not valid +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; +ERROR: Column j cannot be cast automatically to type pg_catalog.int4 +-- text / varchar conversion is valid both ways +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; +DROP FOREIGN TABLE test_alter_table; diff --git a/expected/am_analyze.out b/expected/am_analyze.out new file mode 100644 index 000000000..f8c4d974a --- /dev/null +++ b/expected/am_analyze.out @@ -0,0 +1,19 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. +-- +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + count +------- + 6 +(1 row) + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; + count +------- + 6 +(1 row) + diff --git a/expected/am_block_filtering.out b/expected/am_block_filtering.out new file mode 100644 index 000000000..bccfafd15 --- /dev/null +++ b/expected/am_block_filtering.out @@ -0,0 +1,116 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. 
+-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(block_row_count '1000', stripe_row_count '2000'); +COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 801 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); + filtered_row_count +-------------------- + 200 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); + filtered_row_count +-------------------- + 101 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); + filtered_row_count +-------------------- + 900 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); + filtered_row_count +-------------------- + 990 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 1979 +(1 row) + +SELECT 
filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 1602 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 3958 +(1 row) + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server; +COPY collation_block_filtering_test FROM STDIN; +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; + a +--- + Å +(1 row) + diff --git a/expected/am_copyto.out b/expected/am_copyto.out new file mode 100644 index 000000000..2b68d0ad5 --- /dev/null +++ b/expected/am_copyto.out @@ -0,0 +1,23 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; +-- load table data from file +COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +-- export using COPY (SELECT * FROM table) TO ... 
+COPY (select * from test_contestant) TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/expected/am_create.out b/expected/am_create.out new file mode 100644 index 000000000..961c0494d --- /dev/null +++ b/expected/am_create.out @@ -0,0 +1,44 @@ +-- +-- Test the CREATE statements related to cstore_fdw. +-- +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(bad_option_name '1'); -- ERROR +ERROR: invalid option "bad_option_name" +HINT: Valid options in this context are: compression, stripe_row_count, block_row_count +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(stripe_row_count '0'); -- ERROR +ERROR: invalid stripe row count +HINT: Stripe row count must be an integer between 1000 and 10000000 +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(block_row_count '0'); -- ERROR +ERROR: invalid block row count +HINT: Block row count must be an integer between 1000 and 100000 +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(compression 'invalid_compression'); -- ERROR +ERROR: invalid compression type +HINT: Valid options are: none, pglz +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); +-- Test 
that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; + count +------- + 0 +(1 row) + diff --git a/expected/am_data_types.out b/expected/am_data_types.out new file mode 100644 index 000000000..a27a25eb9 --- /dev/null +++ b/expected/am_data_types.out @@ -0,0 +1,78 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. +-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server; +COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; +SELECT * FROM test_array_types; + int_array | bigint_array | text_array +--------------------------+--------------------------------------------+------------ + {1,2,3} | {1,2,3} | {a,b,c} + {} | {} | {} + {-2147483648,2147483647} | {-9223372036854775808,9223372036854775807} | {""} +(3 rows) + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server; +COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; +SELECT * FROM test_datetime_types; + timestamp | timestamp_with_timezone | date | time | interval +---------------------+-------------------------+------------+----------+----------- + 2000-01-02 04:05:06 | 1999-01-08 12:05:06+00 | 2000-01-02 | 04:05:06 | @ 4 hours + 1970-01-01 00:00:00 | infinity | -infinity | 00:00:00 | @ 0 +(2 rows) + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server; +COPY test_enum_and_composite_types FROM + 
'/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; +SELECT * FROM test_enum_and_composite_types; + enum | composite +------+----------- + a | (2,b) + b | (3,c) +(2 rows) + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server; +COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; +SELECT * FROM test_range_types; + int4range | int8range | numrange | tsrange +-----------+-----------+----------+----------------------------------------------- + [1,3) | [1,3) | [1,3) | ["2000-01-02 00:30:00","2010-02-03 12:30:00") + empty | [1,) | (,) | empty +(2 rows) + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +COPY test_other_types FROM '/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; +SELECT * FROM test_other_types; + bool | bytea | money | inet | bitstring | uuid | json +------+------------+-------+-------------+-----------+--------------------------------------+------------------ + f | \xdeadbeef | $1.00 | 192.168.1.2 | 10101 | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | {"key": "value"} + t | \xcdb0 | $1.50 | 127.0.0.1 | | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | [] +(2 rows) + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server; +COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; +SELECT * FROM test_null_values; + a | b | c +---+--------+----- + | {NULL} | (,) + | | +(2 rows) + diff --git a/expected/am_drop.out b/expected/am_drop.out new file mode 100644 index 000000000..926f69337 --- /dev/null +++ b/expected/am_drop.out @@ -0,0 +1,40 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. 
+-- +-- DROP FOREIGN TABL +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +-- DROP cstore_fdw tables +DROP FOREIGN TABLE contestant; +DROP FOREIGN TABLE contestant_compressed; +-- Create a cstore_fdw table under a schema and drop it. +CREATE SCHEMA test_schema; +CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +DROP SCHEMA test_schema CASCADE; +NOTICE: drop cascades to foreign table test_schema.test_table +SELECT current_database() datname \gset +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +DROP EXTENSION cstore_fdw CASCADE; +NOTICE: drop cascades to 2 other objects +DETAIL: drop cascades to server cstore_server +drop cascades to foreign table test_table +-- test database drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +\c :datname +DROP DATABASE db_to_drop; diff --git a/expected/am_functions.out b/expected/am_functions.out new file mode 100644 index 000000000..117fc15f9 --- /dev/null +++ b/expected/am_functions.out @@ -0,0 +1,18 @@ +-- +-- Test utility functions for cstore_fdw tables. 
+-- +CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; +CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE non_cstore_table (a int); +COPY table_with_data FROM STDIN; +SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); + ?column? +---------- + t +(1 row) + +SELECT cstore_table_size('non_cstore_table'); +ERROR: relation is not a cstore table +DROP FOREIGN TABLE empty_table; +DROP FOREIGN TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/expected/am_insert.out b/expected/am_insert.out new file mode 100644 index 000000000..49d9ed132 --- /dev/null +++ b/expected/am_insert.out @@ -0,0 +1,88 @@ +-- +-- Testing insert on cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; +-- test single row inserts fail +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command values(1); +ERROR: operation is not supported +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +insert into test_insert_command default values; +ERROR: operation is not supported +select count(*) from test_insert_command; + count +------- + 0 +(1 row) + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); +select count(*) from test_insert_command_data; + count +------- + 0 +(1 row) + +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + count +------- + 1 +(1 row) + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + count +------- + 1 +(1 row) + +drop table test_insert_command_data; +drop foreign table test_insert_command; +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, 
generate_series(1, 1000) b +GROUP BY a ORDER BY a; +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; +CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) +SERVER cstore_server; +-- store long text in cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; +-- drop source table to remove original text from toast +DROP TABLE test_long_text; +-- check if text data is still available in cstore table +-- by comparing previously stored hash. +SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + int_val +--------- + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 +(10 rows) + +DROP TABLE test_long_text_hash; +DROP FOREIGN TABLE test_cstore_long_text; diff --git a/expected/am_load.out b/expected/am_load.out new file mode 100644 index 000000000..162ece55b --- /dev/null +++ b/expected/am_load.out @@ -0,0 +1,39 @@ +-- +-- Test loading data into cstore_fdw tables. 
+-- +-- COPY with incorrect delimiter +COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR +ERROR: missing data for column "birthdate" +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR +ERROR: program "invalid_program" failed +DETAIL: command not found +-- COPY into uncompressed table from file +COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; +-- COPY into compressed table +COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' + WITH CSV; +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +COPY famous_constants (name, value) FROM STDIN WITH CSV; +SELECT * FROM famous_constants ORDER BY id, name; + id | name | value +----+----------------+----------- + 1 | pi | 3.141 + 2 | e | 2.718 + 3 | gamma | 0.577 + 4 | bohr radius | 5.291e-11 + | avagadro | 6.022e+23 + | electron mass | 9.109e-31 + | proton mass | 1.672e-27 + | speed of light | 2.997e+08 +(8 rows) + +DROP FOREIGN TABLE famous_constants; diff --git a/expected/am_query.out b/expected/am_query.out new file mode 100644 index 000000000..7ac3508a4 --- /dev/null +++ b/expected/am_query.out @@ -0,0 +1,105 @@ +-- +-- Test querying cstore_fdw tables. 
+-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +-- Query uncompressed data +SELECT count(*) FROM contestant; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; + count +------- + 8 +(1 row) + +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; + avg | stddev_samp +-----------------------+------------------ + 2344.3750000000000000 | 433.746119785032 +(1 row) + +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; + country | avg +---------+----------------------- + XA | 2203.0000000000000000 + XB | 2610.5000000000000000 + XC | 2236.0000000000000000 + XD | 3090.0000000000000000 +(4 rows) + +SELECT * FROM contestant_compressed ORDER BY handle; + handle | birthdate | rating | percentile | country | achievements +--------+------------+--------+------------+---------+-------------- + a | 1990-01-10 | 2090 | 97.1 | XA | {a} + b | 
1990-11-01 | 2203 | 98.1 | XA | {a,b} + c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} + d | 1985-05-05 | 2314 | 98.3 | XB | {} + e | 1995-05-05 | 2236 | 98.2 | XC | {a} + f | 1983-04-02 | 3090 | 99.6 | XD | {a,b,c,y} + g | 1991-12-13 | 1803 | 85.1 | XD | {a,c} + h | 1987-10-26 | 2112 | 95.4 | XD | {w,a} +(8 rows) + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + to_json +------------------------------------------------------------------------------------------------------------------ + {"handle":"g","birthdate":"1991-12-13","rating":1803,"percentile":85.1,"country":"XD ","achievements":["a","c"]} +(1 row) + +-- Test variables used in expressions +CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + ?column? | b +----------+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 +(10 rows) + +DROP FOREIGN TABLE union_first, union_second; diff --git a/expected/am_truncate.out b/expected/am_truncate.out new file mode 100644 index 000000000..c92c15559 --- /dev/null +++ b/expected/am_truncate.out @@ -0,0 +1,231 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. 
+-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + t +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 0 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from 
cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT 
generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for table truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; diff --git a/expected/am_truncate_0.out b/expected/am_truncate_0.out new file mode 100644 index 000000000..c8cc4ad98 --- /dev/null +++ b/expected/am_truncate_0.out @@ -0,0 +1,262 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. 
+-- +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + version_above_ten +------------------- + f +(1 row) + +-- Check that files for the automatically managed table exist in the +-- cstore_fdw/{databaseoid} directory. +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +-- query rows +SELECT * FROM cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +TRUNCATE TABLE cstore_truncate_test; +SELECT * FROM cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT COUNT(*) from cstore_truncate_test; + count +------- + 0 +(1 row) + +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 20 +(1 row) + +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + count +------- + 0 +(1 row) + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + cstore_table_size +------------------- + 26 +(1 row) + +-- make sure data files still present +SELECT count(*) FROM ( + SELECT 
pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 6 +(1 row) + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; +SELECT * from cstore_truncate_test; + a | b +----+---- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 + 6 | 6 + 7 | 7 + 8 | 8 + 9 | 9 + 10 | 10 +(10 rows) + +SELECT * from cstore_truncate_test_second; + a | b +----+---- + 20 | 20 + 21 | 21 + 22 | 22 + 23 | 23 + 24 | 24 + 25 | 25 + 26 | 26 + 27 | 27 + 28 | 28 + 29 | 29 + 30 | 30 +(11 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +----+---- + 10 | 10 + 11 | 11 + 12 | 12 + 13 | 13 + 14 | 14 + 15 | 15 + 16 | 16 + 17 | 17 + 18 | 18 + 19 | 19 + 20 | 20 +(11 rows) + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_second; + a | b +---+--- +(0 rows) + +SELECT * from cstore_truncate_test_regular; + a | b +---+--- +(0 rows) + +-- test if truncate on empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + a | b +---+--- +(0 rows) + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +-- the cached plans are used stating from the 
second call +SELECT cstore_truncate_test_regular_func(); + cstore_truncate_test_regular_func +----------------------------------- + +(1 row) + +DROP FUNCTION cstore_truncate_test_regular_func(); +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; +SELECT current_user \gset +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +ERROR: permission denied for relation truncate_tbl +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; +-- verify truncate_user can truncate now +\c - truncate_user +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 100 +(1 row) + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + count +------- + 0 +(1 row) + +\c - :current_user +-- 
cleanup +DROP SCHEMA truncate_schema CASCADE; +NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +DROP USER truncate_user; +-- verify files are removed +SELECT count(*) FROM ( + SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( + SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() + ) AS q1) AS q2; + count +------- + 0 +(1 row) + diff --git a/input/am_block_filtering.source b/input/am_block_filtering.source new file mode 100644 index 000000000..dc3170f0d --- /dev/null +++ b/input/am_block_filtering.source @@ -0,0 +1,69 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- + + +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. +-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; + + +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(block_row_count '1000', stripe_row_count '2000'); + +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; + + +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a 
> 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server; +COPY collation_block_filtering_test FROM STDIN; +A +Å +B +\. + +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; diff --git a/input/am_copyto.source b/input/am_copyto.source new file mode 100644 index 000000000..a4b753a8d --- /dev/null +++ b/input/am_copyto.source @@ -0,0 +1,17 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; + +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; + +-- export using COPY (SELECT * FROM table) TO ... 
+COPY (select * from test_contestant) TO STDOUT; + +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/input/am_create.source b/input/am_create.source new file mode 100644 index 000000000..ba52137c1 --- /dev/null +++ b/input/am_create.source @@ -0,0 +1,43 @@ +-- +-- Test the CREATE statements related to cstore_fdw. +-- + + +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; + +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; + + +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(bad_option_name '1'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(stripe_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(block_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(compression 'invalid_compression'); -- ERROR + +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; + + +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); + +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; diff --git a/input/am_data_types.source b/input/am_data_types.source new file mode 100644 index 000000000..ec83c4d8c --- /dev/null +++ b/input/am_data_types.source @@ -0,0 +1,68 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. 
+-- + + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; + + +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server; + +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; + +SELECT * FROM test_array_types; + + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server; + +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; + +SELECT * FROM test_datetime_types; + + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); + +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server; + +COPY test_enum_and_composite_types FROM + '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; + +SELECT * FROM test_enum_and_composite_types; + + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server; + +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; + +SELECT * FROM test_range_types; + + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; + +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; + +SELECT * FROM test_other_types; + + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server; + +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; + +SELECT * FROM test_null_values; diff --git a/input/am_load.source 
b/input/am_load.source new file mode 100644 index 000000000..0913acde7 --- /dev/null +++ b/input/am_load.source @@ -0,0 +1,44 @@ +-- +-- Test loading data into cstore_fdw tables. +-- + +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR + +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR + +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; + +-- COPY into compressed table +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; + +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +3.141,pi,1 +2.718,e,2 +0.577,gamma,3 +5.291e-11,bohr radius,4 +\. + +COPY famous_constants (name, value) FROM STDIN WITH CSV; +avagadro,6.022e23 +electron mass,9.109e-31 +proton mass,1.672e-27 +speed of light,2.997e8 +\. + +SELECT * FROM famous_constants ORDER BY id, name; + +DROP FOREIGN TABLE famous_constants; diff --git a/output/am_block_filtering.source b/output/am_block_filtering.source new file mode 100644 index 000000000..2f664a78a --- /dev/null +++ b/output/am_block_filtering.source @@ -0,0 +1,116 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. 
+-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(block_row_count '1000', stripe_row_count '2000'); +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 801 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); + filtered_row_count +-------------------- + 200 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); + filtered_row_count +-------------------- + 101 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); + filtered_row_count +-------------------- + 900 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); + filtered_row_count +-------------------- + 990 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 1979 +(1 row) + +SELECT filtered_row_count('SELECT count(*) 
FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); + filtered_row_count +-------------------- + 1602 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + filtered_row_count +-------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +-------------------- + 3958 +(1 row) + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server; +COPY collation_block_filtering_test FROM STDIN; +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; + a +--- + Å +(1 row) + diff --git a/output/am_copyto.source b/output/am_copyto.source new file mode 100644 index 000000000..a8d841f18 --- /dev/null +++ b/output/am_copyto.source @@ -0,0 +1,23 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; +-- load table data from file +COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +-- export using COPY (SELECT * FROM table) TO ... 
+COPY (select * from test_contestant) TO STDOUT; +a 01-10-1990 2090 97.1 XA {a} +b 11-01-1990 2203 98.1 XA {a,b} +c 11-01-1988 2907 99.4 XB {w,y} +d 05-05-1985 2314 98.3 XB {} +e 05-05-1995 2236 98.2 XC {a} +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/output/am_create.source b/output/am_create.source new file mode 100644 index 000000000..961c0494d --- /dev/null +++ b/output/am_create.source @@ -0,0 +1,44 @@ +-- +-- Test the CREATE statements related to cstore_fdw. +-- +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(bad_option_name '1'); -- ERROR +ERROR: invalid option "bad_option_name" +HINT: Valid options in this context are: compression, stripe_row_count, block_row_count +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(stripe_row_count '0'); -- ERROR +ERROR: invalid stripe row count +HINT: Stripe row count must be an integer between 1000 and 10000000 +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(block_row_count '0'); -- ERROR +ERROR: invalid block row count +HINT: Block row count must be an integer between 1000 and 100000 +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(compression 'invalid_compression'); -- ERROR +ERROR: invalid compression type +HINT: Valid options are: none, pglz +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); +-- 
Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; + count +------- + 0 +(1 row) + diff --git a/output/am_data_types.source b/output/am_data_types.source new file mode 100644 index 000000000..23fdcfa29 --- /dev/null +++ b/output/am_data_types.source @@ -0,0 +1,78 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. +-- +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server; +COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; +SELECT * FROM test_array_types; + int_array | bigint_array | text_array +--------------------------+--------------------------------------------+------------ + {1,2,3} | {1,2,3} | {a,b,c} + {} | {} | {} + {-2147483648,2147483647} | {-9223372036854775808,9223372036854775807} | {""} +(3 rows) + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server; +COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; +SELECT * FROM test_datetime_types; + timestamp | timestamp_with_timezone | date | time | interval +---------------------+-------------------------+------------+----------+----------- + 2000-01-02 04:05:06 | 1999-01-08 12:05:06+00 | 2000-01-02 | 04:05:06 | @ 4 hours + 1970-01-01 00:00:00 | infinity | -infinity | 00:00:00 | @ 0 +(2 rows) + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server; +COPY test_enum_and_composite_types FROM + 
'@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; +SELECT * FROM test_enum_and_composite_types; + enum | composite +------+----------- + a | (2,b) + b | (3,c) +(2 rows) + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server; +COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; +SELECT * FROM test_range_types; + int4range | int8range | numrange | tsrange +-----------+-----------+----------+----------------------------------------------- + [1,3) | [1,3) | [1,3) | ["2000-01-02 00:30:00","2010-02-03 12:30:00") + empty | [1,) | (,) | empty +(2 rows) + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; +SELECT * FROM test_other_types; + bool | bytea | money | inet | bitstring | uuid | json +------+------------+-------+-------------+-----------+--------------------------------------+------------------ + f | \xdeadbeef | $1.00 | 192.168.1.2 | 10101 | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | {"key": "value"} + t | \xcdb0 | $1.50 | 127.0.0.1 | | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | [] +(2 rows) + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server; +COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; +SELECT * FROM test_null_values; + a | b | c +---+--------+----- + | {NULL} | (,) + | | +(2 rows) + diff --git a/output/am_load.source b/output/am_load.source new file mode 100644 index 000000000..c76f203eb --- /dev/null +++ b/output/am_load.source @@ -0,0 +1,39 @@ +-- +-- Test loading data into cstore_fdw tables. 
+-- +-- COPY with incorrect delimiter +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR +ERROR: missing data for column "birthdate" +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR +ERROR: program "invalid_program" failed +DETAIL: command not found +-- COPY into uncompressed table from file +COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; +-- COPY into compressed table +COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' + WITH CSV; +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +COPY famous_constants (name, value) FROM STDIN WITH CSV; +SELECT * FROM famous_constants ORDER BY id, name; + id | name | value +----+----------------+----------- + 1 | pi | 3.141 + 2 | e | 2.718 + 3 | gamma | 0.577 + 4 | bohr radius | 5.291e-11 + | avagadro | 6.022e+23 + | electron mass | 9.109e-31 + | proton mass | 1.672e-27 + | speed of light | 2.997e+08 +(8 rows) + +DROP FOREIGN TABLE famous_constants; diff --git a/sql/am_alter.sql b/sql/am_alter.sql new file mode 100644 index 000000000..5ba3beb34 --- /dev/null +++ b/sql/am_alter.sql @@ -0,0 +1,85 @@ +-- +-- Testing ALTER TABLE on cstore_fdw tables. 
+-- + +CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; + +WITH sample_data AS (VALUES + (1, 2, 3), + (4, 5, 6), + (7, 8, 9) +) +INSERT INTO test_alter_table SELECT * FROM sample_data; + +-- drop a column +ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; + +-- test analyze +ANALYZE test_alter_table; + +-- verify select queries run as expected +SELECT * FROM test_alter_table; +SELECT a FROM test_alter_table; +SELECT b FROM test_alter_table; + +-- verify insert runs as expected +INSERT INTO test_alter_table (SELECT 3, 5, 8); +INSERT INTO test_alter_table (SELECT 5, 8); + + +-- add a column with no defaults +ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +SELECT * FROM test_alter_table; +INSERT INTO test_alter_table (SELECT 3, 5, 8); +SELECT * FROM test_alter_table; + + +-- add a fixed-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); +SELECT * from test_alter_table; + + +-- add a variable-length column with default value +ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +SELECT * from test_alter_table; +INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); +SELECT * from test_alter_table; + + +-- drop couple of columns +ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; +ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ANALYZE test_alter_table; +SELECT * from test_alter_table; +SELECT count(*) from test_alter_table; +SELECT count(t.*) from test_alter_table t; + + +-- unsupported default values +ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +SELECT * FROM test_alter_table; +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +SELECT * FROM test_alter_table; +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; 
+ANALYZE test_alter_table; +SELECT * FROM test_alter_table; + +-- unsupported type change +ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; +ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; + +-- this is valid type change +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; + +-- this is not valid +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; + +-- text / varchar conversion is valid both ways +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; + +DROP FOREIGN TABLE test_alter_table; diff --git a/sql/am_analyze.sql b/sql/am_analyze.sql new file mode 100644 index 000000000..4476454a6 --- /dev/null +++ b/sql/am_analyze.sql @@ -0,0 +1,11 @@ +-- +-- Test the ANALYZE command for cstore_fdw tables. +-- + +-- ANALYZE uncompressed table +ANALYZE contestant; +SELECT count(*) FROM pg_stats WHERE tablename='contestant'; + +-- ANALYZE compressed table +ANALYZE contestant_compressed; +SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; diff --git a/sql/am_block_filtering.sql b/sql/am_block_filtering.sql new file mode 100644 index 000000000..bb90c72ca --- /dev/null +++ b/sql/am_block_filtering.sql @@ -0,0 +1,69 @@ +-- +-- Test block filtering in cstore_fdw using min/max values in stripe skip lists. +-- + + +-- +-- filtered_row_count returns number of rows filtered by the WHERE clause. +-- If blocks get filtered by cstore_fdw, less rows are passed to WHERE +-- clause, so this function should return a lower number. 
+-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; + + +-- Create and load data +CREATE FOREIGN TABLE test_block_filtering (a int) + SERVER cstore_server + OPTIONS(block_row_count '1000', stripe_row_count '2000'); + +COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; + + +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); + + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); + + +-- Load data for second time and verify that filtered_row_count is exactly twice as before +COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); +SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); +SELECT 
filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); + + +-- Verify that we are fine with collations which use a different alphabet order +CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") + SERVER cstore_server; +COPY collation_block_filtering_test FROM STDIN; +A +Å +B +\. + +SELECT * FROM collation_block_filtering_test WHERE A > 'B'; diff --git a/sql/am_copyto.sql b/sql/am_copyto.sql new file mode 100644 index 000000000..4e9e839b7 --- /dev/null +++ b/sql/am_copyto.sql @@ -0,0 +1,17 @@ +-- +-- Test copying data from cstore_fdw tables. +-- +CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; + +-- load table data from file +COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; + +-- export using COPY table TO ... +COPY test_contestant TO STDOUT; + +-- export using COPY (SELECT * FROM table) TO ... +COPY (select * from test_contestant) TO STDOUT; + +DROP FOREIGN TABLE test_contestant CASCADE; diff --git a/sql/am_create.sql b/sql/am_create.sql new file mode 100644 index 000000000..ba52137c1 --- /dev/null +++ b/sql/am_create.sql @@ -0,0 +1,43 @@ +-- +-- Test the CREATE statements related to cstore_fdw. 
+-- + + +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; + +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; + + +-- Validator tests +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server + OPTIONS(bad_option_name '1'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () + SERVER cstore_server + OPTIONS(stripe_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () + SERVER cstore_server + OPTIONS(block_row_count '0'); -- ERROR + +CREATE FOREIGN TABLE test_validator_invalid_compression_type () + SERVER cstore_server + OPTIONS(compression 'invalid_compression'); -- ERROR + +-- Create uncompressed table +CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server; + + +-- Create compressed table with automatically determined file path +CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, + percentile FLOAT, country CHAR(3), achievements TEXT[]) + SERVER cstore_server + OPTIONS(compression 'pglz'); + +-- Test that querying an empty table works +ANALYZE contestant; +SELECT count(*) FROM contestant; diff --git a/sql/am_data_types.sql b/sql/am_data_types.sql new file mode 100644 index 000000000..092538a57 --- /dev/null +++ b/sql/am_data_types.sql @@ -0,0 +1,68 @@ +-- +-- Test loading and reading different data types to/from cstore_fdw foreign tables. 
+-- + + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; +SET timezone to 'GMT'; +SET intervalstyle TO 'POSTGRES_VERBOSE'; + + +-- Test array types +CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) SERVER cstore_server; + +COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; + +SELECT * FROM test_array_types; + + +-- Test date/time types +CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, + timestamp_with_timezone timestamp with time zone, date date, time time, + interval interval) SERVER cstore_server; + +COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; + +SELECT * FROM test_datetime_types; + + +-- Test enum and composite types +CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); +CREATE TYPE composite_type AS (a int, b text); + +CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) SERVER cstore_server; + +COPY test_enum_and_composite_types FROM + '/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; + +SELECT * FROM test_enum_and_composite_types; + + +-- Test range types +CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) SERVER cstore_server; + +COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; + +SELECT * FROM test_range_types; + + +-- Test other types +CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; + +COPY test_other_types FROM '/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; + +SELECT * FROM test_other_types; + + +-- Test null values +CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) + SERVER cstore_server; + +COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; 
+ +SELECT * FROM test_null_values; diff --git a/sql/am_drop.sql b/sql/am_drop.sql new file mode 100644 index 000000000..c64b5c99b --- /dev/null +++ b/sql/am_drop.sql @@ -0,0 +1,48 @@ +-- +-- Tests the different DROP commands for cstore_fdw tables. +-- +-- DROP FOREIGN TABL +-- DROP SCHEMA +-- DROP EXTENSION +-- DROP DATABASE +-- + +-- Note that travis does not create +-- cstore_fdw extension in default database (postgres). This has caused +-- different behavior between travis tests and local tests. Thus +-- 'postgres' directory is excluded from comparison to have the same result. + +-- store postgres database oid +SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset + +-- DROP cstore_fdw tables +DROP FOREIGN TABLE contestant; +DROP FOREIGN TABLE contestant_compressed; + +-- Create a cstore_fdw table under a schema and drop it. +CREATE SCHEMA test_schema; +CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +DROP SCHEMA test_schema CASCADE; + +SELECT current_database() datname \gset + +CREATE DATABASE db_to_drop; +\c db_to_drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; + +DROP EXTENSION cstore_fdw CASCADE; + +-- test database drop +CREATE EXTENSION cstore_fdw; +CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset + +CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; + +\c :datname + +DROP DATABASE db_to_drop; diff --git a/sql/am_functions.sql b/sql/am_functions.sql new file mode 100644 index 000000000..ed7e260b3 --- /dev/null +++ b/sql/am_functions.sql @@ -0,0 +1,20 @@ +-- +-- Test utility functions for cstore_fdw tables. 
+-- + +CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; +CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE non_cstore_table (a int); + +COPY table_with_data FROM STDIN; +1 +2 +3 +\. + +SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); +SELECT cstore_table_size('non_cstore_table'); + +DROP FOREIGN TABLE empty_table; +DROP FOREIGN TABLE table_with_data; +DROP TABLE non_cstore_table; diff --git a/sql/am_insert.sql b/sql/am_insert.sql new file mode 100644 index 000000000..7a6b075ce --- /dev/null +++ b/sql/am_insert.sql @@ -0,0 +1,56 @@ +-- +-- Testing insert on cstore_fdw tables. +-- + +CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; + +-- test single row inserts fail +select count(*) from test_insert_command; +insert into test_insert_command values(1); +select count(*) from test_insert_command; + +insert into test_insert_command default values; +select count(*) from test_insert_command; + +-- test inserting from another table succeed +CREATE TABLE test_insert_command_data (a int); + +select count(*) from test_insert_command_data; +insert into test_insert_command_data values(1); +select count(*) from test_insert_command_data; + +insert into test_insert_command select * from test_insert_command_data; +select count(*) from test_insert_command; + +drop table test_insert_command_data; +drop foreign table test_insert_command; + +-- test long attribute value insertion +-- create sufficiently long text so that data is stored in toast +CREATE TABLE test_long_text AS +SELECT a as int_val, string_agg(random()::text, '') as text_val +FROM generate_series(1, 10) a, generate_series(1, 1000) b +GROUP BY a ORDER BY a; + +-- store hash values of text for later comparison +CREATE TABLE test_long_text_hash AS +SELECT int_val, md5(text_val) AS hash +FROM test_long_text; + +CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) +SERVER cstore_server; + +-- store long text in 
cstore table +INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; + +-- drop source table to remove original text from toast +DROP TABLE test_long_text; + +-- check if text data is still available in cstore table +-- by comparing previously stored hash. +SELECT a.int_val +FROM test_long_text_hash a, test_cstore_long_text c +WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); + +DROP TABLE test_long_text_hash; +DROP FOREIGN TABLE test_cstore_long_text; diff --git a/sql/am_load.sql b/sql/am_load.sql new file mode 100644 index 000000000..7f9238b57 --- /dev/null +++ b/sql/am_load.sql @@ -0,0 +1,44 @@ +-- +-- Test loading data into cstore_fdw tables. +-- + +-- COPY with incorrect delimiter +COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' + WITH DELIMITER '|'; -- ERROR + +-- COPY with invalid program +COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR + +-- COPY into uncompressed table from file +COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; + +-- COPY into compressed table +COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; + +-- COPY into uncompressed table from program +COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' + WITH CSV; + +-- Test column list +CREATE FOREIGN TABLE famous_constants (id int, name text, value real) + SERVER cstore_server; +COPY famous_constants (value, name, id) FROM STDIN WITH CSV; +3.141,pi,1 +2.718,e,2 +0.577,gamma,3 +5.291e-11,bohr radius,4 +\. + +COPY famous_constants (name, value) FROM STDIN WITH CSV; +avagadro,6.022e23 +electron mass,9.109e-31 +proton mass,1.672e-27 +speed of light,2.997e8 +\. 
+ +SELECT * FROM famous_constants ORDER BY id, name; + +DROP FOREIGN TABLE famous_constants; diff --git a/sql/am_query.sql b/sql/am_query.sql new file mode 100644 index 000000000..87743e7bd --- /dev/null +++ b/sql/am_query.sql @@ -0,0 +1,34 @@ +-- +-- Test querying cstore_fdw tables. +-- + +-- Settings to make the result deterministic +SET datestyle = "ISO, YMD"; + +-- Query uncompressed data +SELECT count(*) FROM contestant; +SELECT avg(rating), stddev_samp(rating) FROM contestant; +SELECT country, avg(rating) FROM contestant WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant ORDER BY handle; + +-- Query compressed data +SELECT count(*) FROM contestant_compressed; +SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; +SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 + GROUP BY country ORDER BY country; +SELECT * FROM contestant_compressed ORDER BY handle; + +-- Verify that we handle whole-row references correctly +SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; + +-- Test variables used in expressions +CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; + +INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; +INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; + +(SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); + +DROP FOREIGN TABLE union_first, union_second; diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql new file mode 100644 index 000000000..a1849045e --- /dev/null +++ b/sql/am_truncate.sql @@ -0,0 +1,116 @@ +-- +-- Test the TRUNCATE TABLE command for cstore_fdw tables. 
+-- + +-- print whether we're using version > 10 to make version-specific tests clear +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; + +-- CREATE a cstore_fdw table, fill with some data -- +CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; +CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_regular (a int, b int); + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; + +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; + +-- query rows +SELECT * FROM cstore_truncate_test; + +TRUNCATE TABLE cstore_truncate_test; + +SELECT * FROM cstore_truncate_test; + +SELECT COUNT(*) from cstore_truncate_test; + +SELECT count(*) FROM cstore_truncate_test_compressed; +TRUNCATE TABLE cstore_truncate_test_compressed; +SELECT count(*) FROM cstore_truncate_test_compressed; + +SELECT cstore_table_size('cstore_truncate_test_compressed'); + +INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; +INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; + +SELECT * from cstore_truncate_test; + +SELECT * from cstore_truncate_test_second; + +SELECT * from cstore_truncate_test_regular; + +-- make sure multi truncate works +-- notice that the same table might be repeated +TRUNCATE TABLE cstore_truncate_test, + cstore_truncate_test_regular, + cstore_truncate_test_second, + cstore_truncate_test; + +SELECT * from cstore_truncate_test; +SELECT * from cstore_truncate_test_second; +SELECT * from cstore_truncate_test_regular; + +-- test if truncate on 
empty table works +TRUNCATE TABLE cstore_truncate_test; +SELECT * from cstore_truncate_test; + +-- test if a cached truncate from a pl/pgsql function works +CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ +BEGIN + INSERT INTO cstore_truncate_test_regular select a, a from generate_series(1, 10) a; + TRUNCATE TABLE cstore_truncate_test_regular; +END;$$ +LANGUAGE plpgsql; + +SELECT cstore_truncate_test_regular_func(); +-- the cached plans are used stating from the second call +SELECT cstore_truncate_test_regular_func(); +DROP FUNCTION cstore_truncate_test_regular_func(); + +DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test_regular; +DROP FOREIGN TABLE cstore_truncate_test_compressed; + +-- test truncate with schema +CREATE SCHEMA truncate_schema; +CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT COUNT(*) FROM truncate_schema.truncate_tbl; + +INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); + +-- create a user that can not truncate +CREATE USER truncate_user; +GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; +GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; +REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; + +SELECT current_user \gset + +\c - truncate_user +-- verify truncate command fails and check number of rows +SELECT count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + +-- switch to super user, grant truncate to truncate_user +\c - :current_user +GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; + +-- verify truncate_user can truncate now +\c - truncate_user +SELECT 
count(*) FROM truncate_schema.truncate_tbl; +TRUNCATE TABLE truncate_schema.truncate_tbl; +SELECT count(*) FROM truncate_schema.truncate_tbl; + +\c - :current_user + +-- cleanup +DROP SCHEMA truncate_schema CASCADE; +DROP USER truncate_user; From a57b9004a4e02b3c3dcdc7b973cb41822bf815f9 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 16:41:28 -0700 Subject: [PATCH 032/124] tests WIP --- Makefile | 4 ++-- cstore_tableam.c | 28 ++++++++++++++++++++++-- cstore_tableam.h | 2 ++ expected/clean.out | 10 +++++++++ expected/extension_create.out | 2 ++ input/am_block_filtering.source | 8 +++---- input/am_copyto.source | 6 +++--- input/am_create.source | 34 +++++------------------------ input/am_data_types.source | 24 ++++++++++----------- input/am_load.source | 6 +++--- input/fdw_create.source | 4 ---- mod.c | 3 +++ output/fdw_create.source | 2 -- sql/am_alter.sql | 38 ++++++++++++++++----------------- sql/am_block_filtering.sql | 8 +++---- sql/am_copyto.sql | 6 +++--- sql/am_create.sql | 34 +++++------------------------ sql/am_data_types.sql | 24 ++++++++++----------- sql/am_drop.sql | 16 +++++++------- sql/am_functions.sql | 8 +++---- sql/am_insert.sql | 8 +++---- sql/am_load.sql | 6 +++--- sql/am_query.sql | 6 +++--- sql/am_truncate.sql | 12 +++++------ sql/clean.sql | 11 ++++++++++ sql/extension_create.sql | 4 ++++ 26 files changed, 158 insertions(+), 156 deletions(-) create mode 100644 expected/clean.out create mode 100644 expected/extension_create.out create mode 100644 sql/clean.sql create mode 100644 sql/extension_create.sql diff --git a/Makefile b/Makefile index 2fc550ca9..ad85b294a 100644 --- a/Makefile +++ b/Makefile @@ -14,8 +14,8 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql -REGRESS = am_create am_load am_query am_analyze am_data_types am_functions \ - am_block_filtering am_drop 
am_insert am_copyto am_alter am_truncate \ +REGRESS = extension_create am_create am_load am_query am_analyze am_data_types am_functions \ + am_block_filtering am_drop am_insert am_copyto am_alter am_truncate clean \ fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ diff --git a/cstore_tableam.c b/cstore_tableam.c index f93971c59..21c1aab1f 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -43,6 +43,7 @@ typedef struct CStoreScanDescData typedef struct CStoreScanDescData *CStoreScanDesc; static TableWriteState *CStoreWriteState = NULL; +static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; static CStoreOptions * CStoreGetDefaultOptions(void) @@ -71,7 +72,7 @@ cstore_init_write_state(Relation relation) CStoreOptions *cstoreOptions = CStoreGetDefaultOptions(); TupleDesc tupdesc = RelationGetDescr(relation); - elog(NOTICE, "initializing write state for relation %d", relation->rd_id); + elog(LOG, "initializing write state for relation %d", relation->rd_id); CStoreWriteState = CStoreBeginWrite(relation->rd_id, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, @@ -87,7 +88,7 @@ cstore_free_write_state() { if (CStoreWriteState != NULL) { - elog(NOTICE, "flushing write state for relation %d", CStoreWriteState->relation->rd_id); + elog(LOG, "flushing write state for relation %d", CStoreWriteState->relation->rd_id); CStoreEndWrite(CStoreWriteState); CStoreWriteState = NULL; } @@ -495,6 +496,29 @@ cstore_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, elog(ERROR, "cstore_scan_sample_next_tuple not implemented"); } +static void +CStoreExecutorEnd(QueryDesc *queryDesc) +{ + cstore_free_write_state(); + if (PreviousExecutorEndHook) + PreviousExecutorEndHook(queryDesc); + else + standard_ExecutorEnd(queryDesc); +} + +void +cstore_tableam_init() +{ + 
PreviousExecutorEndHook = ExecutorEnd_hook; + ExecutorEnd_hook = CStoreExecutorEnd; +} + +void +cstore_tableam_finish() +{ + ExecutorEnd_hook = PreviousExecutorEndHook; +} + static const TableAmRoutine cstore_am_methods = { .type = T_TableAmRoutine, diff --git a/cstore_tableam.h b/cstore_tableam.h index f81c13155..bd1f3805e 100644 --- a/cstore_tableam.h +++ b/cstore_tableam.h @@ -5,3 +5,5 @@ const TableAmRoutine *GetCstoreTableAmRoutine(void); Datum cstore_tableam_handler(PG_FUNCTION_ARGS); extern void cstore_free_write_state(void); +extern void cstore_tableam_init(void); +extern void cstore_tableam_finish(void); diff --git a/expected/clean.out b/expected/clean.out new file mode 100644 index 000000000..85b25987b --- /dev/null +++ b/expected/clean.out @@ -0,0 +1,10 @@ +DROP TABLE test_insert_command; +DROP TABLE collation_block_filtering_test; +DROP TABLE test_null_values; +DROP TABLE test_other_types; +DROP TABLE test_range_types; +DROP TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP TABLE test_datetime_types; +DROP TABLE test_array_types; diff --git a/expected/extension_create.out b/expected/extension_create.out new file mode 100644 index 000000000..c4d94e1e5 --- /dev/null +++ b/expected/extension_create.out @@ -0,0 +1,2 @@ +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; diff --git a/input/am_block_filtering.source b/input/am_block_filtering.source index dc3170f0d..0225bde16 100644 --- a/input/am_block_filtering.source +++ b/input/am_block_filtering.source @@ -28,8 +28,8 @@ $$ LANGUAGE PLPGSQL; -- Create and load data -CREATE FOREIGN TABLE test_block_filtering (a int) - SERVER cstore_server +CREATE TABLE test_block_filtering (a int) + USING cstore_tableam OPTIONS(block_row_count '1000', stripe_row_count '2000'); COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; @@ -58,8 +58,8 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET -- Verify that we are fine 
with collations which use a different alphabet order -CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server; +CREATE TABLE collation_block_filtering_test(A text collate "da_DK") + USING cstore_tableam; COPY collation_block_filtering_test FROM STDIN; A Å diff --git a/input/am_copyto.source b/input/am_copyto.source index a4b753a8d..bb333bacf 100644 --- a/input/am_copyto.source +++ b/input/am_copyto.source @@ -1,9 +1,9 @@ -- -- Test copying data from cstore_fdw tables. -- -CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, +CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- load table data from file COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; @@ -14,4 +14,4 @@ COPY test_contestant TO STDOUT; -- export using COPY (SELECT * FROM table) TO ... COPY (select * from test_contestant) TO STDOUT; -DROP FOREIGN TABLE test_contestant CASCADE; +DROP TABLE test_contestant CASCADE; diff --git a/input/am_create.source b/input/am_create.source index ba52137c1..8a1612f7a 100644 --- a/input/am_create.source +++ b/input/am_create.source @@ -1,42 +1,18 @@ -- --- Test the CREATE statements related to cstore_fdw. +-- Test the CREATE statements related to cstore. 
-- --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; - -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; - - --- Validator tests -CREATE FOREIGN TABLE test_validator_invalid_option () - SERVER cstore_server - OPTIONS(bad_option_name '1'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () - SERVER cstore_server - OPTIONS(stripe_row_count '0'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_block_row_count () - SERVER cstore_server - OPTIONS(block_row_count '0'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_compression_type () - SERVER cstore_server - OPTIONS(compression 'invalid_compression'); -- ERROR - -- Create uncompressed table -CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- Create compressed table with automatically determined file path -CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(compression 'pglz'); + USING cstore_tableam; -- Test that querying an empty table works ANALYZE contestant; diff --git a/input/am_data_types.source b/input/am_data_types.source index ec83c4d8c..24c661090 100644 --- a/input/am_data_types.source +++ b/input/am_data_types.source @@ -10,8 +10,8 @@ SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types -CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server; +CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) USING cstore_tableam; COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; @@ -19,9 +19,9 @@ SELECT * FROM test_array_types; -- Test 
date/time types -CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, +CREATE TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server; + interval interval) USING cstore_tableam; COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; @@ -32,8 +32,8 @@ SELECT * FROM test_datetime_types; CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); -CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server; +CREATE TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) USING cstore_tableam; COPY test_enum_and_composite_types FROM '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; @@ -42,8 +42,8 @@ SELECT * FROM test_enum_and_composite_types; -- Test range types -CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server; +CREATE TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) USING cstore_tableam; COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; @@ -51,8 +51,8 @@ SELECT * FROM test_range_types; -- Test other types -CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; @@ -60,8 +60,8 @@ SELECT * FROM test_other_types; -- Test null values -CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server; +CREATE TABLE test_null_values (a int, b int[], c composite_type) + USING cstore_tableam; 
COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; diff --git a/input/am_load.source b/input/am_load.source index 0913acde7..c2ad581e8 100644 --- a/input/am_load.source +++ b/input/am_load.source @@ -23,8 +23,8 @@ COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv WITH CSV; -- Test column list -CREATE FOREIGN TABLE famous_constants (id int, name text, value real) - SERVER cstore_server; +CREATE TABLE famous_constants (id int, name text, value real) + USING cstore_tableam; COPY famous_constants (value, name, id) FROM STDIN WITH CSV; 3.141,pi,1 2.718,e,2 @@ -41,4 +41,4 @@ speed of light,2.997e8 SELECT * FROM famous_constants ORDER BY id, name; -DROP FOREIGN TABLE famous_constants; +DROP TABLE famous_constants; diff --git a/input/fdw_create.source b/input/fdw_create.source index ba52137c1..bb3a38e28 100644 --- a/input/fdw_create.source +++ b/input/fdw_create.source @@ -2,10 +2,6 @@ -- Test the CREATE statements related to cstore_fdw. -- - --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; - CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; diff --git a/mod.c b/mod.c index d962e9820..4268126e3 100644 --- a/mod.c +++ b/mod.c @@ -16,6 +16,7 @@ #include "fmgr.h" #include "mod.h" +#include "cstore_tableam.h" #include "cstore_fdw.h" PG_MODULE_MAGIC; @@ -23,6 +24,7 @@ PG_MODULE_MAGIC; void _PG_init(void) { + cstore_tableam_init(); cstore_fdw_init(); } @@ -30,5 +32,6 @@ _PG_init(void) void _PG_fini(void) { + cstore_tableam_finish(); cstore_fdw_finish(); } diff --git a/output/fdw_create.source b/output/fdw_create.source index 961c0494d..41f17fdd8 100644 --- a/output/fdw_create.source +++ b/output/fdw_create.source @@ -1,8 +1,6 @@ -- -- Test the CREATE statements related to cstore_fdw. 
-- --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests CREATE FOREIGN TABLE test_validator_invalid_option () diff --git a/sql/am_alter.sql b/sql/am_alter.sql index 5ba3beb34..3b608f9cd 100644 --- a/sql/am_alter.sql +++ b/sql/am_alter.sql @@ -2,7 +2,7 @@ -- Testing ALTER TABLE on cstore_fdw tables. -- -CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; +CREATE TABLE test_alter_table (a int, b int, c int) USING cstore_tableam; WITH sample_data AS (VALUES (1, 2, 3), @@ -12,7 +12,7 @@ WITH sample_data AS (VALUES INSERT INTO test_alter_table SELECT * FROM sample_data; -- drop a column -ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; +ALTER TABLE test_alter_table DROP COLUMN a; -- test analyze ANALYZE test_alter_table; @@ -28,29 +28,29 @@ INSERT INTO test_alter_table (SELECT 5, 8); -- add a column with no defaults -ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +ALTER TABLE test_alter_table ADD COLUMN d int; SELECT * FROM test_alter_table; INSERT INTO test_alter_table (SELECT 3, 5, 8); SELECT * FROM test_alter_table; -- add a fixed-length column with default value -ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +ALTER TABLE test_alter_table ADD COLUMN e int default 3; SELECT * from test_alter_table; INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); SELECT * from test_alter_table; -- add a variable-length column with default value -ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +ALTER TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; SELECT * from test_alter_table; INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); SELECT * from test_alter_table; -- drop couple of columns -ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; -ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ALTER TABLE test_alter_table DROP COLUMN c; +ALTER TABLE test_alter_table DROP COLUMN e; ANALYZE 
test_alter_table; SELECT * from test_alter_table; SELECT count(*) from test_alter_table; @@ -58,28 +58,28 @@ SELECT count(t.*) from test_alter_table t; -- unsupported default values -ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); -ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +ALTER TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; SELECT * FROM test_alter_table; -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +ALTER TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; SELECT * FROM test_alter_table; -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ALTER TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; ANALYZE test_alter_table; SELECT * FROM test_alter_table; -- unsupported type change -ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; -ALTER FOREIGN TABLE test_alter_table ADD COLUMN j float; -ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; +ALTER TABLE test_alter_table ADD COLUMN i int; +ALTER TABLE test_alter_table ADD COLUMN j float; +ALTER TABLE test_alter_table ADD COLUMN k text; -- this is valid type change -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; +ALTER TABLE test_alter_table ALTER COLUMN i TYPE float; -- this is not valid -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; +ALTER TABLE test_alter_table ALTER COLUMN j TYPE int; -- text / varchar conversion is valid both ways -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; +ALTER TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER TABLE test_alter_table ALTER COLUMN k TYPE text; -DROP FOREIGN TABLE test_alter_table; +DROP TABLE test_alter_table; diff --git a/sql/am_block_filtering.sql b/sql/am_block_filtering.sql index bb90c72ca..c7d0e997c 100644 --- 
a/sql/am_block_filtering.sql +++ b/sql/am_block_filtering.sql @@ -28,8 +28,8 @@ $$ LANGUAGE PLPGSQL; -- Create and load data -CREATE FOREIGN TABLE test_block_filtering (a int) - SERVER cstore_server +CREATE TABLE test_block_filtering (a int) + USING cstore_tableam OPTIONS(block_row_count '1000', stripe_row_count '2000'); COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; @@ -58,8 +58,8 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET -- Verify that we are fine with collations which use a different alphabet order -CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server; +CREATE TABLE collation_block_filtering_test(A text collate "da_DK") + USING cstore_tableam; COPY collation_block_filtering_test FROM STDIN; A Å diff --git a/sql/am_copyto.sql b/sql/am_copyto.sql index 4e9e839b7..7288ff66f 100644 --- a/sql/am_copyto.sql +++ b/sql/am_copyto.sql @@ -1,9 +1,9 @@ -- -- Test copying data from cstore_fdw tables. -- -CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, +CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- load table data from file COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; @@ -14,4 +14,4 @@ COPY test_contestant TO STDOUT; -- export using COPY (SELECT * FROM table) TO ... COPY (select * from test_contestant) TO STDOUT; -DROP FOREIGN TABLE test_contestant CASCADE; +DROP TABLE test_contestant CASCADE; diff --git a/sql/am_create.sql b/sql/am_create.sql index ba52137c1..8a1612f7a 100644 --- a/sql/am_create.sql +++ b/sql/am_create.sql @@ -1,42 +1,18 @@ -- --- Test the CREATE statements related to cstore_fdw. +-- Test the CREATE statements related to cstore. 
-- --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; - -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; - - --- Validator tests -CREATE FOREIGN TABLE test_validator_invalid_option () - SERVER cstore_server - OPTIONS(bad_option_name '1'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () - SERVER cstore_server - OPTIONS(stripe_row_count '0'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_block_row_count () - SERVER cstore_server - OPTIONS(block_row_count '0'); -- ERROR - -CREATE FOREIGN TABLE test_validator_invalid_compression_type () - SERVER cstore_server - OPTIONS(compression 'invalid_compression'); -- ERROR - -- Create uncompressed table -CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- Create compressed table with automatically determined file path -CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(compression 'pglz'); + USING cstore_tableam; -- Test that querying an empty table works ANALYZE contestant; diff --git a/sql/am_data_types.sql b/sql/am_data_types.sql index 092538a57..b2668e71f 100644 --- a/sql/am_data_types.sql +++ b/sql/am_data_types.sql @@ -10,8 +10,8 @@ SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types -CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server; +CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) USING cstore_tableam; COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; @@ -19,9 +19,9 @@ SELECT * FROM test_array_types; -- Test date/time 
types -CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, +CREATE TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server; + interval interval) USING cstore_tableam; COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; @@ -32,8 +32,8 @@ SELECT * FROM test_datetime_types; CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); -CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server; +CREATE TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) USING cstore_tableam; COPY test_enum_and_composite_types FROM '/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; @@ -42,8 +42,8 @@ SELECT * FROM test_enum_and_composite_types; -- Test range types -CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server; +CREATE TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) USING cstore_tableam; COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; @@ -51,8 +51,8 @@ SELECT * FROM test_range_types; -- Test other types -CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; COPY test_other_types FROM '/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; @@ -60,8 +60,8 @@ SELECT * FROM test_other_types; -- Test null values -CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server; +CREATE TABLE test_null_values (a int, b int[], 
c composite_type) + USING cstore_tableam; COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; diff --git a/sql/am_drop.sql b/sql/am_drop.sql index c64b5c99b..5945a9f2c 100644 --- a/sql/am_drop.sql +++ b/sql/am_drop.sql @@ -1,7 +1,7 @@ -- -- Tests the different DROP commands for cstore_fdw tables. -- --- DROP FOREIGN TABL +-- DROP TABL -- DROP SCHEMA -- DROP EXTENSION -- DROP DATABASE @@ -16,12 +16,12 @@ SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -- DROP cstore_fdw tables -DROP FOREIGN TABLE contestant; -DROP FOREIGN TABLE contestant_compressed; +DROP TABLE contestant; +DROP TABLE contestant_compressed; -- Create a cstore_fdw table under a schema and drop it. CREATE SCHEMA test_schema; -CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; DROP SCHEMA test_schema CASCADE; SELECT current_database() datname \gset @@ -29,19 +29,19 @@ SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +CREATE USING cstore_tableam DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset -CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +CREATE TABLE test_table(data int) USING cstore_tableam; DROP EXTENSION cstore_fdw CASCADE; -- test database drop CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; +CREATE USING cstore_tableam DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset -CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +CREATE TABLE test_table(data int) USING cstore_tableam; \c :datname diff --git a/sql/am_functions.sql b/sql/am_functions.sql index ed7e260b3..70624e6d4 100644 --- a/sql/am_functions.sql +++ 
b/sql/am_functions.sql @@ -2,8 +2,8 @@ -- Test utility functions for cstore_fdw tables. -- -CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; -CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE empty_table (a int) USING cstore_tableam; +CREATE TABLE table_with_data (a int) USING cstore_tableam; CREATE TABLE non_cstore_table (a int); COPY table_with_data FROM STDIN; @@ -15,6 +15,6 @@ COPY table_with_data FROM STDIN; SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); SELECT cstore_table_size('non_cstore_table'); -DROP FOREIGN TABLE empty_table; -DROP FOREIGN TABLE table_with_data; +DROP TABLE empty_table; +DROP TABLE table_with_data; DROP TABLE non_cstore_table; diff --git a/sql/am_insert.sql b/sql/am_insert.sql index 7a6b075ce..b249828e7 100644 --- a/sql/am_insert.sql +++ b/sql/am_insert.sql @@ -2,7 +2,7 @@ -- Testing insert on cstore_fdw tables. -- -CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; +CREATE TABLE test_insert_command (a int) USING cstore_tableam; -- test single row inserts fail select count(*) from test_insert_command; @@ -37,8 +37,8 @@ CREATE TABLE test_long_text_hash AS SELECT int_val, md5(text_val) AS hash FROM test_long_text; -CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) -SERVER cstore_server; +CREATE TABLE test_cstore_long_text(int_val int, text_val text) +USING cstore_tableam; -- store long text in cstore table INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; @@ -53,4 +53,4 @@ FROM test_long_text_hash a, test_cstore_long_text c WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); DROP TABLE test_long_text_hash; -DROP FOREIGN TABLE test_cstore_long_text; +DROP TABLE test_cstore_long_text; diff --git a/sql/am_load.sql b/sql/am_load.sql index 7f9238b57..c7e9e5287 100644 --- a/sql/am_load.sql +++ b/sql/am_load.sql @@ -23,8 +23,8 @@ COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/cont 
WITH CSV; -- Test column list -CREATE FOREIGN TABLE famous_constants (id int, name text, value real) - SERVER cstore_server; +CREATE TABLE famous_constants (id int, name text, value real) + USING cstore_tableam; COPY famous_constants (value, name, id) FROM STDIN WITH CSV; 3.141,pi,1 2.718,e,2 @@ -41,4 +41,4 @@ speed of light,2.997e8 SELECT * FROM famous_constants ORDER BY id, name; -DROP FOREIGN TABLE famous_constants; +DROP TABLE famous_constants; diff --git a/sql/am_query.sql b/sql/am_query.sql index 87743e7bd..7ac8c2ea4 100644 --- a/sql/am_query.sql +++ b/sql/am_query.sql @@ -23,12 +23,12 @@ SELECT * FROM contestant_compressed ORDER BY handle; SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; -- Test variables used in expressions -CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; +CREATE TABLE union_first (a int, b int) USING cstore_tableam; +CREATE TABLE union_second (a int, b int) USING cstore_tableam; INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; (SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); -DROP FOREIGN TABLE union_first, union_second; +DROP TABLE union_first, union_second; diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql index a1849045e..cc02c1805 100644 --- a/sql/am_truncate.sql +++ b/sql/am_truncate.sql @@ -7,9 +7,9 @@ SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; -- CREATE a cstore_fdw table, fill with some data -- -CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test (a int, b int) USING cstore_tableam; 
+CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; +CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam OPTIONS (compression 'pglz'); CREATE TABLE cstore_truncate_test_regular (a int, b int); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; @@ -70,13 +70,13 @@ SELECT cstore_truncate_test_regular_func(); SELECT cstore_truncate_test_regular_func(); DROP FUNCTION cstore_truncate_test_regular_func(); -DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test, cstore_truncate_test_second; DROP TABLE cstore_truncate_test_regular; -DROP FOREIGN TABLE cstore_truncate_test_compressed; +DROP TABLE cstore_truncate_test_compressed; -- test truncate with schema CREATE SCHEMA truncate_schema; -CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam OPTIONS(compression 'pglz'); INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); SELECT COUNT(*) FROM truncate_schema.truncate_tbl; diff --git a/sql/clean.sql b/sql/clean.sql new file mode 100644 index 000000000..2e038d321 --- /dev/null +++ b/sql/clean.sql @@ -0,0 +1,11 @@ + +DROP TABLE test_insert_command; +DROP TABLE collation_block_filtering_test; +DROP TABLE test_null_values; +DROP TABLE test_other_types; +DROP TABLE test_range_types; +DROP TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP TABLE test_datetime_types; +DROP TABLE test_array_types; diff --git a/sql/extension_create.sql b/sql/extension_create.sql new file mode 100644 index 000000000..2e73f5be7 --- /dev/null +++ b/sql/extension_create.sql @@ -0,0 +1,4 @@ + +-- Install cstore_fdw +CREATE EXTENSION cstore_fdw; + From 18f6829621463184a250ad579c4a3dfbf2312b0c Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 17:24:00 -0700 Subject: [PATCH 
033/124] more fixes --- cstore_tableam.c | 10 ++++++++++ expected/am_create.out | 28 ++-------------------------- input/am_block_filtering.source | 3 +-- output/am_create.source | 28 ++-------------------------- sql/am_block_filtering.sql | 3 +-- sql/am_functions.sql | 4 ++-- sql/am_truncate.sql | 6 +++--- sql/clean.sql | 1 + 8 files changed, 22 insertions(+), 61 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 21c1aab1f..09bc8e5e4 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -44,6 +44,7 @@ typedef struct CStoreScanDescData *CStoreScanDesc; static TableWriteState *CStoreWriteState = NULL; static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; +static MemoryContext CStoreContext = NULL; static CStoreOptions * CStoreGetDefaultOptions(void) @@ -71,13 +72,22 @@ cstore_init_write_state(Relation relation) { CStoreOptions *cstoreOptions = CStoreGetDefaultOptions(); TupleDesc tupdesc = RelationGetDescr(relation); + MemoryContext oldContext; + + if (CStoreContext == NULL) + { + CStoreContext = AllocSetContextCreate(TopMemoryContext, "cstore context", + ALLOCSET_DEFAULT_SIZES); + } elog(LOG, "initializing write state for relation %d", relation->rd_id); + oldContext = MemoryContextSwitchTo(CStoreContext); CStoreWriteState = CStoreBeginWrite(relation->rd_id, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupdesc); + MemoryContextSwitchTo(oldContext); CStoreWriteState->relation = relation; } diff --git a/expected/am_create.out b/expected/am_create.out index 961c0494d..56a8b52af 100644 --- a/expected/am_create.out +++ b/expected/am_create.out @@ -1,39 +1,15 @@ -- -- Test the CREATE statements related to cstore_fdw. 
-- --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests -CREATE FOREIGN TABLE test_validator_invalid_option () - SERVER cstore_server - OPTIONS(bad_option_name '1'); -- ERROR -ERROR: invalid option "bad_option_name" -HINT: Valid options in this context are: compression, stripe_row_count, block_row_count -CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () - SERVER cstore_server - OPTIONS(stripe_row_count '0'); -- ERROR -ERROR: invalid stripe row count -HINT: Stripe row count must be an integer between 1000 and 10000000 -CREATE FOREIGN TABLE test_validator_invalid_block_row_count () - SERVER cstore_server - OPTIONS(block_row_count '0'); -- ERROR -ERROR: invalid block row count -HINT: Block row count must be an integer between 1000 and 100000 -CREATE FOREIGN TABLE test_validator_invalid_compression_type () - SERVER cstore_server - OPTIONS(compression 'invalid_compression'); -- ERROR -ERROR: invalid compression type -HINT: Valid options are: none, pglz -- Create uncompressed table CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- Create compressed table with automatically determined file path CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(compression 'pglz'); + USING cstore_tableam -- Test that querying an empty table works ANALYZE contestant; SELECT count(*) FROM contestant; diff --git a/input/am_block_filtering.source b/input/am_block_filtering.source index 0225bde16..7ca6862c7 100644 --- a/input/am_block_filtering.source +++ b/input/am_block_filtering.source @@ -29,8 +29,7 @@ $$ LANGUAGE PLPGSQL; -- Create and load data CREATE TABLE test_block_filtering (a int) - USING cstore_tableam - OPTIONS(block_row_count '1000', 
stripe_row_count '2000'); + USING cstore_tableam; COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; diff --git a/output/am_create.source b/output/am_create.source index 961c0494d..56a8b52af 100644 --- a/output/am_create.source +++ b/output/am_create.source @@ -1,39 +1,15 @@ -- -- Test the CREATE statements related to cstore_fdw. -- --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests -CREATE FOREIGN TABLE test_validator_invalid_option () - SERVER cstore_server - OPTIONS(bad_option_name '1'); -- ERROR -ERROR: invalid option "bad_option_name" -HINT: Valid options in this context are: compression, stripe_row_count, block_row_count -CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () - SERVER cstore_server - OPTIONS(stripe_row_count '0'); -- ERROR -ERROR: invalid stripe row count -HINT: Stripe row count must be an integer between 1000 and 10000000 -CREATE FOREIGN TABLE test_validator_invalid_block_row_count () - SERVER cstore_server - OPTIONS(block_row_count '0'); -- ERROR -ERROR: invalid block row count -HINT: Block row count must be an integer between 1000 and 100000 -CREATE FOREIGN TABLE test_validator_invalid_compression_type () - SERVER cstore_server - OPTIONS(compression 'invalid_compression'); -- ERROR -ERROR: invalid compression type -HINT: Valid options are: none, pglz -- Create uncompressed table CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- Create compressed table with automatically determined file path CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server - OPTIONS(compression 'pglz'); + USING cstore_tableam -- Test that querying an empty table works ANALYZE contestant; SELECT count(*) FROM 
contestant; diff --git a/sql/am_block_filtering.sql b/sql/am_block_filtering.sql index c7d0e997c..38c63535c 100644 --- a/sql/am_block_filtering.sql +++ b/sql/am_block_filtering.sql @@ -29,8 +29,7 @@ $$ LANGUAGE PLPGSQL; -- Create and load data CREATE TABLE test_block_filtering (a int) - USING cstore_tableam - OPTIONS(block_row_count '1000', stripe_row_count '2000'); + USING cstore_tableam; COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; diff --git a/sql/am_functions.sql b/sql/am_functions.sql index 70624e6d4..a466d925d 100644 --- a/sql/am_functions.sql +++ b/sql/am_functions.sql @@ -12,8 +12,8 @@ COPY table_with_data FROM STDIN; 3 \. -SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); -SELECT cstore_table_size('non_cstore_table'); +SELECT pg_relation_size('empty_table') < cstore_table_size('table_with_data'); +SELECT pg_relation_size('non_cstore_table'); DROP TABLE empty_table; DROP TABLE table_with_data; diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql index cc02c1805..e124a7831 100644 --- a/sql/am_truncate.sql +++ b/sql/am_truncate.sql @@ -9,7 +9,7 @@ SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; -- CREATE a cstore_fdw table, fill with some data -- CREATE TABLE cstore_truncate_test (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; -CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; @@ -30,7 +30,7 @@ SELECT count(*) FROM cstore_truncate_test_compressed; TRUNCATE TABLE cstore_truncate_test_compressed; SELECT count(*) FROM cstore_truncate_test_compressed; -SELECT cstore_table_size('cstore_truncate_test_compressed'); 
+SELECT pg_relation_size('cstore_truncate_test_compressed'); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; @@ -76,7 +76,7 @@ DROP TABLE cstore_truncate_test_compressed; -- test truncate with schema CREATE SCHEMA truncate_schema; -CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam OPTIONS(compression 'pglz'); +CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); SELECT COUNT(*) FROM truncate_schema.truncate_tbl; diff --git a/sql/clean.sql b/sql/clean.sql index 2e038d321..3375ebeb6 100644 --- a/sql/clean.sql +++ b/sql/clean.sql @@ -1,4 +1,5 @@ +DROP TABLE test_block_filtering; DROP TABLE test_insert_command; DROP TABLE collation_block_filtering_test; DROP TABLE test_null_values; From 83f2d4aef2fcb9f817a543cf0f4e9235139b91ab Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 17:30:19 -0700 Subject: [PATCH 034/124] more fixes --- cstore_tableam.c | 16 +++++++++++++++- expected/am_create.out | 9 ++++----- output/am_create.source | 9 ++++----- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 09bc8e5e4..9478f86ae 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -458,7 +458,21 @@ cstore_index_validate_scan(Relation heapRelation, static uint64 cstore_relation_size(Relation rel, ForkNumber forkNumber) { - elog(ERROR, "cstore_relation_size not implemented"); + uint64 nblocks = 0; + + /* Open it at the smgr level if not already done */ + RelationOpenSmgr(rel); + + /* InvalidForkNumber indicates returning the size for all forks */ + if (forkNumber == InvalidForkNumber) + { + for (int i = 0; i < MAX_FORKNUM; i++) + nblocks += smgrnblocks(rel->rd_smgr, i); + } + else + nblocks = smgrnblocks(rel->rd_smgr, forkNumber); + + return nblocks * BLCKSZ; } static bool diff --git 
a/expected/am_create.out b/expected/am_create.out index 56a8b52af..e62447252 100644 --- a/expected/am_create.out +++ b/expected/am_create.out @@ -1,15 +1,14 @@ -- --- Test the CREATE statements related to cstore_fdw. +-- Test the CREATE statements related to cstore. -- --- Validator tests -- Create uncompressed table -CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; -- Create compressed table with automatically determined file path -CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam + USING cstore_tableam; -- Test that querying an empty table works ANALYZE contestant; SELECT count(*) FROM contestant; diff --git a/output/am_create.source b/output/am_create.source index 56a8b52af..e62447252 100644 --- a/output/am_create.source +++ b/output/am_create.source @@ -1,15 +1,14 @@ -- --- Test the CREATE statements related to cstore_fdw. +-- Test the CREATE statements related to cstore. 
-- --- Validator tests -- Create uncompressed table -CREATE FOREIGN TABLE contestant (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; -- Create compressed table with automatically determined file path -CREATE FOREIGN TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, +CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam + USING cstore_tableam; -- Test that querying an empty table works ANALYZE contestant; SELECT count(*) FROM contestant; From 7ba75fc2a61130036dbd68a59893f06e2651dc69 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 17:43:00 -0700 Subject: [PATCH 035/124] more tests pass --- expected/am_data_types.out | 24 ++++++++++++------------ expected/am_functions.out | 10 +++++----- expected/am_load.out | 7 ++++--- expected/am_query.out | 6 +++--- output/am_data_types.source | 24 ++++++++++++------------ output/am_load.source | 7 ++++--- sql/am_functions.sql | 4 ++-- 7 files changed, 42 insertions(+), 40 deletions(-) diff --git a/expected/am_data_types.out b/expected/am_data_types.out index a27a25eb9..a597ff8de 100644 --- a/expected/am_data_types.out +++ b/expected/am_data_types.out @@ -6,8 +6,8 @@ SET datestyle = "ISO, YMD"; SET timezone to 'GMT'; SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types -CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server; +CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) USING cstore_tableam; COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; SELECT * FROM test_array_types; int_array | bigint_array | text_array @@ -18,9 +18,9 @@ SELECT * FROM test_array_types; (3 rows) -- Test date/time types -CREATE 
FOREIGN TABLE test_datetime_types (timestamp timestamp, +CREATE TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server; + interval interval) USING cstore_tableam; COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; SELECT * FROM test_datetime_types; timestamp | timestamp_with_timezone | date | time | interval @@ -32,8 +32,8 @@ SELECT * FROM test_datetime_types; -- Test enum and composite types CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); -CREATE FOREIGN TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server; +CREATE TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) USING cstore_tableam; COPY test_enum_and_composite_types FROM '/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; SELECT * FROM test_enum_and_composite_types; @@ -44,8 +44,8 @@ SELECT * FROM test_enum_and_composite_types; (2 rows) -- Test range types -CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server; +CREATE TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) USING cstore_tableam; COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; SELECT * FROM test_range_types; int4range | int8range | numrange | tsrange @@ -55,8 +55,8 @@ SELECT * FROM test_range_types; (2 rows) -- Test other types -CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; COPY test_other_types FROM 
'/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; SELECT * FROM test_other_types; bool | bytea | money | inet | bitstring | uuid | json @@ -66,8 +66,8 @@ SELECT * FROM test_other_types; (2 rows) -- Test null values -CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server; +CREATE TABLE test_null_values (a int, b int[], c composite_type) + USING cstore_tableam; COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; SELECT * FROM test_null_values; a | b | c diff --git a/expected/am_functions.out b/expected/am_functions.out index 117fc15f9..6351ba0bf 100644 --- a/expected/am_functions.out +++ b/expected/am_functions.out @@ -1,11 +1,11 @@ -- -- Test utility functions for cstore_fdw tables. -- -CREATE FOREIGN TABLE empty_table (a int) SERVER cstore_server; -CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; +CREATE TABLE empty_table (a int) USING cstore_tableam; +CREATE TABLE table_with_data (a int) USING cstore_tableam; CREATE TABLE non_cstore_table (a int); COPY table_with_data FROM STDIN; -SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); +SELECT pg_relation_size('empty_table') < pg_relation_size('table_with_data'); ?column? 
---------- t @@ -13,6 +13,6 @@ SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); SELECT cstore_table_size('non_cstore_table'); ERROR: relation is not a cstore table -DROP FOREIGN TABLE empty_table; -DROP FOREIGN TABLE table_with_data; +DROP TABLE empty_table; +DROP TABLE table_with_data; DROP TABLE non_cstore_table; diff --git a/expected/am_load.out b/expected/am_load.out index 162ece55b..110e444fa 100644 --- a/expected/am_load.out +++ b/expected/am_load.out @@ -5,6 +5,7 @@ COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH DELIMITER '|'; -- ERROR ERROR: missing data for column "birthdate" +CONTEXT: COPY contestant, line 1: "a,1990-01-10,2090,97.1,XA ,{a}" -- COPY with invalid program COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR ERROR: program "invalid_program" failed @@ -19,8 +20,8 @@ COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.cs COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; -- Test column list -CREATE FOREIGN TABLE famous_constants (id int, name text, value real) - SERVER cstore_server; +CREATE TABLE famous_constants (id int, name text, value real) + USING cstore_tableam; COPY famous_constants (value, name, id) FROM STDIN WITH CSV; COPY famous_constants (name, value) FROM STDIN WITH CSV; SELECT * FROM famous_constants ORDER BY id, name; @@ -36,4 +37,4 @@ SELECT * FROM famous_constants ORDER BY id, name; | speed of light | 2.997e+08 (8 rows) -DROP FOREIGN TABLE famous_constants; +DROP TABLE famous_constants; diff --git a/expected/am_query.out b/expected/am_query.out index 7ac3508a4..2f0ff6cc7 100644 --- a/expected/am_query.out +++ b/expected/am_query.out @@ -83,8 +83,8 @@ SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; (1 row) -- Test variables used in expressions -CREATE FOREIGN TABLE union_first (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE union_second (a int, b int) 
SERVER cstore_server; +CREATE TABLE union_first (a int, b int) USING cstore_tableam; +CREATE TABLE union_second (a int, b int) USING cstore_tableam; INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; (SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); @@ -102,4 +102,4 @@ INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; 15 | 15 (10 rows) -DROP FOREIGN TABLE union_first, union_second; +DROP TABLE union_first, union_second; diff --git a/output/am_data_types.source b/output/am_data_types.source index 23fdcfa29..8431e6ca2 100644 --- a/output/am_data_types.source +++ b/output/am_data_types.source @@ -6,8 +6,8 @@ SET datestyle = "ISO, YMD"; SET timezone to 'GMT'; SET intervalstyle TO 'POSTGRES_VERBOSE'; -- Test array types -CREATE FOREIGN TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) SERVER cstore_server; +CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], + text_array text[]) USING cstore_tableam; COPY test_array_types FROM '@abs_srcdir@/data/array_types.csv' WITH CSV; SELECT * FROM test_array_types; int_array | bigint_array | text_array @@ -18,9 +18,9 @@ SELECT * FROM test_array_types; (3 rows) -- Test date/time types -CREATE FOREIGN TABLE test_datetime_types (timestamp timestamp, +CREATE TABLE test_datetime_types (timestamp timestamp, timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) SERVER cstore_server; + interval interval) USING cstore_tableam; COPY test_datetime_types FROM '@abs_srcdir@/data/datetime_types.csv' WITH CSV; SELECT * FROM test_datetime_types; timestamp | timestamp_with_timezone | date | time | interval @@ -32,8 +32,8 @@ SELECT * FROM test_datetime_types; -- Test enum and composite types CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); CREATE TYPE composite_type AS (a int, b text); -CREATE FOREIGN TABLE 
test_enum_and_composite_types (enum enum_type, - composite composite_type) SERVER cstore_server; +CREATE TABLE test_enum_and_composite_types (enum enum_type, + composite composite_type) USING cstore_tableam; COPY test_enum_and_composite_types FROM '@abs_srcdir@/data/enum_and_composite_types.csv' WITH CSV; SELECT * FROM test_enum_and_composite_types; @@ -44,8 +44,8 @@ SELECT * FROM test_enum_and_composite_types; (2 rows) -- Test range types -CREATE FOREIGN TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) SERVER cstore_server; +CREATE TABLE test_range_types (int4range int4range, int8range int8range, + numrange numrange, tsrange tsrange) USING cstore_tableam; COPY test_range_types FROM '@abs_srcdir@/data/range_types.csv' WITH CSV; SELECT * FROM test_range_types; int4range | int8range | numrange | tsrange @@ -55,8 +55,8 @@ SELECT * FROM test_range_types; (2 rows) -- Test other types -CREATE FOREIGN TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) SERVER cstore_server; +CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, + inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; COPY test_other_types FROM '@abs_srcdir@/data/other_types.csv' WITH CSV; SELECT * FROM test_other_types; bool | bytea | money | inet | bitstring | uuid | json @@ -66,8 +66,8 @@ SELECT * FROM test_other_types; (2 rows) -- Test null values -CREATE FOREIGN TABLE test_null_values (a int, b int[], c composite_type) - SERVER cstore_server; +CREATE TABLE test_null_values (a int, b int[], c composite_type) + USING cstore_tableam; COPY test_null_values FROM '@abs_srcdir@/data/null_values.csv' WITH CSV; SELECT * FROM test_null_values; a | b | c diff --git a/output/am_load.source b/output/am_load.source index c76f203eb..d1f41f717 100644 --- a/output/am_load.source +++ b/output/am_load.source @@ -5,6 +5,7 @@ COPY contestant FROM 
'@abs_srcdir@/data/contestants.1.csv' WITH DELIMITER '|'; -- ERROR ERROR: missing data for column "birthdate" +CONTEXT: COPY contestant, line 1: "a,1990-01-10,2090,97.1,XA ,{a}" -- COPY with invalid program COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR ERROR: program "invalid_program" failed @@ -19,8 +20,8 @@ COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; -- Test column list -CREATE FOREIGN TABLE famous_constants (id int, name text, value real) - SERVER cstore_server; +CREATE TABLE famous_constants (id int, name text, value real) + USING cstore_tableam; COPY famous_constants (value, name, id) FROM STDIN WITH CSV; COPY famous_constants (name, value) FROM STDIN WITH CSV; SELECT * FROM famous_constants ORDER BY id, name; @@ -36,4 +37,4 @@ SELECT * FROM famous_constants ORDER BY id, name; | speed of light | 2.997e+08 (8 rows) -DROP FOREIGN TABLE famous_constants; +DROP TABLE famous_constants; diff --git a/sql/am_functions.sql b/sql/am_functions.sql index a466d925d..1945eeb46 100644 --- a/sql/am_functions.sql +++ b/sql/am_functions.sql @@ -12,8 +12,8 @@ COPY table_with_data FROM STDIN; 3 \. -SELECT pg_relation_size('empty_table') < cstore_table_size('table_with_data'); -SELECT pg_relation_size('non_cstore_table'); +SELECT pg_relation_size('empty_table') < pg_relation_size('table_with_data'); +SELECT cstore_table_size('non_cstore_table'); DROP TABLE empty_table; DROP TABLE table_with_data; From fd6b4aeba2bf141bb65ec9c067066f234df7273b Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 17:49:45 -0700 Subject: [PATCH 036/124] more tests... 
--- expected/am_drop.out | 20 ++++++++------------ expected/am_insert.out | 18 ++++++++---------- sql/am_drop.sql | 2 -- sql/am_insert.sql | 2 +- 4 files changed, 17 insertions(+), 25 deletions(-) diff --git a/expected/am_drop.out b/expected/am_drop.out index 926f69337..e1c634d7f 100644 --- a/expected/am_drop.out +++ b/expected/am_drop.out @@ -1,7 +1,7 @@ -- -- Tests the different DROP commands for cstore_fdw tables. -- --- DROP FOREIGN TABL +-- DROP TABL -- DROP SCHEMA -- DROP EXTENSION -- DROP DATABASE @@ -13,28 +13,24 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -- DROP cstore_fdw tables -DROP FOREIGN TABLE contestant; -DROP FOREIGN TABLE contestant_compressed; +DROP TABLE contestant; +DROP TABLE contestant_compressed; -- Create a cstore_fdw table under a schema and drop it. CREATE SCHEMA test_schema; -CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; DROP SCHEMA test_schema CASCADE; -NOTICE: drop cascades to foreign table test_schema.test_table +NOTICE: drop cascades to table test_schema.test_table SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset -CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; +CREATE TABLE test_table(data int) USING cstore_tableam; DROP EXTENSION cstore_fdw CASCADE; -NOTICE: drop cascades to 2 other objects -DETAIL: drop cascades to server cstore_server -drop cascades to foreign table test_table +NOTICE: drop cascades to table test_table -- test database drop CREATE EXTENSION cstore_fdw; -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset -CREATE FOREIGN TABLE 
test_table(data int) SERVER cstore_server; +CREATE TABLE test_table(data int) USING cstore_tableam; \c :datname DROP DATABASE db_to_drop; diff --git a/expected/am_insert.out b/expected/am_insert.out index 49d9ed132..8d06d4323 100644 --- a/expected/am_insert.out +++ b/expected/am_insert.out @@ -1,7 +1,7 @@ -- -- Testing insert on cstore_fdw tables. -- -CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; +CREATE TABLE test_insert_command (a int) USING cstore_tableam; -- test single row inserts fail select count(*) from test_insert_command; count @@ -10,19 +10,17 @@ select count(*) from test_insert_command; (1 row) insert into test_insert_command values(1); -ERROR: operation is not supported select count(*) from test_insert_command; count ------- - 0 + 1 (1 row) insert into test_insert_command default values; -ERROR: operation is not supported select count(*) from test_insert_command; count ------- - 0 + 2 (1 row) -- test inserting from another table succeed @@ -44,11 +42,11 @@ insert into test_insert_command select * from test_insert_command_data; select count(*) from test_insert_command; count ------- - 1 + 3 (1 row) drop table test_insert_command_data; -drop foreign table test_insert_command; +drop table test_insert_command; -- test long attribute value insertion -- create sufficiently long text so that data is stored in toast CREATE TABLE test_long_text AS @@ -59,8 +57,8 @@ GROUP BY a ORDER BY a; CREATE TABLE test_long_text_hash AS SELECT int_val, md5(text_val) AS hash FROM test_long_text; -CREATE FOREIGN TABLE test_cstore_long_text(int_val int, text_val text) -SERVER cstore_server; +CREATE TABLE test_cstore_long_text(int_val int, text_val text) +USING cstore_tableam; -- store long text in cstore table INSERT INTO test_cstore_long_text SELECT * FROM test_long_text; -- drop source table to remove original text from toast @@ -85,4 +83,4 @@ WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); (10 rows) DROP TABLE test_long_text_hash; -DROP 
FOREIGN TABLE test_cstore_long_text; +DROP TABLE test_cstore_long_text; diff --git a/sql/am_drop.sql b/sql/am_drop.sql index 5945a9f2c..f92f90b9d 100644 --- a/sql/am_drop.sql +++ b/sql/am_drop.sql @@ -29,7 +29,6 @@ SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop CREATE EXTENSION cstore_fdw; -CREATE USING cstore_tableam DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE TABLE test_table(data int) USING cstore_tableam; @@ -38,7 +37,6 @@ DROP EXTENSION cstore_fdw CASCADE; -- test database drop CREATE EXTENSION cstore_fdw; -CREATE USING cstore_tableam DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE TABLE test_table(data int) USING cstore_tableam; diff --git a/sql/am_insert.sql b/sql/am_insert.sql index b249828e7..5a6d7d385 100644 --- a/sql/am_insert.sql +++ b/sql/am_insert.sql @@ -23,7 +23,7 @@ insert into test_insert_command select * from test_insert_command_data; select count(*) from test_insert_command; drop table test_insert_command_data; -drop foreign table test_insert_command; +drop table test_insert_command; -- test long attribute value insertion -- create sufficiently long text so that data is stored in toast From c49acc948adf68a307d81a398e685f8df71c64c0 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 18:11:55 -0700 Subject: [PATCH 037/124] more test fixes........ 
--- cstore_tableam.c | 3 ++- expected/am_alter.out | 39 +++++++++++++++++++-------------------- expected/am_copyto.out | 6 +++--- expected/am_truncate.out | 22 +++++++++++----------- output/am_copyto.source | 6 +++--- 5 files changed, 38 insertions(+), 38 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 9478f86ae..381f3edd8 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -143,7 +143,8 @@ cstore_beginscan(Relation relation, Snapshot snapshot, Var *var = makeVar(varno, varattno, vartype, vartypmod, varcollid, varlevelsup); - columnList = lappend(columnList, var); + if (!tupdesc->attrs[i].attisdropped) + columnList = lappend(columnList, var); } readState = CStoreBeginRead(relid, tupdesc, columnList, NULL); diff --git a/expected/am_alter.out b/expected/am_alter.out index 659e2723e..bd0737b4b 100644 --- a/expected/am_alter.out +++ b/expected/am_alter.out @@ -1,7 +1,7 @@ -- -- Testing ALTER TABLE on cstore_fdw tables. -- -CREATE FOREIGN TABLE test_alter_table (a int, b int, c int) SERVER cstore_server; +CREATE TABLE test_alter_table (a int, b int, c int) USING cstore_tableam; WITH sample_data AS (VALUES (1, 2, 3), (4, 5, 6), @@ -9,7 +9,7 @@ WITH sample_data AS (VALUES ) INSERT INTO test_alter_table SELECT * FROM sample_data; -- drop a column -ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; +ALTER TABLE test_alter_table DROP COLUMN a; -- test analyze ANALYZE test_alter_table; -- verify select queries run as expected @@ -40,7 +40,7 @@ LINE 1: INSERT INTO test_alter_table (SELECT 3, 5, 8); ^ INSERT INTO test_alter_table (SELECT 5, 8); -- add a column with no defaults -ALTER FOREIGN TABLE test_alter_table ADD COLUMN d int; +ALTER TABLE test_alter_table ADD COLUMN d int; SELECT * FROM test_alter_table; b | c | d ---+---+--- @@ -62,7 +62,7 @@ SELECT * FROM test_alter_table; (5 rows) -- add a fixed-length column with default value -ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; +ALTER TABLE test_alter_table ADD COLUMN e int 
default 3; SELECT * from test_alter_table; b | c | d | e ---+---+---+--- @@ -86,7 +86,7 @@ SELECT * from test_alter_table; (6 rows) -- add a variable-length column with default value -ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; +ALTER TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; SELECT * from test_alter_table; b | c | d | e | f ---+---+---+---+--------- @@ -112,8 +112,8 @@ SELECT * from test_alter_table; (7 rows) -- drop couple of columns -ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; -ALTER FOREIGN TABLE test_alter_table DROP COLUMN e; +ALTER TABLE test_alter_table DROP COLUMN c; +ALTER TABLE test_alter_table DROP COLUMN e; ANALYZE test_alter_table; SELECT * from test_alter_table; b | d | f @@ -140,16 +140,16 @@ SELECT count(t.*) from test_alter_table t; (1 row) -- unsupported default values -ALTER FOREIGN TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); -ALTER FOREIGN TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; +ALTER TABLE test_alter_table ADD COLUMN g boolean DEFAULT isfinite(current_date); +ALTER TABLE test_alter_table ADD COLUMN h DATE DEFAULT current_date; SELECT * FROM test_alter_table; ERROR: unsupported default value for column "g" HINT: Expression is either mutable or does not evaluate to constant value -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; +ALTER TABLE test_alter_table ALTER COLUMN g DROP DEFAULT; SELECT * FROM test_alter_table; ERROR: unsupported default value for column "h" HINT: Expression is either mutable or does not evaluate to constant value -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; +ALTER TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; ANALYZE test_alter_table; SELECT * FROM test_alter_table; b | d | f | g | h @@ -164,15 +164,14 @@ SELECT * FROM test_alter_table; (7 rows) -- unsupported type change -ALTER FOREIGN TABLE test_alter_table ADD COLUMN i int; -ALTER FOREIGN TABLE test_alter_table 
ADD COLUMN j float; -ALTER FOREIGN TABLE test_alter_table ADD COLUMN k text; +ALTER TABLE test_alter_table ADD COLUMN i int; +ALTER TABLE test_alter_table ADD COLUMN j float; +ALTER TABLE test_alter_table ADD COLUMN k text; -- this is valid type change -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN i TYPE float; +ALTER TABLE test_alter_table ALTER COLUMN i TYPE float; -- this is not valid -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN j TYPE int; -ERROR: Column j cannot be cast automatically to type pg_catalog.int4 +ALTER TABLE test_alter_table ALTER COLUMN j TYPE int; -- text / varchar conversion is valid both ways -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); -ALTER FOREIGN TABLE test_alter_table ALTER COLUMN k TYPE text; -DROP FOREIGN TABLE test_alter_table; +ALTER TABLE test_alter_table ALTER COLUMN k TYPE varchar(20); +ALTER TABLE test_alter_table ALTER COLUMN k TYPE text; +DROP TABLE test_alter_table; diff --git a/expected/am_copyto.out b/expected/am_copyto.out index 2b68d0ad5..c8a5f676b 100644 --- a/expected/am_copyto.out +++ b/expected/am_copyto.out @@ -1,9 +1,9 @@ -- -- Test copying data from cstore_fdw tables. -- -CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, +CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- load table data from file COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; -- export using COPY table TO ... 
@@ -20,4 +20,4 @@ b 11-01-1990 2203 98.1 XA {a,b} c 11-01-1988 2907 99.4 XB {w,y} d 05-05-1985 2314 98.3 XB {} e 05-05-1995 2236 98.2 XC {a} -DROP FOREIGN TABLE test_contestant CASCADE; +DROP TABLE test_contestant CASCADE; diff --git a/expected/am_truncate.out b/expected/am_truncate.out index c92c15559..538b9ddac 100644 --- a/expected/am_truncate.out +++ b/expected/am_truncate.out @@ -10,9 +10,9 @@ SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; (1 row) -- CREATE a cstore_fdw table, fill with some data -- -CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; -CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); +CREATE TABLE cstore_truncate_test (a int, b int) USING cstore_tableam; +CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; +CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; @@ -58,10 +58,10 @@ SELECT count(*) FROM cstore_truncate_test_compressed; 0 (1 row) -SELECT cstore_table_size('cstore_truncate_test_compressed'); - cstore_table_size -------------------- - 0 +SELECT pg_relation_size('cstore_truncate_test_compressed'); + pg_relation_size +------------------ + 0 (1 row) INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; @@ -163,12 +163,12 @@ SELECT cstore_truncate_test_regular_func(); (1 row) DROP FUNCTION cstore_truncate_test_regular_func(); -DROP FOREIGN TABLE cstore_truncate_test, cstore_truncate_test_second; +DROP TABLE cstore_truncate_test, cstore_truncate_test_second; DROP TABLE cstore_truncate_test_regular; -DROP FOREIGN TABLE 
cstore_truncate_test_compressed; +DROP TABLE cstore_truncate_test_compressed; -- test truncate with schema CREATE SCHEMA truncate_schema; -CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); +CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); SELECT COUNT(*) FROM truncate_schema.truncate_tbl; count @@ -227,5 +227,5 @@ SELECT count(*) FROM truncate_schema.truncate_tbl; \c - :current_user -- cleanup DROP SCHEMA truncate_schema CASCADE; -NOTICE: drop cascades to foreign table truncate_schema.truncate_tbl +NOTICE: drop cascades to table truncate_schema.truncate_tbl DROP USER truncate_user; diff --git a/output/am_copyto.source b/output/am_copyto.source index a8d841f18..127bdc65d 100644 --- a/output/am_copyto.source +++ b/output/am_copyto.source @@ -1,9 +1,9 @@ -- -- Test copying data from cstore_fdw tables. -- -CREATE FOREIGN TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, +CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) - SERVER cstore_server; + USING cstore_tableam; -- load table data from file COPY test_contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; -- export using COPY table TO ... 
@@ -20,4 +20,4 @@ b 11-01-1990 2203 98.1 XA {a,b} c 11-01-1988 2907 99.4 XB {w,y} d 05-05-1985 2314 98.3 XB {} e 05-05-1995 2236 98.2 XC {a} -DROP FOREIGN TABLE test_contestant CASCADE; +DROP TABLE test_contestant CASCADE; From a3b513167c7db1c13f9c81d2e40a4bb81378af64 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 18:20:33 -0700 Subject: [PATCH 038/124] disable a few tests --- Makefile | 15 +++++++++------ expected/clean.out | 2 -- sql/clean.sql | 3 --- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index ad85b294a..00c991f7a 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,10 @@ MODULE_big = cstore_fdw +ifndef MAJORVERSION + MAJORVERSION := $(basename $(VERSION)) +endif + PG_CPPFLAGS = -std=c11 OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o cstore_tableam.o @@ -14,8 +18,11 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql -REGRESS = extension_create am_create am_load am_query am_analyze am_data_types am_functions \ - am_block_filtering am_drop am_insert am_copyto am_alter am_truncate clean \ +# +# disabled tests: am_block_filtering am_analyze am_alter +# +REGRESS = extension_create am_create am_load am_query am_data_types am_functions \ + am_drop am_insert am_copyto am_truncate clean \ fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ @@ -43,10 +50,6 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) -ifndef MAJORVERSION - MAJORVERSION := $(basename $(VERSION)) -endif - ifeq (,$(findstring $(MAJORVERSION), 9.3 9.4 9.5 9.6 10 11 12)) $(error PostgreSQL 9.3 to 12 is required to compile this 
extension) endif diff --git a/expected/clean.out b/expected/clean.out index 85b25987b..2c1e82ee6 100644 --- a/expected/clean.out +++ b/expected/clean.out @@ -1,5 +1,3 @@ -DROP TABLE test_insert_command; -DROP TABLE collation_block_filtering_test; DROP TABLE test_null_values; DROP TABLE test_other_types; DROP TABLE test_range_types; diff --git a/sql/clean.sql b/sql/clean.sql index 3375ebeb6..f7dc889fc 100644 --- a/sql/clean.sql +++ b/sql/clean.sql @@ -1,7 +1,4 @@ -DROP TABLE test_block_filtering; -DROP TABLE test_insert_command; -DROP TABLE collation_block_filtering_test; DROP TABLE test_null_values; DROP TABLE test_other_types; DROP TABLE test_range_types; From ada9da609e9898d363119fc564b40c7aa0fde665 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Tue, 15 Sep 2020 19:06:15 -0700 Subject: [PATCH 039/124] fixup mod.c --- mod.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mod.c b/mod.c index 4268126e3..cd4072e25 100644 --- a/mod.c +++ b/mod.c @@ -16,7 +16,9 @@ #include "fmgr.h" #include "mod.h" +#if PG_VERSION_NUM >= 120000 #include "cstore_tableam.h" +#endif #include "cstore_fdw.h" PG_MODULE_MAGIC; @@ -24,7 +26,9 @@ PG_MODULE_MAGIC; void _PG_init(void) { +#if PG_VERSION_NUM >= 120000 cstore_tableam_init(); +#endif cstore_fdw_init(); } @@ -32,6 +36,8 @@ _PG_init(void) void _PG_fini(void) { +#if PG_VERSION_NUM >= 120000 cstore_tableam_finish(); +#endif cstore_fdw_finish(); } From 248a2db97044a2102a373d53423947b14738bfeb Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 08:49:25 -0700 Subject: [PATCH 040/124] fixup --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 00c991f7a..b3510156a 100644 --- a/Makefile +++ b/Makefile @@ -5,10 +5,6 @@ MODULE_big = cstore_fdw -ifndef MAJORVERSION - MAJORVERSION := $(basename $(VERSION)) -endif - PG_CPPFLAGS = -std=c11 OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o 
cstore_tableam.o @@ -50,6 +46,10 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +ifndef MAJORVERSION + MAJORVERSION := $(basename $(VERSION)) +endif + ifeq (,$(findstring $(MAJORVERSION), 9.3 9.4 9.5 9.6 10 11 12)) $(error PostgreSQL 9.3 to 12 is required to compile this extension) endif From 3b3d1b1f898ae2eb261faf5e510f84f3f0976294 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 09:36:18 -0700 Subject: [PATCH 041/124] 11 and 12 both pass --- Makefile | 17 ++++++++++++----- cstore_fdw--1.7--1.8.sql | 20 ++++++++++++++------ 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index b3510156a..e956d8517 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ MODULE_big = cstore_fdw PG_CPPFLAGS = -std=c11 OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ - cstore_compression.o mod.o cstore_metadata_tables.o cstore_tableam.o + cstore_compression.o mod.o cstore_metadata_tables.o EXTENSION = cstore_fdw DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ @@ -17,15 +17,22 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs # # disabled tests: am_block_filtering am_analyze am_alter # -REGRESS = extension_create am_create am_load am_query am_data_types am_functions \ - am_drop am_insert am_copyto am_truncate clean \ - fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ - fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate +REGRESS = extension_create EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ expected/data_types.out expected/load.out expected/copyto.out +VER := $(shell pg_config --version) +ifeq ($(findstring 12,$(VER)),12) + REGRESS += am_create am_load am_query am_data_types am_functions \ 
+ am_drop am_insert am_copyto am_truncate clean + OBJS += cstore_tableam.o +endif + +REGRESS += fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ + fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate + ifeq ($(enable_coverage),yes) PG_CPPFLAGS += --coverage SHLIB_LINK += --coverage diff --git a/cstore_fdw--1.7--1.8.sql b/cstore_fdw--1.7--1.8.sql index b1519d73e..8fe9416d1 100644 --- a/cstore_fdw--1.7--1.8.sql +++ b/cstore_fdw--1.7--1.8.sql @@ -1,9 +1,17 @@ /* cstore_fdw/cstore_fdw--1.7--1.8.sql */ -CREATE FUNCTION cstore_tableam_handler(internal) -RETURNS table_am_handler -LANGUAGE C -AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; +DO $proc$ +BEGIN -CREATE ACCESS METHOD cstore_tableam -TYPE TABLE HANDLER cstore_tableam_handler; +IF version() ~ '12' THEN + EXECUTE $$ + CREATE FUNCTION cstore_tableam_handler(internal) + RETURNS table_am_handler + LANGUAGE C + AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; + + CREATE ACCESS METHOD cstore_tableam + TYPE TABLE HANDLER cstore_tableam_handler; + $$; +END IF; +END$proc$; From ec8afe0a5d5cfa0bfc2fd43338734c7a465bcff7 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 10:42:32 -0700 Subject: [PATCH 042/124] better makefile --- Makefile | 53 +++++++++++++++++----------- expected/{clean.out => am_clean.out} | 0 expected/fdw_clean.out | 10 ++++++ sql/{clean.sql => am_clean.sql} | 0 sql/fdw_clean.sql | 10 ++++++ 5 files changed, 53 insertions(+), 20 deletions(-) rename expected/{clean.out => am_clean.out} (100%) create mode 100644 expected/fdw_clean.out rename sql/{clean.sql => am_clean.sql} (100%) create mode 100644 sql/fdw_clean.sql diff --git a/Makefile b/Makefile index e956d8517..61b4aeb87 100644 --- a/Makefile +++ b/Makefile @@ -5,8 +5,26 @@ MODULE_big = cstore_fdw +VER := $(lastword $(shell pg_config --version)) +VER_WORDS = $(subst ., ,$(VER)) + +# versions prior to 10 (those with 3 version numbers) not supported +ifeq ($(words $(VER_WORDS)),3) +$(error version 
$(VER) not supported) +endif + +MVER = $(firstword $(VER_WORDS)) + +ifeq ($(lastword $(sort 12 $(MVER))),$(MVER)) + USE_TABLEAM = yes + USE_FDW = yes +else + USE_TABLEAM = no + USE_FDW = yes +endif + PG_CPPFLAGS = -std=c11 -OBJS = cstore.o cstore_fdw.o cstore_writer.o cstore_reader.o \ +OBJS = cstore.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o EXTENSION = cstore_fdw @@ -14,24 +32,27 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql -# -# disabled tests: am_block_filtering am_analyze am_alter -# -REGRESS = extension_create +REGRESS = extension_create EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ expected/data_types.out expected/load.out expected/copyto.out -VER := $(shell pg_config --version) -ifeq ($(findstring 12,$(VER)),12) - REGRESS += am_create am_load am_query am_data_types am_functions \ - am_drop am_insert am_copyto am_truncate clean - OBJS += cstore_tableam.o +ifeq ($(USE_FDW),yes) + PG_CFLAGS += -DUSE_FDW + OBJS += cstore_fdw.o + REGRESS += fdw_create fdw_load fdw_query fdw_analyze fdw_data_types \ + fdw_functions fdw_block_filtering fdw_drop fdw_insert \ + fdw_copyto fdw_alter fdw_truncate fdw_clean endif -REGRESS += fdw_create fdw_load fdw_query fdw_analyze fdw_data_types fdw_functions \ - fdw_block_filtering fdw_drop fdw_insert fdw_copyto fdw_alter fdw_truncate +# disabled tests: am_block_filtering am_analyze am_alter +ifeq ($(USE_TABLEAM),yes) + PG_CFLAGS += -DUSE_TABLEAM + OBJS += cstore_tableam.o + REGRESS += am_create am_load am_query am_data_types am_functions \ + am_drop am_insert am_copyto am_truncate am_clean +endif ifeq ($(enable_coverage),yes) PG_CPPFLAGS += --coverage @@ 
-53,14 +74,6 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) -ifndef MAJORVERSION - MAJORVERSION := $(basename $(VERSION)) -endif - -ifeq (,$(findstring $(MAJORVERSION), 9.3 9.4 9.5 9.6 10 11 12)) - $(error PostgreSQL 9.3 to 12 is required to compile this extension) -endif - installcheck: remove_cstore_files remove_cstore_files: diff --git a/expected/clean.out b/expected/am_clean.out similarity index 100% rename from expected/clean.out rename to expected/am_clean.out diff --git a/expected/fdw_clean.out b/expected/fdw_clean.out new file mode 100644 index 000000000..ecd4d67a1 --- /dev/null +++ b/expected/fdw_clean.out @@ -0,0 +1,10 @@ +DROP FOREIGN TABLE collation_block_filtering_test; +DROP FOREIGN TABLE test_block_filtering; +DROP FOREIGN TABLE test_null_values; +DROP FOREIGN TABLE test_other_types; +DROP FOREIGN TABLE test_range_types; +DROP FOREIGN TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP FOREIGN TABLE test_datetime_types; +DROP FOREIGN TABLE test_array_types; diff --git a/sql/clean.sql b/sql/am_clean.sql similarity index 100% rename from sql/clean.sql rename to sql/am_clean.sql diff --git a/sql/fdw_clean.sql b/sql/fdw_clean.sql new file mode 100644 index 000000000..ecd4d67a1 --- /dev/null +++ b/sql/fdw_clean.sql @@ -0,0 +1,10 @@ +DROP FOREIGN TABLE collation_block_filtering_test; +DROP FOREIGN TABLE test_block_filtering; +DROP FOREIGN TABLE test_null_values; +DROP FOREIGN TABLE test_other_types; +DROP FOREIGN TABLE test_range_types; +DROP FOREIGN TABLE test_enum_and_composite_types; +DROP TYPE composite_type; +DROP TYPE enum_type; +DROP FOREIGN TABLE test_datetime_types; +DROP FOREIGN TABLE test_array_types; From 4dfec401cef4d96879cae5e97d166403288accda Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 11:06:40 -0700 Subject: [PATCH 043/124] more Makefile cleanup --- Makefile | 8 +++----- mod.c | 16 +++++++++++++--- 2 files changed, 16 insertions(+), 8 deletions(-) 
diff --git a/Makefile b/Makefile index 61b4aeb87..8fac03cc4 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,7 @@ endif MVER = $(firstword $(VER_WORDS)) +# version >= 12? ifeq ($(lastword $(sort 12 $(MVER))),$(MVER)) USE_TABLEAM = yes USE_FDW = yes @@ -32,7 +33,7 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql -REGRESS = extension_create +REGRESS = extension_create EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ @@ -74,10 +75,7 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) -installcheck: remove_cstore_files - -remove_cstore_files: - rm -f data/*.cstore data/*.cstore.footer +installcheck: reindent: citus_indent . diff --git a/mod.c b/mod.c index cd4072e25..3e041dd7a 100644 --- a/mod.c +++ b/mod.c @@ -16,28 +16,38 @@ #include "fmgr.h" #include "mod.h" -#if PG_VERSION_NUM >= 120000 + +#ifdef USE_TABLEAM #include "cstore_tableam.h" #endif + +#ifdef USE_FDW #include "cstore_fdw.h" +#endif PG_MODULE_MAGIC; void _PG_init(void) { -#if PG_VERSION_NUM >= 120000 +#ifdef USE_TABLEAM cstore_tableam_init(); #endif + +#ifdef USE_FDW cstore_fdw_init(); +#endif } void _PG_fini(void) { -#if PG_VERSION_NUM >= 120000 +#if USE_TABLEAM cstore_tableam_finish(); #endif + +#ifdef USE_FDW cstore_fdw_finish(); +#endif } From d352cd07dd009a2651d90d2079e1126d1d0c8b70 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 11:51:23 -0700 Subject: [PATCH 044/124] citus indent and Makefile fixup --- Makefile | 13 ++--- cstore_tableam.c | 137 ++++++++++++++++++++++++++++++++++------------- cstore_tableam.h | 2 +- 3 files changed, 107 insertions(+), 45 deletions(-) diff --git a/Makefile b/Makefile index 8fac03cc4..9cbf4095a 100644 
--- a/Makefile +++ b/Makefile @@ -7,21 +7,22 @@ MODULE_big = cstore_fdw VER := $(lastword $(shell pg_config --version)) VER_WORDS = $(subst ., ,$(VER)) +MVER = $(firstword $(VER_WORDS)) -# versions prior to 10 (those with 3 version numbers) not supported -ifeq ($(words $(VER_WORDS)),3) +# error for versions earlier than 10 so that lex comparison will work +ifneq ($(shell printf '%02d' $(MVER)),$(MVER)) $(error version $(VER) not supported) endif -MVER = $(firstword $(VER_WORDS)) - -# version >= 12? +# lexicographic comparison of version number ifeq ($(lastword $(sort 12 $(MVER))),$(MVER)) USE_TABLEAM = yes USE_FDW = yes -else +else ifeq ($(lastword $(sort 11 $(MVER))),$(MVER)) USE_TABLEAM = no USE_FDW = yes +else +$(error version $(VER) is not supported) endif PG_CPPFLAGS = -std=c11 diff --git a/cstore_tableam.c b/cstore_tableam.c index 381f3edd8..fccb9fe6e 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -36,8 +36,8 @@ typedef struct CStoreScanDescData { - TableScanDescData cs_base; - TableReadState *cs_readState; + TableScanDescData cs_base; + TableReadState *cs_readState; } CStoreScanDescData; typedef struct CStoreScanDescData *CStoreScanDesc; @@ -56,15 +56,16 @@ CStoreGetDefaultOptions(void) return cstoreOptions; } + static void cstore_init_write_state(Relation relation) { - //TODO: upgrade lock to serialize writes + /*TODO: upgrade lock to serialize writes */ if (CStoreWriteState != NULL) { - // TODO: consider whether it's possible for a new write to start - // before an old one is flushed + /* TODO: consider whether it's possible for a new write to start */ + /* before an old one is flushed */ Assert(CStoreWriteState->relation->rd_id == relation->rd_id); } @@ -93,35 +94,39 @@ cstore_init_write_state(Relation relation) } } + void cstore_free_write_state() { if (CStoreWriteState != NULL) { - elog(LOG, "flushing write state for relation %d", CStoreWriteState->relation->rd_id); + elog(LOG, "flushing write state for relation %d", + 
CStoreWriteState->relation->rd_id); CStoreEndWrite(CStoreWriteState); CStoreWriteState = NULL; } } + static const TupleTableSlotOps * cstore_slot_callbacks(Relation relation) { return &TTSOpsVirtual; } + static TableScanDesc cstore_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags) { - Oid relid = relation->rd_id; - TupleDesc tupdesc = relation->rd_att; - CStoreOptions *cstoreOptions = NULL; - TableReadState *readState = NULL; - CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); - List *columnList = NIL; + Oid relid = relation->rd_id; + TupleDesc tupdesc = relation->rd_att; + CStoreOptions *cstoreOptions = NULL; + TableReadState *readState = NULL; + CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); + List *columnList = NIL; cstoreOptions = CStoreGetDefaultOptions(); @@ -134,19 +139,21 @@ cstore_beginscan(Relation relation, Snapshot snapshot, for (int i = 0; i < tupdesc->natts; i++) { - Index varno = 0; - AttrNumber varattno = i+1; - Oid vartype = tupdesc->attrs[i].atttypid; - int32 vartypmod = 0; - Oid varcollid = 0; - Index varlevelsup = 0; - Var *var = makeVar(varno, varattno, vartype, vartypmod, - varcollid, varlevelsup); + Index varno = 0; + AttrNumber varattno = i + 1; + Oid vartype = tupdesc->attrs[i].atttypid; + int32 vartypmod = 0; + Oid varcollid = 0; + Index varlevelsup = 0; + Var *var = makeVar(varno, varattno, vartype, vartypmod, + varcollid, varlevelsup); if (!tupdesc->attrs[i].attisdropped) + { columnList = lappend(columnList, var); + } } - + readState = CStoreBeginRead(relid, tupdesc, columnList, NULL); readState->relation = relation; @@ -155,6 +162,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, return ((TableScanDesc) scan); } + static void cstore_endscan(TableScanDesc sscan) { @@ -162,13 +170,15 @@ cstore_endscan(TableScanDesc sscan) CStoreEndRead(scan->cs_readState); } + static void cstore_rescan(TableScanDesc sscan, ScanKey key, bool set_params, - 
bool allow_strat, bool allow_sync, bool allow_pagemode) + bool allow_strat, bool allow_sync, bool allow_pagemode) { elog(ERROR, "cstore_rescan not implemented"); } + static bool cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) { @@ -181,51 +191,61 @@ cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot memset(slot->tts_values, 0, sizeof(Datum) * natts); memset(slot->tts_isnull, true, sizeof(bool) * natts); - nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, slot->tts_isnull); + nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, + slot->tts_isnull); if (!nextRowFound) + { return false; + } ExecStoreVirtualTuple(slot); return true; } + static Size cstore_parallelscan_estimate(Relation rel) { elog(ERROR, "cstore_parallelscan_estimate not implemented"); } + static Size cstore_parallelscan_initialize(Relation rel, ParallelTableScanDesc pscan) { elog(ERROR, "cstore_parallelscan_initialize not implemented"); } + static void cstore_parallelscan_reinitialize(Relation rel, ParallelTableScanDesc pscan) { elog(ERROR, "cstore_parallelscan_reinitialize not implemented"); } + static IndexFetchTableData * cstore_index_fetch_begin(Relation rel) { elog(ERROR, "cstore_index_fetch_begin not implemented"); } + static void cstore_index_fetch_reset(IndexFetchTableData *scan) { elog(ERROR, "cstore_index_fetch_reset not implemented"); } + static void cstore_index_fetch_end(IndexFetchTableData *scan) { elog(ERROR, "cstore_index_fetch_end not implemented"); } + static bool cstore_index_fetch_tuple(struct IndexFetchTableData *scan, ItemPointer tid, @@ -236,6 +256,7 @@ cstore_index_fetch_tuple(struct IndexFetchTableData *scan, elog(ERROR, "cstore_index_fetch_tuple not implemented"); } + static bool cstore_fetch_row_version(Relation relation, ItemPointer tid, @@ -245,6 +266,7 @@ cstore_fetch_row_version(Relation relation, elog(ERROR, "cstore_fetch_row_version not implemented"); } + static 
void cstore_get_latest_tid(TableScanDesc sscan, ItemPointer tid) @@ -252,12 +274,14 @@ cstore_get_latest_tid(TableScanDesc sscan, elog(ERROR, "cstore_get_latest_tid not implemented"); } + static bool cstore_tuple_tid_valid(TableScanDesc scan, ItemPointer tid) { elog(ERROR, "cstore_tuple_tid_valid not implemented"); } + static bool cstore_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, Snapshot snapshot) @@ -265,6 +289,7 @@ cstore_tuple_satisfies_snapshot(Relation rel, TupleTableSlot *slot, return true; } + static TransactionId cstore_compute_xid_horizon_for_tuples(Relation rel, ItemPointerData *tids, @@ -273,6 +298,7 @@ cstore_compute_xid_horizon_for_tuples(Relation rel, elog(ERROR, "cstore_compute_xid_horizon_for_tuples not implemented"); } + static void cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) @@ -296,6 +322,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, CStoreWriteRow(CStoreWriteState, slot->tts_values, slot->tts_isnull); } + static void cstore_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, CommandId cid, int options, @@ -304,6 +331,7 @@ cstore_tuple_insert_speculative(Relation relation, TupleTableSlot *slot, elog(ERROR, "cstore_tuple_insert_speculative not implemented"); } + static void cstore_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, uint32 specToken, bool succeeded) @@ -311,6 +339,7 @@ cstore_tuple_complete_speculative(Relation relation, TupleTableSlot *slot, elog(ERROR, "cstore_tuple_complete_speculative not implemented"); } + static void cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) @@ -337,6 +366,7 @@ cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, } } + static TM_Result cstore_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, Snapshot snapshot, Snapshot crosscheck, bool 
wait, @@ -345,6 +375,7 @@ cstore_tuple_delete(Relation relation, ItemPointer tid, CommandId cid, elog(ERROR, "cstore_tuple_delete not implemented"); } + static TM_Result cstore_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, CommandId cid, Snapshot snapshot, Snapshot crosscheck, @@ -354,6 +385,7 @@ cstore_tuple_update(Relation relation, ItemPointer otid, TupleTableSlot *slot, elog(ERROR, "cstore_tuple_update not implemented"); } + static TM_Result cstore_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, TupleTableSlot *slot, CommandId cid, LockTupleMode mode, @@ -363,16 +395,18 @@ cstore_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, elog(ERROR, "cstore_tuple_lock not implemented"); } + static void cstore_finish_bulk_insert(Relation relation, int options) { - //TODO: flush relation like for heap? - // free write state or only in ExecutorEnd_hook? + /*TODO: flush relation like for heap? */ + /* free write state or only in ExecutorEnd_hook? 
*/ - // for COPY + /* for COPY */ cstore_free_write_state(); } + static void cstore_relation_set_new_filenode(Relation rel, const RelFileNode *newrnode, @@ -390,18 +424,21 @@ cstore_relation_set_new_filenode(Relation rel, smgrclose(srel); } + static void cstore_relation_nontransactional_truncate(Relation rel) { elog(ERROR, "cstore_relation_nontransactional_truncate not implemented"); } + static void cstore_relation_copy_data(Relation rel, const RelFileNode *newrnode) { elog(ERROR, "cstore_relation_copy_data not implemented"); } + static void cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex, bool use_sort, @@ -415,6 +452,7 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, elog(ERROR, "cstore_relation_copy_for_cluster not implemented"); } + static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) @@ -422,6 +460,7 @@ cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, elog(ERROR, "cstore_scan_analyze_next_block not implemented"); } + static bool cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, @@ -430,6 +469,7 @@ cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, elog(ERROR, "cstore_scan_analyze_next_tuple not implemented"); } + static double cstore_index_build_range_scan(Relation heapRelation, Relation indexRelation, @@ -446,6 +486,7 @@ cstore_index_build_range_scan(Relation heapRelation, elog(ERROR, "cstore_index_build_range_scan not implemented"); } + static void cstore_index_validate_scan(Relation heapRelation, Relation indexRelation, @@ -456,32 +497,39 @@ cstore_index_validate_scan(Relation heapRelation, elog(ERROR, "cstore_index_validate_scan not implemented"); } + static uint64 cstore_relation_size(Relation rel, ForkNumber forkNumber) { - uint64 nblocks = 0; + uint64 nblocks = 0; - /* Open it at the smgr level if not already done */ - 
RelationOpenSmgr(rel); + /* Open it at the smgr level if not already done */ + RelationOpenSmgr(rel); - /* InvalidForkNumber indicates returning the size for all forks */ - if (forkNumber == InvalidForkNumber) - { - for (int i = 0; i < MAX_FORKNUM; i++) - nblocks += smgrnblocks(rel->rd_smgr, i); - } - else - nblocks = smgrnblocks(rel->rd_smgr, forkNumber); + /* InvalidForkNumber indicates returning the size for all forks */ + if (forkNumber == InvalidForkNumber) + { + for (int i = 0; i < MAX_FORKNUM; i++) + { + nblocks += smgrnblocks(rel->rd_smgr, i); + } + } + else + { + nblocks = smgrnblocks(rel->rd_smgr, forkNumber); + } - return nblocks * BLCKSZ; + return nblocks * BLCKSZ; } + static bool cstore_relation_needs_toast_table(Relation rel) { return false; } + static void cstore_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, @@ -493,6 +541,7 @@ cstore_estimate_rel_size(Relation rel, int32 *attr_widths, *allvisfrac = 1.0; } + static bool cstore_scan_bitmap_next_block(TableScanDesc scan, TBMIterateResult *tbmres) @@ -500,6 +549,7 @@ cstore_scan_bitmap_next_block(TableScanDesc scan, elog(ERROR, "cstore_scan_bitmap_next_block not implemented"); } + static bool cstore_scan_bitmap_next_tuple(TableScanDesc scan, TBMIterateResult *tbmres, @@ -508,12 +558,14 @@ cstore_scan_bitmap_next_tuple(TableScanDesc scan, elog(ERROR, "cstore_scan_bitmap_next_tuple not implemented"); } + static bool cstore_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate) { elog(ERROR, "cstore_scan_sample_next_block not implemented"); } + static bool cstore_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, TupleTableSlot *slot) @@ -521,16 +573,22 @@ cstore_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, elog(ERROR, "cstore_scan_sample_next_tuple not implemented"); } + static void CStoreExecutorEnd(QueryDesc *queryDesc) { cstore_free_write_state(); if (PreviousExecutorEndHook) + { 
PreviousExecutorEndHook(queryDesc); + } else + { standard_ExecutorEnd(queryDesc); + } } + void cstore_tableam_init() { @@ -538,12 +596,14 @@ cstore_tableam_init() ExecutorEnd_hook = CStoreExecutorEnd; } + void cstore_tableam_finish() { ExecutorEnd_hook = PreviousExecutorEndHook; } + static const TableAmRoutine cstore_am_methods = { .type = T_TableAmRoutine, @@ -606,6 +666,7 @@ GetCstoreTableAmRoutine(void) return &cstore_am_methods; } + PG_FUNCTION_INFO_V1(cstore_tableam_handler); Datum cstore_tableam_handler(PG_FUNCTION_ARGS) diff --git a/cstore_tableam.h b/cstore_tableam.h index bd1f3805e..3a556728a 100644 --- a/cstore_tableam.h +++ b/cstore_tableam.h @@ -2,7 +2,7 @@ #include "fmgr.h" #include "access/tableam.h" -const TableAmRoutine *GetCstoreTableAmRoutine(void); +const TableAmRoutine * GetCstoreTableAmRoutine(void); Datum cstore_tableam_handler(PG_FUNCTION_ARGS); extern void cstore_free_write_state(void); extern void cstore_tableam_init(void); From b9f2b410b5b6dc1b7e9cec3e756a6d5be66f27ac Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Sep 2020 15:29:24 -0700 Subject: [PATCH 045/124] fix am_alter test --- Makefile | 4 ++-- cstore_tableam.c | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 9cbf4095a..ea5a858bf 100644 --- a/Makefile +++ b/Makefile @@ -48,12 +48,12 @@ ifeq ($(USE_FDW),yes) fdw_copyto fdw_alter fdw_truncate fdw_clean endif -# disabled tests: am_block_filtering am_analyze am_alter +# disabled tests: am_block_filtering am_analyze ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_data_types am_functions \ - am_drop am_insert am_copyto am_truncate am_clean + am_drop am_insert am_copyto am_alter am_truncate am_clean endif ifeq ($(enable_coverage),yes) diff --git a/cstore_tableam.c b/cstore_tableam.c index fccb9fe6e..e241c19ea 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -457,7 +457,8 @@ static bool 
cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) { - elog(ERROR, "cstore_scan_analyze_next_block not implemented"); + /* TODO */ + return false; } @@ -466,7 +467,8 @@ cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot) { - elog(ERROR, "cstore_scan_analyze_next_tuple not implemented"); + /* TODO */ + return false; } From d7f40f3be6e14ccd994fc1918ff2c38c4a07ef00 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 08:43:33 -0700 Subject: [PATCH 046/124] address review comments --- cstore_tableam.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index e241c19ea..c886ebe77 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -145,13 +145,16 @@ cstore_beginscan(Relation relation, Snapshot snapshot, int32 vartypmod = 0; Oid varcollid = 0; Index varlevelsup = 0; - Var *var = makeVar(varno, varattno, vartype, vartypmod, - varcollid, varlevelsup); + Var *var; if (!tupdesc->attrs[i].attisdropped) { - columnList = lappend(columnList, var); + continue; } + + var = makeVar(varno, varattno, vartype, vartypmod, + varcollid, varlevelsup); + columnList = lappend(columnList, var); } readState = CStoreBeginRead(relid, tupdesc, columnList, NULL); @@ -183,13 +186,9 @@ static bool cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) { CStoreScanDesc scan = (CStoreScanDesc) sscan; - TupleDesc tupdesc = slot->tts_tupleDescriptor; - int natts = tupdesc->natts; bool nextRowFound; ExecClearTuple(slot); - memset(slot->tts_values, 0, sizeof(Datum) * natts); - memset(slot->tts_isnull, true, sizeof(bool) * natts); nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, slot->tts_isnull); @@ -537,6 +536,7 @@ cstore_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac) { + /* 
TODO */ *attr_widths = 12; *tuples = 100; *pages = 10; From 12daf4c317dc83a7f854ca6b11a0f6ccf7326f78 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 09:09:02 -0700 Subject: [PATCH 047/124] add GUCs --- cstore.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ cstore.h | 6 ++++++ mod.c | 3 +++ 3 files changed, 64 insertions(+) diff --git a/cstore.c b/cstore.c index f04fc4fc6..94fc401a1 100644 --- a/cstore.c +++ b/cstore.c @@ -17,10 +17,65 @@ #include #include "miscadmin.h" +#include "utils/guc.h" #include "utils/rel.h" #include "cstore.h" +int cstore_compression = DEFAULT_COMPRESSION_TYPE; +int cstore_stripe_row_count = DEFAULT_STRIPE_ROW_COUNT; +int cstore_block_row_count = DEFAULT_BLOCK_ROW_COUNT; + +static const struct config_enum_entry cstore_compression_options[] = +{ + {"none", COMPRESSION_NONE, false}, + {"pglz", COMPRESSION_PG_LZ, false}, + {NULL, 0, false} +}; + +void +cstore_init() +{ + DefineCustomEnumVariable("cstore.compression", + "Sets the maximum number of statements tracked by pg_stat_statements.", + NULL, + &cstore_compression, + DEFAULT_COMPRESSION_TYPE, + cstore_compression_options, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("cstore.stripe_row_count", + "Sets the maximum number of statements tracked by pg_stat_statements.", + NULL, + &cstore_stripe_row_count, + DEFAULT_STRIPE_ROW_COUNT, + STRIPE_ROW_COUNT_MINIMUM, + STRIPE_ROW_COUNT_MAXIMUM, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomIntVariable("cstore.block_row_count", + "Sets the maximum number of statements tracked by pg_stat_statements.", + NULL, + &cstore_block_row_count, + DEFAULT_BLOCK_ROW_COUNT, + BLOCK_ROW_COUNT_MINIMUM, + BLOCK_ROW_COUNT_MAXIMUM, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); +} + + /* ParseCompressionType converts a string to a compression type. 
*/ CompressionType ParseCompressionType(const char *compressionTypeString) diff --git a/cstore.h b/cstore.h index 9a1764972..cbd60fca1 100644 --- a/cstore.h +++ b/cstore.h @@ -249,6 +249,12 @@ typedef struct TableWriteState StringInfo compressionBuffer; } TableWriteState; +extern int cstore_compression; +extern int cstore_stripe_row_count; +extern int cstore_block_row_count; + +extern void cstore_init(void); + extern CompressionType ParseCompressionType(const char *compressionTypeString); extern void InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions); diff --git a/mod.c b/mod.c index 3e041dd7a..e81f7a6e5 100644 --- a/mod.c +++ b/mod.c @@ -15,6 +15,7 @@ #include "fmgr.h" +#include "cstore.h" #include "mod.h" #ifdef USE_TABLEAM @@ -30,6 +31,8 @@ PG_MODULE_MAGIC; void _PG_init(void) { + cstore_init(); + #ifdef USE_TABLEAM cstore_tableam_init(); #endif From 9f9bb64c4c21f97e111ae4604148176721673342 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 09:18:03 -0700 Subject: [PATCH 048/124] fixup --- cstore_tableam.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index c886ebe77..204746aa0 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -147,7 +147,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, Index varlevelsup = 0; Var *var; - if (!tupdesc->attrs[i].attisdropped) + if (tupdesc->attrs[i].attisdropped) { continue; } From fbe472828739e9ecb5578cc7ad55385fdda3f026 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 09:19:41 -0700 Subject: [PATCH 049/124] use GUCs --- cstore.c | 5 +++++ cstore.h | 5 ----- cstore_fdw.c | 6 +++--- cstore_tableam.c | 14 +++++++------- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cstore.c b/cstore.c index 94fc401a1..507d58463 100644 --- a/cstore.c +++ b/cstore.c @@ -22,6 +22,11 @@ #include "cstore.h" +/* Default values for option parameters */ +#define DEFAULT_COMPRESSION_TYPE 
COMPRESSION_NONE +#define DEFAULT_STRIPE_ROW_COUNT 150000 +#define DEFAULT_BLOCK_ROW_COUNT 10000 + int cstore_compression = DEFAULT_COMPRESSION_TYPE; int cstore_stripe_row_count = DEFAULT_STRIPE_ROW_COUNT; int cstore_block_row_count = DEFAULT_BLOCK_ROW_COUNT; diff --git a/cstore.h b/cstore.h index cbd60fca1..ad0ad20bd 100644 --- a/cstore.h +++ b/cstore.h @@ -24,11 +24,6 @@ #define OPTION_NAME_STRIPE_ROW_COUNT "stripe_row_count" #define OPTION_NAME_BLOCK_ROW_COUNT "block_row_count" -/* Default values for option parameters */ -#define DEFAULT_COMPRESSION_TYPE COMPRESSION_NONE -#define DEFAULT_STRIPE_ROW_COUNT 150000 -#define DEFAULT_BLOCK_ROW_COUNT 10000 - /* Limits for option parameters */ #define STRIPE_ROW_COUNT_MINIMUM 1000 #define STRIPE_ROW_COUNT_MAXIMUM 10000000 diff --git a/cstore_fdw.c b/cstore_fdw.c index 512dee5a3..a66ba1d80 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -1260,9 +1260,9 @@ static CStoreOptions * CStoreGetOptions(Oid foreignTableId) { CStoreOptions *cstoreOptions = NULL; - CompressionType compressionType = DEFAULT_COMPRESSION_TYPE; - int32 stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; - int32 blockRowCount = DEFAULT_BLOCK_ROW_COUNT; + CompressionType compressionType = cstore_compression; + int32 stripeRowCount = cstore_stripe_row_count; + int32 blockRowCount = cstore_block_row_count; char *compressionTypeString = NULL; char *stripeRowCountString = NULL; char *blockRowCountString = NULL; diff --git a/cstore_tableam.c b/cstore_tableam.c index 204746aa0..95630c3e1 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -47,12 +47,12 @@ static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; static MemoryContext CStoreContext = NULL; static CStoreOptions * -CStoreGetDefaultOptions(void) +CStoreTableAMGetOptions(void) { CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions)); - cstoreOptions->compressionType = DEFAULT_COMPRESSION_TYPE; - cstoreOptions->stripeRowCount = DEFAULT_STRIPE_ROW_COUNT; - cstoreOptions->blockRowCount = 
DEFAULT_BLOCK_ROW_COUNT; + cstoreOptions->compressionType = cstore_compression; + cstoreOptions->stripeRowCount = cstore_stripe_row_count; + cstoreOptions->blockRowCount = cstore_block_row_count; return cstoreOptions; } @@ -71,7 +71,7 @@ cstore_init_write_state(Relation relation) if (CStoreWriteState == NULL) { - CStoreOptions *cstoreOptions = CStoreGetDefaultOptions(); + CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(); TupleDesc tupdesc = RelationGetDescr(relation); MemoryContext oldContext; @@ -128,7 +128,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *columnList = NIL; - cstoreOptions = CStoreGetDefaultOptions(); + cstoreOptions = CStoreTableAMGetOptions(); scan->cs_base.rs_rd = relation; scan->cs_base.rs_snapshot = snapshot; @@ -419,7 +419,7 @@ cstore_relation_set_new_filenode(Relation rel, *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - InitializeCStoreTableFile(rel->rd_id, rel, CStoreGetDefaultOptions()); + InitializeCStoreTableFile(rel->rd_id, rel, CStoreTableAMGetOptions()); smgrclose(srel); } From 0f43534845e940bbbe8e1f4e7b108a2429679df0 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 09:26:20 -0700 Subject: [PATCH 050/124] fixup guc --- cstore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cstore.c b/cstore.c index 507d58463..1e8733003 100644 --- a/cstore.c +++ b/cstore.c @@ -47,7 +47,7 @@ cstore_init() &cstore_compression, DEFAULT_COMPRESSION_TYPE, cstore_compression_options, - PGC_POSTMASTER, + PGC_USERSET, 0, NULL, NULL, From 06f1c9697584a1ef7dccdc21f6b5364d5671b5c3 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 11:16:35 -0700 Subject: [PATCH 051/124] almost works --- Makefile | 3 ++- cstore_tableam.c | 29 ++++++++++++++++++++--------- expected/am_block_filtering.out | 14 +++++++++----- expected/am_create.out | 1 + expected/am_load.out | 2 ++ 
expected/am_truncate.out | 8 ++++++++ input/am_block_filtering.source | 5 +++++ input/am_create.source | 1 + input/am_load.source | 2 ++ output/am_block_filtering.source | 14 +++++++++----- output/am_create.source | 1 + output/am_load.source | 2 ++ sql/am_block_filtering.sql | 5 +++++ sql/am_create.sql | 1 + sql/am_load.sql | 2 ++ sql/am_truncate.sql | 10 ++++++++-- 16 files changed, 78 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index ea5a858bf..033e8d4a6 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,8 @@ ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_data_types am_functions \ - am_drop am_insert am_copyto am_alter am_truncate am_clean + am_block_filtering am_drop am_insert am_copyto am_alter \ + am_truncate am_clean endif ifeq ($(enable_coverage),yes) diff --git a/cstore_tableam.c b/cstore_tableam.c index 95630c3e1..57ec2fa94 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -56,6 +56,16 @@ CStoreTableAMGetOptions(void) return cstoreOptions; } +static MemoryContext +CStoreMemoryContext(void) +{ + if (CStoreContext == NULL) + { + CStoreContext = AllocSetContextCreate(TopMemoryContext, "cstore context", + ALLOCSET_DEFAULT_SIZES); + } + return CStoreContext; +} static void cstore_init_write_state(Relation relation) @@ -73,22 +83,13 @@ cstore_init_write_state(Relation relation) { CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(); TupleDesc tupdesc = RelationGetDescr(relation); - MemoryContext oldContext; - - if (CStoreContext == NULL) - { - CStoreContext = AllocSetContextCreate(TopMemoryContext, "cstore context", - ALLOCSET_DEFAULT_SIZES); - } elog(LOG, "initializing write state for relation %d", relation->rd_id); - oldContext = MemoryContextSwitchTo(CStoreContext); CStoreWriteState = CStoreBeginWrite(relation->rd_id, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupdesc); - MemoryContextSwitchTo(oldContext); 
CStoreWriteState->relation = relation; } @@ -127,6 +128,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, TableReadState *readState = NULL; CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *columnList = NIL; + MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); cstoreOptions = CStoreTableAMGetOptions(); @@ -162,6 +164,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, scan->cs_readState = readState; + MemoryContextSwitchTo(oldContext); return ((TableScanDesc) scan); } @@ -187,12 +190,15 @@ cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot { CStoreScanDesc scan = (CStoreScanDesc) sscan; bool nextRowFound; + MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); ExecClearTuple(slot); nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, slot->tts_isnull); + MemoryContextSwitchTo(oldContext); + if (!nextRowFound) { return false; @@ -303,6 +309,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) { HeapTuple heapTuple; + MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); cstore_init_write_state(relation); @@ -319,6 +326,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, slot_getallattrs(slot); CStoreWriteRow(CStoreWriteState, slot->tts_values, slot->tts_isnull); + MemoryContextSwitchTo(oldContext); } @@ -343,6 +351,8 @@ static void cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) { + MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); + cstore_init_write_state(relation); for (int i = 0; i < ntuples; i++) @@ -363,6 +373,7 @@ cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CStoreWriteRow(CStoreWriteState, tupleSlot->tts_values, tupleSlot->tts_isnull); } + MemoryContextSwitchTo(oldContext); } diff --git 
a/expected/am_block_filtering.out b/expected/am_block_filtering.out index bccfafd15..005b42e64 100644 --- a/expected/am_block_filtering.out +++ b/expected/am_block_filtering.out @@ -24,9 +24,11 @@ $$ END; $$ LANGUAGE PLPGSQL; -- Create and load data -CREATE FOREIGN TABLE test_block_filtering (a int) - SERVER cstore_server - OPTIONS(block_row_count '1000', stripe_row_count '2000'); +-- block_row_count '1000', stripe_row_count '2000' +set cstore.stripe_row_count = 2000; +set cstore.block_row_count = 1000; +CREATE TABLE test_block_filtering (a int) + USING cstore_tableam; COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; -- Verify that filtered_row_count is less than 1000 for the following queries SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); @@ -104,9 +106,11 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET 3958 (1 row) +set cstore.stripe_row_count to default; +set cstore.block_row_count to default; -- Verify that we are fine with collations which use a different alphabet order -CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server; +CREATE TABLE collation_block_filtering_test(A text collate "da_DK") + USING cstore_tableam; COPY collation_block_filtering_test FROM STDIN; SELECT * FROM collation_block_filtering_test WHERE A > 'B'; a diff --git a/expected/am_create.out b/expected/am_create.out index e62447252..47c6a6c44 100644 --- a/expected/am_create.out +++ b/expected/am_create.out @@ -6,6 +6,7 @@ CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; -- Create compressed table with automatically determined file path +-- COMPRESSED CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; diff --git a/expected/am_load.out 
b/expected/am_load.out index 110e444fa..02cff343a 100644 --- a/expected/am_load.out +++ b/expected/am_load.out @@ -15,10 +15,12 @@ COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV -- COPY into uncompressed table from program COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; -- COPY into compressed table +set cstore.compression = 'pglz'; COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; -- COPY into uncompressed table from program COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; +set cstore.compression to default; -- Test column list CREATE TABLE famous_constants (id int, name text, value real) USING cstore_tableam; diff --git a/expected/am_truncate.out b/expected/am_truncate.out index 538b9ddac..99db7fe72 100644 --- a/expected/am_truncate.out +++ b/expected/am_truncate.out @@ -12,11 +12,14 @@ SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; -- CREATE a cstore_fdw table, fill with some data -- CREATE TABLE cstore_truncate_test (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; +-- COMPRESSED CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +set cstore.compression = 'pglz'; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +set cstore.compression to default; -- query rows SELECT * FROM cstore_truncate_test; a | b @@ -168,8 +171,11 @@ DROP TABLE cstore_truncate_test_regular; DROP TABLE cstore_truncate_test_compressed; -- test truncate with schema CREATE SCHEMA truncate_schema; +-- COMPRESSED CREATE TABLE 
truncate_schema.truncate_tbl (id int) USING cstore_tableam; +set cstore.compression = 'pglz'; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +set cstore.compression to default; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; count ------- @@ -183,7 +189,9 @@ SELECT COUNT(*) FROM truncate_schema.truncate_tbl; 0 (1 row) +set cstore.compression = 'pglz'; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +set cstore.compression to default; -- create a user that can not truncate CREATE USER truncate_user; GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; diff --git a/input/am_block_filtering.source b/input/am_block_filtering.source index 7ca6862c7..f93eb1988 100644 --- a/input/am_block_filtering.source +++ b/input/am_block_filtering.source @@ -28,6 +28,9 @@ $$ LANGUAGE PLPGSQL; -- Create and load data +-- block_row_count '1000', stripe_row_count '2000' +set cstore.stripe_row_count = 2000; +set cstore.block_row_count = 1000; CREATE TABLE test_block_filtering (a int) USING cstore_tableam; @@ -55,6 +58,8 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 2 SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +set cstore.stripe_row_count to default; +set cstore.block_row_count to default; -- Verify that we are fine with collations which use a different alphabet order CREATE TABLE collation_block_filtering_test(A text collate "da_DK") diff --git a/input/am_create.source b/input/am_create.source index 8a1612f7a..6d4d5a388 100644 --- a/input/am_create.source +++ b/input/am_create.source @@ -10,6 +10,7 @@ CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, -- Create compressed table with automatically determined file path +-- COMPRESSED CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), 
achievements TEXT[]) USING cstore_tableam; diff --git a/input/am_load.source b/input/am_load.source index c2ad581e8..d0ef9bfac 100644 --- a/input/am_load.source +++ b/input/am_load.source @@ -16,11 +16,13 @@ COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; -- COPY into compressed table +set cstore.compression = 'pglz'; COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; -- COPY into uncompressed table from program COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; +set cstore.compression to default; -- Test column list CREATE TABLE famous_constants (id int, name text, value real) diff --git a/output/am_block_filtering.source b/output/am_block_filtering.source index 2f664a78a..45cb702b6 100644 --- a/output/am_block_filtering.source +++ b/output/am_block_filtering.source @@ -24,9 +24,11 @@ $$ END; $$ LANGUAGE PLPGSQL; -- Create and load data -CREATE FOREIGN TABLE test_block_filtering (a int) - SERVER cstore_server - OPTIONS(block_row_count '1000', stripe_row_count '2000'); +-- block_row_count '1000', stripe_row_count '2000' +set cstore.stripe_row_count = 2000; +set cstore.block_row_count = 1000; +CREATE TABLE test_block_filtering (a int) + USING cstore_tableam; COPY test_block_filtering FROM '@abs_srcdir@/data/block_filtering.csv' WITH CSV; -- Verify that filtered_row_count is less than 1000 for the following queries SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); @@ -104,9 +106,11 @@ SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BET 3958 (1 row) +set cstore.stripe_row_count to default; +set cstore.block_row_count to default; -- Verify that we are fine with collations which use a different alphabet order -CREATE FOREIGN TABLE collation_block_filtering_test(A text collate "da_DK") - SERVER cstore_server; +CREATE TABLE 
collation_block_filtering_test(A text collate "da_DK") + USING cstore_tableam; COPY collation_block_filtering_test FROM STDIN; SELECT * FROM collation_block_filtering_test WHERE A > 'B'; a diff --git a/output/am_create.source b/output/am_create.source index e62447252..47c6a6c44 100644 --- a/output/am_create.source +++ b/output/am_create.source @@ -6,6 +6,7 @@ CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; -- Create compressed table with automatically determined file path +-- COMPRESSED CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; diff --git a/output/am_load.source b/output/am_load.source index d1f41f717..5eb81a250 100644 --- a/output/am_load.source +++ b/output/am_load.source @@ -15,10 +15,12 @@ COPY contestant FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; -- COPY into uncompressed table from program COPY contestant FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; -- COPY into compressed table +set cstore.compression = 'pglz'; COPY contestant_compressed FROM '@abs_srcdir@/data/contestants.1.csv' WITH CSV; -- COPY into uncompressed table from program COPY contestant_compressed FROM PROGRAM 'cat @abs_srcdir@/data/contestants.2.csv' WITH CSV; +set cstore.compression to default; -- Test column list CREATE TABLE famous_constants (id int, name text, value real) USING cstore_tableam; diff --git a/sql/am_block_filtering.sql b/sql/am_block_filtering.sql index 38c63535c..2a45716ed 100644 --- a/sql/am_block_filtering.sql +++ b/sql/am_block_filtering.sql @@ -28,6 +28,9 @@ $$ LANGUAGE PLPGSQL; -- Create and load data +-- block_row_count '1000', stripe_row_count '2000' +set cstore.stripe_row_count = 2000; +set cstore.block_row_count = 1000; CREATE TABLE test_block_filtering (a int) USING cstore_tableam; @@ -55,6 +58,8 @@ SELECT 
filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 2 SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); +set cstore.stripe_row_count to default; +set cstore.block_row_count to default; -- Verify that we are fine with collations which use a different alphabet order CREATE TABLE collation_block_filtering_test(A text collate "da_DK") diff --git a/sql/am_create.sql b/sql/am_create.sql index 8a1612f7a..6d4d5a388 100644 --- a/sql/am_create.sql +++ b/sql/am_create.sql @@ -10,6 +10,7 @@ CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, -- Create compressed table with automatically determined file path +-- COMPRESSED CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, percentile FLOAT, country CHAR(3), achievements TEXT[]) USING cstore_tableam; diff --git a/sql/am_load.sql b/sql/am_load.sql index c7e9e5287..edc727b3c 100644 --- a/sql/am_load.sql +++ b/sql/am_load.sql @@ -16,11 +16,13 @@ COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; -- COPY into compressed table +set cstore.compression = 'pglz'; COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; -- COPY into uncompressed table from program COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; +set cstore.compression to default; -- Test column list CREATE TABLE famous_constants (id int, name text, value real) diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql index e124a7831..3fdce1d82 100644 --- a/sql/am_truncate.sql +++ b/sql/am_truncate.sql @@ -9,13 +9,16 @@ SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; -- CREATE a cstore_fdw table, fill with some data -- CREATE TABLE 
cstore_truncate_test (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; +-- COMPRESSED CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; +set cstore.compression = 'pglz'; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; +set cstore.compression to default; -- query rows SELECT * FROM cstore_truncate_test; @@ -76,15 +79,19 @@ DROP TABLE cstore_truncate_test_compressed; -- test truncate with schema CREATE SCHEMA truncate_schema; +-- COMPRESSED CREATE TABLE truncate_schema.truncate_tbl (id int) USING cstore_tableam; +set cstore.compression = 'pglz'; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); +set cstore.compression to default; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; +set cstore.compression = 'pglz'; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); - +set cstore.compression to default; -- create a user that can not truncate CREATE USER truncate_user; GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; @@ -108,7 +115,6 @@ GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; SELECT count(*) FROM truncate_schema.truncate_tbl; TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT count(*) FROM truncate_schema.truncate_tbl; - \c - :current_user -- cleanup From a05e75a6d11d5638fee978ebbfa8b31bb5c2a5d4 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 11:59:28 -0700 Subject: [PATCH 052/124] fixup --- Makefile | 3 +-- cstore.c | 6 +++--- cstore_tableam.c | 2 ++ 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Makefile 
b/Makefile index 033e8d4a6..ea5a858bf 100644 --- a/Makefile +++ b/Makefile @@ -53,8 +53,7 @@ ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_data_types am_functions \ - am_block_filtering am_drop am_insert am_copyto am_alter \ - am_truncate am_clean + am_drop am_insert am_copyto am_alter am_truncate am_clean endif ifeq ($(enable_coverage),yes) diff --git a/cstore.c b/cstore.c index 1e8733003..3dd53dae4 100644 --- a/cstore.c +++ b/cstore.c @@ -33,9 +33,9 @@ int cstore_block_row_count = DEFAULT_BLOCK_ROW_COUNT; static const struct config_enum_entry cstore_compression_options[] = { - {"none", COMPRESSION_NONE, false}, - {"pglz", COMPRESSION_PG_LZ, false}, - {NULL, 0, false} + { "none", COMPRESSION_NONE, false }, + { "pglz", COMPRESSION_PG_LZ, false }, + { NULL, 0, false } }; void diff --git a/cstore_tableam.c b/cstore_tableam.c index 57ec2fa94..aa92f48cf 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -56,6 +56,7 @@ CStoreTableAMGetOptions(void) return cstoreOptions; } + static MemoryContext CStoreMemoryContext(void) { @@ -67,6 +68,7 @@ CStoreMemoryContext(void) return CStoreContext; } + static void cstore_init_write_state(Relation relation) { From c303f0f135e95080eae31480d4881d6fa1b9c742 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 18 Sep 2020 12:06:58 -0700 Subject: [PATCH 053/124] improve rel size estimate --- cstore_tableam.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index aa92f48cf..5849fb5ed 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -549,11 +549,17 @@ cstore_estimate_rel_size(Relation rel, int32 *attr_widths, BlockNumber *pages, double *tuples, double *allvisfrac) { - /* TODO */ - *attr_widths = 12; - *tuples = 100; - *pages = 10; + RelationOpenSmgr(rel); + *pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + *tuples = CStoreTableRowCount(rel); + + /* + * Append-only, so everything is 
visible except in-progress or rolled-back + * transactions. + */ *allvisfrac = 1.0; + + get_rel_data_width(rel, attr_widths); } From 8af9c91540dc76822e71e73ce26039a9362b168b Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Mon, 21 Sep 2020 18:13:14 -0700 Subject: [PATCH 054/124] address review comments --- cstore.c | 6 +++--- cstore_tableam.c | 34 +++++++++------------------------- cstore_tableam.h | 2 -- 3 files changed, 12 insertions(+), 30 deletions(-) diff --git a/cstore.c b/cstore.c index 3dd53dae4..f5846a029 100644 --- a/cstore.c +++ b/cstore.c @@ -42,7 +42,7 @@ void cstore_init() { DefineCustomEnumVariable("cstore.compression", - "Sets the maximum number of statements tracked by pg_stat_statements.", + "Compression type for cstore.", NULL, &cstore_compression, DEFAULT_COMPRESSION_TYPE, @@ -54,7 +54,7 @@ cstore_init() NULL); DefineCustomIntVariable("cstore.stripe_row_count", - "Sets the maximum number of statements tracked by pg_stat_statements.", + "Maximum number of tuples per stripe.", NULL, &cstore_stripe_row_count, DEFAULT_STRIPE_ROW_COUNT, @@ -67,7 +67,7 @@ cstore_init() NULL); DefineCustomIntVariable("cstore.block_row_count", - "Sets the maximum number of statements tracked by pg_stat_statements.", + "Maximum number of rows per block.", NULL, &cstore_block_row_count, DEFAULT_BLOCK_ROW_COUNT, diff --git a/cstore_tableam.c b/cstore_tableam.c index 5849fb5ed..312e10981 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -58,7 +58,7 @@ CStoreTableAMGetOptions(void) static MemoryContext -CStoreMemoryContext(void) +GetCStoreMemoryContext(void) { if (CStoreContext == NULL) { @@ -98,7 +98,7 @@ cstore_init_write_state(Relation relation) } -void +static void cstore_free_write_state() { if (CStoreWriteState != NULL) @@ -130,7 +130,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, TableReadState *readState = NULL; CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *columnList = NIL; - MemoryContext oldContext = 
MemoryContextSwitchTo(CStoreMemoryContext()); + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); cstoreOptions = CStoreTableAMGetOptions(); @@ -176,6 +176,7 @@ cstore_endscan(TableScanDesc sscan) { CStoreScanDesc scan = (CStoreScanDesc) sscan; CStoreEndRead(scan->cs_readState); + scan->cs_readState = NULL; } @@ -192,7 +193,7 @@ cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot { CStoreScanDesc scan = (CStoreScanDesc) sscan; bool nextRowFound; - MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); ExecClearTuple(slot); @@ -311,7 +312,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) { HeapTuple heapTuple; - MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); cstore_init_write_state(relation); @@ -353,7 +354,7 @@ static void cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) { - MemoryContext oldContext = MemoryContextSwitchTo(CStoreMemoryContext()); + MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); cstore_init_write_state(relation); @@ -563,23 +564,6 @@ cstore_estimate_rel_size(Relation rel, int32 *attr_widths, } -static bool -cstore_scan_bitmap_next_block(TableScanDesc scan, - TBMIterateResult *tbmres) -{ - elog(ERROR, "cstore_scan_bitmap_next_block not implemented"); -} - - -static bool -cstore_scan_bitmap_next_tuple(TableScanDesc scan, - TBMIterateResult *tbmres, - TupleTableSlot *slot) -{ - elog(ERROR, "cstore_scan_bitmap_next_tuple not implemented"); -} - - static bool cstore_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate) { @@ -674,8 +658,8 @@ static const TableAmRoutine cstore_am_methods = { .relation_estimate_size 
= cstore_estimate_rel_size, - .scan_bitmap_next_block = cstore_scan_bitmap_next_block, - .scan_bitmap_next_tuple = cstore_scan_bitmap_next_tuple, + .scan_bitmap_next_block = NULL, + .scan_bitmap_next_tuple = NULL, .scan_sample_next_block = cstore_scan_sample_next_block, .scan_sample_next_tuple = cstore_scan_sample_next_tuple }; diff --git a/cstore_tableam.h b/cstore_tableam.h index 3a556728a..bdf7f96c0 100644 --- a/cstore_tableam.h +++ b/cstore_tableam.h @@ -3,7 +3,5 @@ #include "access/tableam.h" const TableAmRoutine * GetCstoreTableAmRoutine(void); -Datum cstore_tableam_handler(PG_FUNCTION_ARGS); -extern void cstore_free_write_state(void); extern void cstore_tableam_init(void); extern void cstore_tableam_finish(void); From bc585be3edef612f47cbdbb9db2f743bf60da14f Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 21 Sep 2020 15:53:21 -0700 Subject: [PATCH 055/124] Save blockRowCount in StripeMetadata --- .gitignore | 20 ++--- cstore.h | 33 +++++--- cstore_fdw--1.7.sql | 1 + cstore_metadata_tables.c | 8 +- cstore_reader.c | 174 ++++++++++++++++++--------------------- cstore_writer.c | 46 +++++------ 6 files changed, 140 insertions(+), 142 deletions(-) diff --git a/.gitignore b/.gitignore index 0c643e590..aa7be0e36 100644 --- a/.gitignore +++ b/.gitignore @@ -42,17 +42,17 @@ /data/*.cstore /data/*.footer -/sql/block_filtering.sql -/sql/copyto.sql -/sql/create.sql -/sql/data_types.sql -/sql/load.sql +/sql/*block_filtering.sql +/sql/*copyto.sql +/sql/*create.sql +/sql/*data_types.sql +/sql/*load.sql -/expected/block_filtering.out -/expected/copyto.out -/expected/create.out -/expected/data_types.out -/expected/load.out +/expected/*block_filtering.out +/expected/*copyto.out +/expected/*create.out +/expected/*data_types.out +/expected/*load.out /results/* /.deps/* /regression.diffs diff --git a/cstore.h b/cstore.h index ad0ad20bd..e48bced61 100644 --- a/cstore.h +++ b/cstore.h @@ -78,6 +78,7 @@ typedef struct StripeMetadata uint64 fileOffset; uint64 dataLength; 
uint32 blockCount; + uint32 blockRowCount; uint64 rowCount; uint64 id; } StripeMetadata; @@ -128,20 +129,27 @@ typedef struct StripeSkipList /* - * ColumnBlockData represents a block of data in a column. valueArray stores + * BlockData represents a block of data for multiple columns. valueArray stores * the values of data, and existsArray stores whether a value is present. * valueBuffer is used to store (uncompressed) serialized values * referenced by Datum's in valueArray. It is only used for by-reference Datum's. * There is a one-to-one correspondence between valueArray and existsArray. */ -typedef struct ColumnBlockData +typedef struct BlockData { - bool *existsArray; - Datum *valueArray; + uint32 rowCount; + uint32 columnCount; + + /* + * Following are indexed by [column][row]. If a column is not projected, + * then existsArray[column] and valueArray[column] are NULL. + */ + bool **existsArray; + Datum **valueArray; /* valueBuffer keeps actual data for type-by-reference datums from valueArray. 
*/ - StringInfo valueBuffer; -} ColumnBlockData; + StringInfo *valueBufferArray; +} BlockData; /* @@ -197,6 +205,7 @@ typedef struct TableReadState Oid relationId; TableMetadata *tableMetadata; + StripeMetadata *currentStripeMetadata; TupleDesc tupleDescriptor; Relation relation; @@ -212,7 +221,7 @@ typedef struct TableReadState StripeBuffers *stripeBuffers; uint32 readStripeCount; uint64 stripeReadRowCount; - ColumnBlockData **blockDataArray; + BlockData *blockData; int32 deserializedBlockIndex; } TableReadState; @@ -233,7 +242,8 @@ typedef struct TableWriteState StripeBuffers *stripeBuffers; StripeSkipList *stripeSkipList; uint32 stripeMaxRowCount; - ColumnBlockData **blockDataArray; + uint32 blockRowCount; + BlockData *blockData; /* * compressionBuffer buffer is used as temporary storage during @@ -276,10 +286,9 @@ extern void CStoreEndRead(TableReadState *state); /* Function declarations for common functions */ extern FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, int16 procedureId); -extern ColumnBlockData ** CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, - uint32 blockRowCount); -extern void FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, - uint32 columnCount); +extern BlockData * CreateEmptyBlockData(uint32 columnCount, bool *columnMask, + uint32 blockRowCount); +extern void FreeBlockData(BlockData *blockData); extern uint64 CStoreTableRowCount(Relation relation); extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, CompressionType compressionType); diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 19801f1f8..d98652b6d 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -74,6 +74,7 @@ CREATE TABLE cstore_stripes ( file_offset bigint NOT NULL, data_length bigint NOT NULL, block_count int NOT NULL, + block_row_count int NOT NULL, row_count bigint NOT NULL, PRIMARY KEY (relid, stripe), FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED 
diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 8a67a3a9e..5285295b9 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -79,13 +79,14 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); #define Anum_cstore_tables_version_minor 4 /* constants for cstore_stripe */ -#define Natts_cstore_stripes 6 +#define Natts_cstore_stripes 7 #define Anum_cstore_stripes_relid 1 #define Anum_cstore_stripes_stripe 2 #define Anum_cstore_stripes_file_offset 3 #define Anum_cstore_stripes_data_length 4 #define Anum_cstore_stripes_block_count 5 -#define Anum_cstore_stripes_row_count 6 +#define Anum_cstore_stripes_block_row_count 6 +#define Anum_cstore_stripes_row_count 7 /* constants for cstore_skipnodes */ #define Natts_cstore_skipnodes 12 @@ -328,6 +329,7 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) Int64GetDatum(stripe->fileOffset), Int64GetDatum(stripe->dataLength), Int32GetDatum(stripe->blockCount), + Int32GetDatum(stripe->blockRowCount), Int64GetDatum(stripe->rowCount) }; @@ -388,6 +390,8 @@ ReadTableMetadata(Oid relid) datumArray[Anum_cstore_stripes_data_length - 1]); stripeMetadata->blockCount = DatumGetInt32( datumArray[Anum_cstore_stripes_block_count - 1]); + stripeMetadata->blockRowCount = DatumGetInt32( + datumArray[Anum_cstore_stripes_block_row_count - 1]); stripeMetadata->rowCount = DatumGetInt64( datumArray[Anum_cstore_stripes_row_count - 1]); diff --git a/cstore_reader.c b/cstore_reader.c index fecb45605..caf07473f 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -45,8 +45,8 @@ static StripeBuffers * LoadFilteredStripeBuffers(Relation relation, List *whereClauseList); static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, uint64 blockIndex, uint64 blockRowIndex, - ColumnBlockData **blockDataArray, - Datum *columnValues, bool *columnNulls); + BlockData *blockData, Datum *columnValues, + bool *columnNulls); static ColumnBuffers * LoadColumnBuffers(Relation 
relation, ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, uint64 existsFileOffset, @@ -70,15 +70,12 @@ static void DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, uint32 datumCount, bool datumTypeByValue, int datumTypeLength, char datumTypeAlign, Datum *datumArray); -static void DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, - uint32 rowCount, ColumnBlockData **blockDataArray, - TupleDesc tupleDescriptor); +static BlockData * DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, + uint32 rowCount, TupleDesc tupleDescriptor, + List *projectedColumnList); static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm); static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); -static void ResetUncompressedBlockData(ColumnBlockData **blockDataArray, - uint32 columnCount); - /* * CStoreBeginRead initializes a cstore read operation. This function returns a @@ -91,9 +88,6 @@ CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, TableReadState *readState = NULL; TableMetadata *tableMetadata = NULL; MemoryContext stripeReadContext = NULL; - uint32 columnCount = 0; - bool *projectedColumnMask = NULL; - ColumnBlockData **blockDataArray = NULL; tableMetadata = ReadTableMetadata(relationId); @@ -106,11 +100,6 @@ CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, "Stripe Read Memory Context", ALLOCSET_DEFAULT_SIZES); - columnCount = tupleDescriptor->natts; - projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); - blockDataArray = CreateEmptyBlockDataArray(columnCount, projectedColumnMask, - tableMetadata->blockRowCount); - readState = palloc0(sizeof(TableReadState)); readState->relationId = relationId; readState->tableMetadata = tableMetadata; @@ -121,7 +110,7 @@ CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, readState->stripeReadRowCount = 0; readState->tupleDescriptor = tupleDescriptor; readState->stripeReadContext 
= stripeReadContext; - readState->blockDataArray = blockDataArray; + readState->blockData = NULL; readState->deserializedBlockIndex = -1; return readState; @@ -138,7 +127,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu { uint32 blockIndex = 0; uint32 blockRowIndex = 0; - TableMetadata *tableMetadata = readState->tableMetadata; + StripeMetadata *stripeMetadata = readState->currentStripeMetadata; MemoryContext oldContext = NULL; /* @@ -151,7 +140,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu { StripeBuffers *stripeBuffers = NULL; StripeMetadata *stripeMetadata = NULL; - List *stripeMetadataList = tableMetadata->stripeMetadataList; + List *stripeMetadataList = readState->tableMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); StripeFooter *stripeFooter = NULL; @@ -163,6 +152,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu oldContext = MemoryContextSwitchTo(readState->stripeReadContext); MemoryContextReset(readState->stripeReadContext); + readState->blockData = NULL; stripeMetadata = list_nth(stripeMetadataList, readState->readStripeCount); stripeFooter = ReadStripeFooter(readState->relationId, @@ -175,6 +165,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu readState->projectedColumnList, readState->whereClauseList); readState->readStripeCount++; + readState->currentStripeMetadata = stripeMetadata; MemoryContextSwitchTo(oldContext); @@ -183,37 +174,38 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu readState->stripeBuffers = stripeBuffers; readState->stripeReadRowCount = 0; readState->deserializedBlockIndex = -1; - ResetUncompressedBlockData(readState->blockDataArray, - stripeBuffers->columnCount); break; } } - blockIndex = readState->stripeReadRowCount / tableMetadata->blockRowCount; - blockRowIndex = readState->stripeReadRowCount % 
tableMetadata->blockRowCount; + blockIndex = readState->stripeReadRowCount / stripeMetadata->blockRowCount; + blockRowIndex = readState->stripeReadRowCount % stripeMetadata->blockRowCount; if (blockIndex != readState->deserializedBlockIndex) { uint32 lastBlockIndex = 0; uint32 blockRowCount = 0; uint32 stripeRowCount = 0; + StripeMetadata *stripeMetadata = readState->currentStripeMetadata; - stripeRowCount = readState->stripeBuffers->rowCount; - lastBlockIndex = stripeRowCount / tableMetadata->blockRowCount; + stripeRowCount = stripeMetadata->rowCount; + lastBlockIndex = stripeRowCount / stripeMetadata->blockRowCount; if (blockIndex == lastBlockIndex) { - blockRowCount = stripeRowCount % tableMetadata->blockRowCount; + blockRowCount = stripeRowCount % stripeMetadata->blockRowCount; } else { - blockRowCount = tableMetadata->blockRowCount; + blockRowCount = stripeMetadata->blockRowCount; } oldContext = MemoryContextSwitchTo(readState->stripeReadContext); - DeserializeBlockData(readState->stripeBuffers, blockIndex, - blockRowCount, readState->blockDataArray, - readState->tupleDescriptor); + FreeBlockData(readState->blockData); + readState->blockData = + DeserializeBlockData(readState->stripeBuffers, blockIndex, + blockRowCount, readState->tupleDescriptor, + readState->projectedColumnList); MemoryContextSwitchTo(oldContext); @@ -221,7 +213,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu } ReadStripeNextRow(readState->stripeBuffers, readState->projectedColumnList, - blockIndex, blockRowIndex, readState->blockDataArray, + blockIndex, blockRowIndex, readState->blockData, columnValues, columnNulls); /* @@ -242,11 +234,8 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu void CStoreEndRead(TableReadState *readState) { - int columnCount = readState->tupleDescriptor->natts; - MemoryContextDelete(readState->stripeReadContext); list_free_deep(readState->tableMetadata->stripeMetadataList); - 
FreeColumnBlockDataArray(readState->blockDataArray, columnCount); pfree(readState->tableMetadata); pfree(readState); } @@ -256,52 +245,65 @@ CStoreEndRead(TableReadState *readState) * CreateEmptyBlockDataArray creates data buffers to keep deserialized exist and * value arrays for requested columns in columnMask. */ -ColumnBlockData ** -CreateEmptyBlockDataArray(uint32 columnCount, bool *columnMask, uint32 blockRowCount) +BlockData * +CreateEmptyBlockData(uint32 columnCount, bool *columnMask, uint32 blockRowCount) { uint32 columnIndex = 0; - ColumnBlockData **blockDataArray = palloc0(columnCount * sizeof(ColumnBlockData *)); + + BlockData *blockData = palloc0(sizeof(BlockData)); + blockData->existsArray = palloc0(columnCount * sizeof(bool *)); + blockData->valueArray = palloc0(columnCount * sizeof(Datum *)); + blockData->valueBufferArray = palloc0(columnCount * sizeof(StringInfo)); + blockData->columnCount = columnCount; + blockData->rowCount = blockRowCount; /* allocate block memory for deserialized data */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { if (columnMask[columnIndex]) { - ColumnBlockData *blockData = palloc0(sizeof(ColumnBlockData)); - - blockData->existsArray = palloc0(blockRowCount * sizeof(bool)); - blockData->valueArray = palloc0(blockRowCount * sizeof(Datum)); - blockData->valueBuffer = NULL; - blockDataArray[columnIndex] = blockData; + blockData->existsArray[columnIndex] = palloc0(blockRowCount * sizeof(bool)); + blockData->valueArray[columnIndex] = palloc0(blockRowCount * sizeof(Datum)); + blockData->valueBufferArray[columnIndex] = NULL; } } - return blockDataArray; + return blockData; } /* - * FreeColumnBlockDataArray deallocates data buffers to keep deserialized exist and + * FreeBlockData deallocates data buffers to keep deserialized exist and * value arrays for requested columns in columnMask. 
* ColumnBlockData->serializedValueBuffer lives in memory read/write context * so it is deallocated automatically when the context is deleted. */ void -FreeColumnBlockDataArray(ColumnBlockData **blockDataArray, uint32 columnCount) +FreeBlockData(BlockData *blockData) { uint32 columnIndex = 0; - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + + if (blockData == NULL) { - ColumnBlockData *blockData = blockDataArray[columnIndex]; - if (blockData != NULL) + return; + } + + for (columnIndex = 0; columnIndex < blockData->columnCount; columnIndex++) + { + if (blockData->existsArray[columnIndex] != NULL) { - pfree(blockData->existsArray); - pfree(blockData->valueArray); - pfree(blockData); + pfree(blockData->existsArray[columnIndex]); + } + + if (blockData->valueArray[columnIndex] != NULL) + { + pfree(blockData->valueArray[columnIndex]); } } - pfree(blockDataArray); + pfree(blockData->existsArray); + pfree(blockData->valueArray); + pfree(blockData); } @@ -403,7 +405,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, uint64 blockIndex, uint64 blockRowIndex, - ColumnBlockData **blockDataArray, Datum *columnValues, + BlockData *blockData, Datum *columnValues, bool *columnNulls) { ListCell *projectedColumnCell = NULL; @@ -414,13 +416,12 @@ ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, foreach(projectedColumnCell, projectedColumnList) { Var *projectedColumn = lfirst(projectedColumnCell); - uint32 projectedColumnIndex = projectedColumn->varattno - 1; - ColumnBlockData *blockData = blockDataArray[projectedColumnIndex]; + uint32 columnIndex = projectedColumn->varattno - 1; - if (blockData->existsArray[blockRowIndex]) + if (blockData->existsArray[columnIndex][blockRowIndex]) { - columnValues[projectedColumnIndex] = blockData->valueArray[blockRowIndex]; - columnNulls[projectedColumnIndex] = false; + 
columnValues[columnIndex] = blockData->valueArray[columnIndex][blockRowIndex]; + columnNulls[columnIndex] = false; } } } @@ -919,20 +920,23 @@ DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, uint32 datumCou * data is not present serialized buffer, then default value (or null) is used * to fill value array. */ -static void +static BlockData * DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, - uint32 rowCount, - ColumnBlockData **blockDataArray, TupleDesc tupleDescriptor) + uint32 rowCount, TupleDesc tupleDescriptor, + List *projectedColumnList) { int columnIndex = 0; + bool *columnMask = ProjectedColumnMask(tupleDescriptor->natts, projectedColumnList); + BlockData *blockData = CreateEmptyBlockData(tupleDescriptor->natts, columnMask, + rowCount); + for (columnIndex = 0; columnIndex < stripeBuffers->columnCount; columnIndex++) { - ColumnBlockData *blockData = blockDataArray[columnIndex]; Form_pg_attribute attributeForm = TupleDescAttr(tupleDescriptor, columnIndex); ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; bool columnAdded = false; - if ((columnBuffers == NULL) && (blockData != NULL)) + if (columnBuffers == NULL && columnMask[columnIndex]) { columnAdded = true; } @@ -943,10 +947,6 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = NULL; - /* free previous block's data buffers */ - pfree(blockData->valueBuffer->data); - pfree(blockData->valueBuffer); - /* decompress and deserialize current block's data */ valueBuffer = DecompressBuffer(blockBuffers->valueBuffer, blockBuffers->valueCompressionType); @@ -958,15 +958,16 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, pfree(blockBuffers->valueBuffer); } - DeserializeBoolArray(blockBuffers->existsBuffer, blockData->existsArray, + DeserializeBoolArray(blockBuffers->existsBuffer, + blockData->existsArray[columnIndex], rowCount); - 
DeserializeDatumArray(valueBuffer, blockData->existsArray, + DeserializeDatumArray(valueBuffer, blockData->existsArray[columnIndex], rowCount, attributeForm->attbyval, attributeForm->attlen, attributeForm->attalign, - blockData->valueArray); + blockData->valueArray[columnIndex]); /* store current block's data buffer to be freed at next block read */ - blockData->valueBuffer = valueBuffer; + blockData->valueBufferArray[columnIndex] = valueBuffer; } else if (columnAdded) { @@ -983,16 +984,19 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, for (rowIndex = 0; rowIndex < rowCount; rowIndex++) { - blockData->existsArray[rowIndex] = true; - blockData->valueArray[rowIndex] = defaultValue; + blockData->existsArray[columnIndex][rowIndex] = true; + blockData->valueArray[columnIndex][rowIndex] = defaultValue; } } else { - memset(blockData->existsArray, false, rowCount); + memset(blockData->existsArray[columnIndex], false, + rowCount * sizeof(bool)); } } } + + return blockData; } @@ -1067,23 +1071,3 @@ ReadFromSmgr(Relation rel, uint64 offset, uint32 size) return resultBuffer; } - - -/* - * ResetUncompressedBlockData iterates over deserialized column block data - * and sets valueBuffer field to empty buffer. This field is allocated in stripe - * memory context and becomes invalid once memory context is reset. 
- */ -static void -ResetUncompressedBlockData(ColumnBlockData **blockDataArray, uint32 columnCount) -{ - uint32 columnIndex = 0; - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - ColumnBlockData *blockData = blockDataArray[columnIndex]; - if (blockData != NULL) - { - blockData->valueBuffer = makeStringInfo(); - } - } -} diff --git a/cstore_writer.c b/cstore_writer.c index 55a314ec4..cf0fa58fe 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -72,7 +72,7 @@ CStoreBeginWrite(Oid relationId, uint32 columnCount = 0; uint32 columnIndex = 0; bool *columnMaskArray = NULL; - ColumnBlockData **blockData = NULL; + BlockData *blockData = NULL; uint64 currentStripeId = 0; tableMetadata = ReadTableMetadata(relationId); @@ -125,20 +125,21 @@ CStoreBeginWrite(Oid relationId, columnMaskArray = palloc(columnCount * sizeof(bool)); memset(columnMaskArray, true, columnCount); - blockData = CreateEmptyBlockDataArray(columnCount, columnMaskArray, blockRowCount); + blockData = CreateEmptyBlockData(columnCount, columnMaskArray, blockRowCount); writeState = palloc0(sizeof(TableWriteState)); writeState->relationId = relationId; writeState->tableMetadata = tableMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; + writeState->blockRowCount = blockRowCount; writeState->tupleDescriptor = tupleDescriptor; writeState->currentFileOffset = currentFileOffset; writeState->comparisonFunctionArray = comparisonFunctionArray; writeState->stripeBuffers = NULL; writeState->stripeSkipList = NULL; writeState->stripeWriteContext = stripeWriteContext; - writeState->blockDataArray = blockData; + writeState->blockData = blockData; writeState->compressionBuffer = NULL; writeState->currentStripeId = currentStripeId; @@ -164,8 +165,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul StripeSkipList *stripeSkipList = writeState->stripeSkipList; uint32 columnCount = writeState->tupleDescriptor->natts; 
TableMetadata *tableMetadata = writeState->tableMetadata; - const uint32 blockRowCount = tableMetadata->blockRowCount; - ColumnBlockData **blockDataArray = writeState->blockDataArray; + const uint32 blockRowCount = writeState->blockRowCount; + BlockData *blockData = writeState->blockData; MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); if (stripeBuffers == NULL) @@ -184,8 +185,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { - ColumnBlockData *blockData = blockDataArray[columnIndex]; - blockData->valueBuffer = makeStringInfo(); + blockData->valueBufferArray[columnIndex] = makeStringInfo(); } } @@ -194,14 +194,13 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { - ColumnBlockData *blockData = blockDataArray[columnIndex]; ColumnBlockSkipNode **blockSkipNodeArray = stripeSkipList->blockSkipNodeArray; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[columnIndex][blockIndex]; if (columnNulls[columnIndex]) { - blockData->existsArray[blockRowIndex] = false; + blockData->existsArray[columnIndex][blockRowIndex] = false; } else { @@ -214,10 +213,11 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul Oid columnCollation = attributeForm->attcollation; char columnTypeAlign = attributeForm->attalign; - blockData->existsArray[blockRowIndex] = true; + blockData->existsArray[columnIndex][blockRowIndex] = true; - SerializeSingleDatum(blockData->valueBuffer, columnValues[columnIndex], - columnTypeByValue, columnTypeLength, columnTypeAlign); + SerializeSingleDatum(blockData->valueBufferArray[columnIndex], + columnValues[columnIndex], columnTypeByValue, + columnTypeLength, columnTypeAlign); UpdateBlockSkipNodeMinMax(blockSkipNode, columnValues[columnIndex], columnTypeByValue, columnTypeLength, @@ -271,7 +271,6 
@@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul void CStoreEndWrite(TableWriteState *writeState) { - int columnCount = writeState->tupleDescriptor->natts; StripeBuffers *stripeBuffers = writeState->stripeBuffers; if (stripeBuffers != NULL) @@ -289,7 +288,7 @@ CStoreEndWrite(TableWriteState *writeState) MemoryContextDelete(writeState->stripeWriteContext); list_free_deep(writeState->tableMetadata->stripeMetadataList); pfree(writeState->comparisonFunctionArray); - FreeColumnBlockDataArray(writeState->blockDataArray, columnCount); + FreeBlockData(writeState->blockData); pfree(writeState); } @@ -415,6 +414,8 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) if (RelationNeedsWAL(rel)) { + XLogRecPtr recptr = 0; + XLogBeginInsert(); /* @@ -423,7 +424,7 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) */ XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE); - XLogRecPtr recptr = XLogInsert(RM_GENERIC_ID, 0); + recptr = XLogInsert(RM_GENERIC_ID, 0); PageSetLSN(page, recptr); } @@ -452,14 +453,13 @@ FlushStripe(TableWriteState *writeState) StripeFooter *stripeFooter = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; - TableMetadata *tableMetadata = writeState->tableMetadata; StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; ColumnBlockSkipNode **columnSkipNodeArray = stripeSkipList->blockSkipNodeArray; TupleDesc tupleDescriptor = writeState->tupleDescriptor; uint32 columnCount = tupleDescriptor->natts; uint32 blockCount = stripeSkipList->blockCount; - uint32 blockRowCount = tableMetadata->blockRowCount; + uint32 blockRowCount = writeState->blockRowCount; uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; uint64 initialFileOffset = writeState->currentFileOffset; @@ -565,6 +565,7 @@ FlushStripe(TableWriteState *writeState) 
stripeMetadata.dataLength = dataLength; stripeMetadata.id = writeState->currentStripeId; stripeMetadata.blockCount = blockCount; + stripeMetadata.blockRowCount = writeState->blockRowCount; return stripeMetadata; } @@ -679,7 +680,7 @@ SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCou { uint32 columnIndex = 0; StripeBuffers *stripeBuffers = writeState->stripeBuffers; - ColumnBlockData **blockDataArray = writeState->blockDataArray; + BlockData *blockData = writeState->blockData; CompressionType requestedCompressionType = writeState->compressionType; const uint32 columnCount = stripeBuffers->columnCount; StringInfo compressionBuffer = writeState->compressionBuffer; @@ -689,9 +690,9 @@ SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCou { ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; - ColumnBlockData *blockData = blockDataArray[columnIndex]; - blockBuffers->existsBuffer = SerializeBoolArray(blockData->existsArray, rowCount); + blockBuffers->existsBuffer = + SerializeBoolArray(blockData->existsArray[columnIndex], rowCount); } /* @@ -702,12 +703,11 @@ SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCou { ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; - ColumnBlockData *blockData = blockDataArray[columnIndex]; StringInfo serializedValueBuffer = NULL; CompressionType actualCompressionType = COMPRESSION_NONE; bool compressed = false; - serializedValueBuffer = blockData->valueBuffer; + serializedValueBuffer = blockData->valueBufferArray[columnIndex]; /* the only other supported compression type is pg_lz for now */ Assert(requestedCompressionType == COMPRESSION_NONE || @@ -730,7 +730,7 @@ SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 
rowCou blockBuffers->valueBuffer = CopyStringInfo(serializedValueBuffer); /* valueBuffer needs to be reset for next block's data */ - resetStringInfo(blockData->valueBuffer); + resetStringInfo(blockData->valueBufferArray[columnIndex]); } } From db5287069ff8b765746470e199e25fce3b979a2f Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 21 Sep 2020 22:10:25 -0700 Subject: [PATCH 056/124] Make block offsets relative to stripe start --- cstore.h | 1 + cstore_reader.c | 14 +++++--------- cstore_writer.c | 30 ++++++++++++++++++++++-------- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/cstore.h b/cstore.h index e48bced61..f88ebbdab 100644 --- a/cstore.h +++ b/cstore.h @@ -235,6 +235,7 @@ typedef struct TableWriteState TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; uint64 currentFileOffset; + uint64 currentStripeOffset; Relation relation; MemoryContext stripeWriteContext; diff --git a/cstore_reader.c b/cstore_reader.c index caf07473f..6b5d7ed00 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -49,8 +49,7 @@ static void ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColum bool *columnNulls); static ColumnBuffers * LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, - uint32 blockCount, uint64 existsFileOffset, - uint64 valueFileOffset, + uint32 blockCount, uint64 stripeOffset, Form_pg_attribute attributeForm); static bool * SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList, List *whereClauseList); @@ -365,8 +364,6 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, { uint64 existsSize = stripeFooter->existsSizeArray[columnIndex]; uint64 valueSize = stripeFooter->valueSizeArray[columnIndex]; - uint64 existsFileOffset = currentColumnFileOffset; - uint64 valueFileOffset = currentColumnFileOffset + existsSize; if (projectedColumnMask[columnIndex]) { @@ -377,8 +374,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata 
*stripeMetadata, ColumnBuffers *columnBuffers = LoadColumnBuffers(relation, blockSkipNode, blockCount, - existsFileOffset, - valueFileOffset, + stripeMetadata->fileOffset, attributeForm); columnBuffersArray[columnIndex] = columnBuffers; @@ -434,7 +430,7 @@ ReadStripeNextRow(StripeBuffers *stripeBuffers, List *projectedColumnList, */ static ColumnBuffers * LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, - uint32 blockCount, uint64 existsFileOffset, uint64 valueFileOffset, + uint32 blockCount, uint64 stripeOffset, Form_pg_attribute attributeForm) { ColumnBuffers *columnBuffers = NULL; @@ -455,7 +451,7 @@ LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; - uint64 existsOffset = existsFileOffset + blockSkipNode->existsBlockOffset; + uint64 existsOffset = stripeOffset + blockSkipNode->existsBlockOffset; StringInfo rawExistsBuffer = ReadFromSmgr(relation, existsOffset, blockSkipNode->existsLength); @@ -467,7 +463,7 @@ LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, { ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; CompressionType compressionType = blockSkipNode->valueCompressionType; - uint64 valueOffset = valueFileOffset + blockSkipNode->valueBlockOffset; + uint64 valueOffset = stripeOffset + blockSkipNode->valueBlockOffset; StringInfo rawValueBuffer = ReadFromSmgr(relation, valueOffset, blockSkipNode->valueLength); diff --git a/cstore_writer.c b/cstore_writer.c index cf0fa58fe..65871b511 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -242,6 +242,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul MemoryContextReset(writeState->stripeWriteContext); writeState->currentStripeId++; + writeState->currentStripeOffset = 0; /* set stripe data and skip list to NULL so they are recreated next time */ 
writeState->stripeBuffers = NULL; @@ -473,12 +474,10 @@ FlushStripe(TableWriteState *writeState) SerializeBlockData(writeState, lastBlockIndex, lastBlockRowCount); } - /* update buffer sizes and positions in stripe skip list */ + /* update buffer sizes in stripe skip list */ for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; - uint64 currentExistsBlockOffset = 0; - uint64 currentValueBlockOffset = 0; ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; for (blockIndex = 0; blockIndex < blockCount; blockIndex++) @@ -486,21 +485,36 @@ FlushStripe(TableWriteState *writeState) ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; uint64 existsBufferSize = blockBuffers->existsBuffer->len; + ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; + + blockSkipNode->existsBlockOffset = writeState->currentStripeOffset; + blockSkipNode->existsLength = existsBufferSize; + writeState->currentStripeOffset += existsBufferSize; + } + } + + for (columnIndex = 0; columnIndex < columnCount; columnIndex++) + { + ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; + ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; + + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + ColumnBlockBuffers *blockBuffers = + columnBuffers->blockBuffersArray[blockIndex]; uint64 valueBufferSize = blockBuffers->valueBuffer->len; CompressionType valueCompressionType = blockBuffers->valueCompressionType; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; - blockSkipNode->existsBlockOffset = currentExistsBlockOffset; - blockSkipNode->existsLength = existsBufferSize; - blockSkipNode->valueBlockOffset = currentValueBlockOffset; + blockSkipNode->valueBlockOffset = writeState->currentStripeOffset; blockSkipNode->valueLength = valueBufferSize; 
blockSkipNode->valueCompressionType = valueCompressionType; - currentExistsBlockOffset += existsBufferSize; - currentValueBlockOffset += valueBufferSize; + writeState->currentStripeOffset += valueBufferSize; } } + /* create skip list and footer buffers */ SaveStripeSkipList(writeState->relationId, writeState->currentStripeId, stripeSkipList, tupleDescriptor); From 1b45cfb52e2f6a5dc470cc0abdc77fe7a65d7f7f Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 23 Sep 2020 09:53:32 -0700 Subject: [PATCH 057/124] remove generated sql test files --- sql/am_block_filtering.sql | 73 -------------------------------------- sql/am_copyto.sql | 17 --------- sql/am_create.sql | 20 ----------- sql/am_data_types.sql | 68 ----------------------------------- sql/am_load.sql | 46 ------------------------ 5 files changed, 224 deletions(-) delete mode 100644 sql/am_block_filtering.sql delete mode 100644 sql/am_copyto.sql delete mode 100644 sql/am_create.sql delete mode 100644 sql/am_data_types.sql delete mode 100644 sql/am_load.sql diff --git a/sql/am_block_filtering.sql b/sql/am_block_filtering.sql deleted file mode 100644 index 2a45716ed..000000000 --- a/sql/am_block_filtering.sql +++ /dev/null @@ -1,73 +0,0 @@ --- --- Test block filtering in cstore_fdw using min/max values in stripe skip lists. --- - - --- --- filtered_row_count returns number of rows filtered by the WHERE clause. --- If blocks get filtered by cstore_fdw, less rows are passed to WHERE --- clause, so this function should return a lower number. 
--- -CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS -$$ - DECLARE - result bigint; - rec text; - BEGIN - result := 0; - - FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP - IF rec ~ '^\s+Rows Removed by Filter' then - result := regexp_replace(rec, '[^0-9]*', '', 'g'); - END IF; - END LOOP; - - RETURN result; - END; -$$ LANGUAGE PLPGSQL; - - --- Create and load data --- block_row_count '1000', stripe_row_count '2000' -set cstore.stripe_row_count = 2000; -set cstore.block_row_count = 1000; -CREATE TABLE test_block_filtering (a int) - USING cstore_tableam; - -COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; - - --- Verify that filtered_row_count is less than 1000 for the following queries -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); - - --- Verify that filtered_row_count is less than 2000 for the following queries -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); - - --- Load data for second time and verify that filtered_row_count is exactly twice as before -COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); -SELECT filtered_row_count('SELECT count(*) FROM 
test_block_filtering WHERE a < 0'); -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); - -set cstore.stripe_row_count to default; -set cstore.block_row_count to default; - --- Verify that we are fine with collations which use a different alphabet order -CREATE TABLE collation_block_filtering_test(A text collate "da_DK") - USING cstore_tableam; -COPY collation_block_filtering_test FROM STDIN; -A -Å -B -\. - -SELECT * FROM collation_block_filtering_test WHERE A > 'B'; diff --git a/sql/am_copyto.sql b/sql/am_copyto.sql deleted file mode 100644 index 7288ff66f..000000000 --- a/sql/am_copyto.sql +++ /dev/null @@ -1,17 +0,0 @@ --- --- Test copying data from cstore_fdw tables. --- -CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, - percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam; - --- load table data from file -COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; - --- export using COPY table TO ... -COPY test_contestant TO STDOUT; - --- export using COPY (SELECT * FROM table) TO ... -COPY (select * from test_contestant) TO STDOUT; - -DROP TABLE test_contestant CASCADE; diff --git a/sql/am_create.sql b/sql/am_create.sql deleted file mode 100644 index 6d4d5a388..000000000 --- a/sql/am_create.sql +++ /dev/null @@ -1,20 +0,0 @@ --- --- Test the CREATE statements related to cstore. 
--- - - --- Create uncompressed table -CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, - percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam; - - --- Create compressed table with automatically determined file path --- COMPRESSED -CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, - percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam; - --- Test that querying an empty table works -ANALYZE contestant; -SELECT count(*) FROM contestant; diff --git a/sql/am_data_types.sql b/sql/am_data_types.sql deleted file mode 100644 index b2668e71f..000000000 --- a/sql/am_data_types.sql +++ /dev/null @@ -1,68 +0,0 @@ --- --- Test loading and reading different data types to/from cstore_fdw foreign tables. --- - - --- Settings to make the result deterministic -SET datestyle = "ISO, YMD"; -SET timezone to 'GMT'; -SET intervalstyle TO 'POSTGRES_VERBOSE'; - - --- Test array types -CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) USING cstore_tableam; - -COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; - -SELECT * FROM test_array_types; - - --- Test date/time types -CREATE TABLE test_datetime_types (timestamp timestamp, - timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) USING cstore_tableam; - -COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; - -SELECT * FROM test_datetime_types; - - --- Test enum and composite types -CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); -CREATE TYPE composite_type AS (a int, b text); - -CREATE TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) USING cstore_tableam; - -COPY test_enum_and_composite_types FROM - '/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; - -SELECT * FROM test_enum_and_composite_types; - - --- Test range types -CREATE TABLE 
test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) USING cstore_tableam; - -COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; - -SELECT * FROM test_range_types; - - --- Test other types -CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; - -COPY test_other_types FROM '/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; - -SELECT * FROM test_other_types; - - --- Test null values -CREATE TABLE test_null_values (a int, b int[], c composite_type) - USING cstore_tableam; - -COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; - -SELECT * FROM test_null_values; diff --git a/sql/am_load.sql b/sql/am_load.sql deleted file mode 100644 index edc727b3c..000000000 --- a/sql/am_load.sql +++ /dev/null @@ -1,46 +0,0 @@ --- --- Test loading data into cstore_fdw tables. --- - --- COPY with incorrect delimiter -COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' - WITH DELIMITER '|'; -- ERROR - --- COPY with invalid program -COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR - --- COPY into uncompressed table from file -COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; - --- COPY into uncompressed table from program -COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; - --- COPY into compressed table -set cstore.compression = 'pglz'; -COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; - --- COPY into uncompressed table from program -COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' - WITH CSV; -set cstore.compression to default; - --- Test column list -CREATE TABLE famous_constants (id int, name text, value real) - USING cstore_tableam; -COPY famous_constants (value, 
name, id) FROM STDIN WITH CSV; -3.141,pi,1 -2.718,e,2 -0.577,gamma,3 -5.291e-11,bohr radius,4 -\. - -COPY famous_constants (name, value) FROM STDIN WITH CSV; -avagadro,6.022e23 -electron mass,9.109e-31 -proton mass,1.672e-27 -speed of light,2.997e8 -\. - -SELECT * FROM famous_constants ORDER BY id, name; - -DROP TABLE famous_constants; From a34cdeb83c3b815175d818a980db3c723e0ca984 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 21 Sep 2020 22:59:21 -0700 Subject: [PATCH 058/124] Remove StripeFooter --- cstore.h | 18 +---- cstore_fdw--1.7.sql | 15 +---- cstore_metadata_tables.c | 141 +++------------------------------------ cstore_reader.c | 20 +----- cstore_writer.c | 74 +++----------------- 5 files changed, 24 insertions(+), 244 deletions(-) diff --git a/cstore.h b/cstore.h index f88ebbdab..96fa1ed53 100644 --- a/cstore.h +++ b/cstore.h @@ -77,6 +77,7 @@ typedef struct StripeMetadata { uint64 fileOffset; uint64 dataLength; + uint32 columnCount; uint32 blockCount; uint32 blockRowCount; uint64 rowCount; @@ -186,19 +187,6 @@ typedef struct StripeBuffers } StripeBuffers; -/* - * StripeFooter represents a stripe's footer. In this footer, we keep three - * arrays of sizes. The number of elements in each of the arrays is equal - * to the number of columns. - */ -typedef struct StripeFooter -{ - uint32 columnCount; - uint64 *existsSizeArray; - uint64 *valueSizeArray; -} StripeFooter; - - /* TableReadState represents state of a cstore file read operation. 
*/ typedef struct TableReadState { @@ -235,7 +223,6 @@ typedef struct TableWriteState TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; uint64 currentFileOffset; - uint64 currentStripeOffset; Relation relation; MemoryContext stripeWriteContext; @@ -296,9 +283,6 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); /* cstore_metadata_tables.c */ -extern void SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer); -extern StripeFooter * ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount); - extern void InitCStoreTableMetadata(Oid relid, int blockRowCount); extern void InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe); extern TableMetadata * ReadTableMetadata(Oid relid); diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index d98652b6d..84b69be07 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -73,6 +73,7 @@ CREATE TABLE cstore_stripes ( stripe bigint NOT NULL, file_offset bigint NOT NULL, data_length bigint NOT NULL, + column_count int NOT NULL, block_count int NOT NULL, block_row_count int NOT NULL, row_count bigint NOT NULL, @@ -82,18 +83,6 @@ CREATE TABLE cstore_stripes ( COMMENT ON TABLE cstore_tables IS 'CStore per stripe metadata'; -CREATE TABLE cstore_stripe_attr ( - relid oid NOT NULL, - stripe bigint NOT NULL, - attr int NOT NULL, - exists_size bigint NOT NULL, - value_size bigint NOT NULL, - PRIMARY KEY (relid, stripe, attr), - FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED -) WITH (user_catalog_table = true); - -COMMENT ON TABLE cstore_tables IS 'CStore per stripe/column combination metadata'; - CREATE TABLE cstore_skipnodes ( relid oid NOT NULL, stripe bigint NOT NULL, @@ -108,7 +97,7 @@ CREATE TABLE cstore_skipnodes ( exists_stream_length bigint NOT NULL, value_compression_type int NOT NULL, PRIMARY KEY (relid, stripe, 
attr, block), - FOREIGN KEY (relid, stripe, attr) REFERENCES cstore_stripe_attr(relid, stripe, attr) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relid, stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_tables IS 'CStore per block metadata'; diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 5285295b9..690e9eba9 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -43,8 +43,6 @@ typedef struct EState *estate; } ModifyState; -static Oid CStoreStripeAttrRelationId(void); -static Oid CStoreStripeAttrIndexRelationId(void); static Oid CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); static Oid CStoreTablesRelationId(void); @@ -63,14 +61,6 @@ static EState * create_estate_for_relation(Relation rel); static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); -/* constants for cstore_stripe_attr */ -#define Natts_cstore_stripe_attr 5 -#define Anum_cstore_stripe_attr_relid 1 -#define Anum_cstore_stripe_attr_stripe 2 -#define Anum_cstore_stripe_attr_attr 3 -#define Anum_cstore_stripe_attr_exists_size 4 -#define Anum_cstore_stripe_attr_value_size 5 - /* constants for cstore_table */ #define Natts_cstore_tables 4 #define Anum_cstore_tables_relid 1 @@ -79,14 +69,15 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); #define Anum_cstore_tables_version_minor 4 /* constants for cstore_stripe */ -#define Natts_cstore_stripes 7 +#define Natts_cstore_stripes 8 #define Anum_cstore_stripes_relid 1 #define Anum_cstore_stripes_stripe 2 #define Anum_cstore_stripes_file_offset 3 #define Anum_cstore_stripes_data_length 4 -#define Anum_cstore_stripes_block_count 5 -#define Anum_cstore_stripes_block_row_count 6 -#define Anum_cstore_stripes_row_count 7 +#define Anum_cstore_stripes_column_count 5 +#define 
Anum_cstore_stripes_block_count 6 +#define Anum_cstore_stripes_block_row_count 7 +#define Anum_cstore_stripes_row_count 8 /* constants for cstore_skipnodes */ #define Natts_cstore_skipnodes 12 @@ -328,6 +319,7 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) Int64GetDatum(stripe->id), Int64GetDatum(stripe->fileOffset), Int64GetDatum(stripe->dataLength), + Int32GetDatum(stripe->columnCount), Int32GetDatum(stripe->blockCount), Int32GetDatum(stripe->blockRowCount), Int64GetDatum(stripe->rowCount) @@ -388,6 +380,8 @@ ReadTableMetadata(Oid relid) datumArray[Anum_cstore_stripes_file_offset - 1]); stripeMetadata->dataLength = DatumGetInt64( datumArray[Anum_cstore_stripes_data_length - 1]); + stripeMetadata->columnCount = DatumGetInt32( + datumArray[Anum_cstore_stripes_column_count - 1]); stripeMetadata->blockCount = DatumGetInt32( datumArray[Anum_cstore_stripes_block_count - 1]); stripeMetadata->blockRowCount = DatumGetInt32( @@ -485,103 +479,6 @@ DeleteTableMetadataRowIfExists(Oid relid) } -/* - * SaveStripeFooter stores give StripeFooter as cstore_stripe_attr records. - */ -void -SaveStripeFooter(Oid relid, uint64 stripe, StripeFooter *footer) -{ - Oid cstoreStripeAttrOid = CStoreStripeAttrRelationId(); - Relation cstoreStripeAttrs = heap_open(cstoreStripeAttrOid, RowExclusiveLock); - - ModifyState *modifyState = StartModifyRelation(cstoreStripeAttrs); - - for (AttrNumber attr = 1; attr <= footer->columnCount; attr++) - { - bool nulls[Natts_cstore_stripe_attr] = { 0 }; - Datum values[Natts_cstore_stripe_attr] = { - ObjectIdGetDatum(relid), - Int64GetDatum(stripe), - Int16GetDatum(attr), - Int64GetDatum(footer->existsSizeArray[attr - 1]), - Int64GetDatum(footer->valueSizeArray[attr - 1]) - }; - - InsertTupleAndEnforceConstraints(modifyState, values, nulls); - } - - FinishModifyRelation(modifyState); - heap_close(cstoreStripeAttrs, NoLock); -} - - -/* - * ReadStripeFooter returns a StripeFooter by reading relevant records from - * cstore_stripe_attr. 
- */ -StripeFooter * -ReadStripeFooter(Oid relid, uint64 stripe, int relationColumnCount) -{ - StripeFooter *footer = NULL; - HeapTuple heapTuple; - - Oid cstoreStripeAttrOid = CStoreStripeAttrRelationId(); - Relation cstoreStripeAttrs = heap_open(cstoreStripeAttrOid, AccessShareLock); - Relation index = index_open(CStoreStripeAttrIndexRelationId(), AccessShareLock); - TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripeAttrs); - - SysScanDesc scanDescriptor = NULL; - ScanKeyData scanKey[2]; - ScanKeyInit(&scanKey[0], Anum_cstore_stripe_attr_relid, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); - ScanKeyInit(&scanKey[1], Anum_cstore_stripe_attr_stripe, - BTEqualStrategyNumber, F_INT8EQ, Int64GetDatum(stripe)); - - scanDescriptor = systable_beginscan_ordered(cstoreStripeAttrs, index, NULL, 2, - scanKey); - - footer = palloc0(sizeof(StripeFooter)); - footer->existsSizeArray = palloc0(relationColumnCount * sizeof(int64)); - footer->valueSizeArray = palloc0(relationColumnCount * sizeof(int64)); - - /* - * Stripe can have less columns than the relation if ALTER TABLE happens - * after stripe is formed. So we calculate column count of a stripe as - * maximum attribute number for that stripe. 
- */ - footer->columnCount = 0; - - while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) - { - Datum datumArray[Natts_cstore_stripe_attr]; - bool isNullArray[Natts_cstore_stripe_attr]; - AttrNumber attr = 0; - - heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); - attr = DatumGetInt16(datumArray[2]); - - footer->columnCount = Max(footer->columnCount, attr); - - while (attr > relationColumnCount) - { - ereport(ERROR, (errmsg("unexpected attribute %d for a relation with %d attrs", - attr, relationColumnCount))); - } - - footer->existsSizeArray[attr - 1] = - DatumGetInt64(datumArray[Anum_cstore_stripe_attr_exists_size - 1]); - footer->valueSizeArray[attr - 1] = - DatumGetInt64(datumArray[Anum_cstore_stripe_attr_value_size - 1]); - } - - systable_endscan_ordered(scanDescriptor); - index_close(index, NoLock); - heap_close(cstoreStripeAttrs, NoLock); - - return footer; -} - - /* * StartModifyRelation allocates resources for modifications. */ @@ -760,28 +657,6 @@ ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm) } -/* - * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr. - * TODO: should we cache this similar to citus? - */ -static Oid -CStoreStripeAttrRelationId(void) -{ - return get_relname_relid("cstore_stripe_attr", CStoreNamespaceId()); -} - - -/* - * CStoreStripeAttrRelationId returns relation id of cstore_stripe_attr_pkey. - * TODO: should we cache this similar to citus? - */ -static Oid -CStoreStripeAttrIndexRelationId(void) -{ - return get_relname_relid("cstore_stripe_attr_pkey", CStoreNamespaceId()); -} - - /* * CStoreStripesRelationId returns relation id of cstore_stripes. * TODO: should we cache this similar to citus? 
diff --git a/cstore_reader.c b/cstore_reader.c index 6b5d7ed00..25702b272 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -39,7 +39,6 @@ /* static function declarations */ static StripeBuffers * LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, - StripeFooter *stripeFooter, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList); @@ -141,7 +140,6 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu StripeMetadata *stripeMetadata = NULL; List *stripeMetadataList = readState->tableMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); - StripeFooter *stripeFooter = NULL; /* if we have read all stripes, return false */ if (readState->readStripeCount == stripeCount) @@ -154,12 +152,8 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu readState->blockData = NULL; stripeMetadata = list_nth(stripeMetadataList, readState->readStripeCount); - stripeFooter = ReadStripeFooter(readState->relationId, - stripeMetadata->id, - readState->tupleDescriptor->natts); stripeBuffers = LoadFilteredStripeBuffers(readState->relation, stripeMetadata, - stripeFooter, readState->tupleDescriptor, readState->projectedColumnList, readState->whereClauseList); @@ -333,12 +327,11 @@ CStoreTableRowCount(Relation relation) */ static StripeBuffers * LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, - StripeFooter *stripeFooter, TupleDesc tupleDescriptor, - List *projectedColumnList, List *whereClauseList) + TupleDesc tupleDescriptor, List *projectedColumnList, + List *whereClauseList) { StripeBuffers *stripeBuffers = NULL; ColumnBuffers **columnBuffersArray = NULL; - uint64 currentColumnFileOffset = 0; uint32 columnIndex = 0; uint32 columnCount = tupleDescriptor->natts; @@ -358,13 +351,9 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, /* load column data for projected columns */ columnBuffersArray = 
palloc0(columnCount * sizeof(ColumnBuffers *)); - currentColumnFileOffset = stripeMetadata->fileOffset; - for (columnIndex = 0; columnIndex < stripeFooter->columnCount; columnIndex++) + for (columnIndex = 0; columnIndex < stripeMetadata->columnCount; columnIndex++) { - uint64 existsSize = stripeFooter->existsSizeArray[columnIndex]; - uint64 valueSize = stripeFooter->valueSizeArray[columnIndex]; - if (projectedColumnMask[columnIndex]) { ColumnBlockSkipNode *blockSkipNode = @@ -379,9 +368,6 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, columnBuffersArray[columnIndex] = columnBuffers; } - - currentColumnFileOffset += existsSize; - currentColumnFileOffset += valueSize; } stripeBuffers = palloc0(sizeof(StripeBuffers)); diff --git a/cstore_writer.c b/cstore_writer.c index 65871b511..91e73ffa8 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -34,7 +34,6 @@ static StripeSkipList * CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, uint32 columnCount); static StripeMetadata FlushStripe(TableWriteState *writeState); -static StripeFooter * CreateStripeFooter(StripeSkipList *stripeSkipList); static StringInfo SerializeBoolArray(bool *boolArray, uint32 boolArrayLength); static void SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue, int datumTypeLength, @@ -242,7 +241,6 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul MemoryContextReset(writeState->stripeWriteContext); writeState->currentStripeId++; - writeState->currentStripeOffset = 0; /* set stripe data and skip list to NULL so they are recreated next time */ writeState->stripeBuffers = NULL; @@ -450,8 +448,6 @@ static StripeMetadata FlushStripe(TableWriteState *writeState) { StripeMetadata stripeMetadata = { 0 }; - uint64 dataLength = 0; - StripeFooter *stripeFooter = NULL; uint32 columnIndex = 0; uint32 blockIndex = 0; StripeBuffers *stripeBuffers = writeState->stripeBuffers; @@ -464,6 +460,7 @@ 
FlushStripe(TableWriteState *writeState) uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; uint64 initialFileOffset = writeState->currentFileOffset; + uint64 stripeSize = 0; /* * check if the last block needs serialization , the last block was not serialized @@ -487,16 +484,10 @@ FlushStripe(TableWriteState *writeState) uint64 existsBufferSize = blockBuffers->existsBuffer->len; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; - blockSkipNode->existsBlockOffset = writeState->currentStripeOffset; + blockSkipNode->existsBlockOffset = stripeSize; blockSkipNode->existsLength = existsBufferSize; - writeState->currentStripeOffset += existsBufferSize; + stripeSize += existsBufferSize; } - } - - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - ColumnBlockSkipNode *blockSkipNodeArray = columnSkipNodeArray[columnIndex]; - ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { @@ -506,20 +497,14 @@ FlushStripe(TableWriteState *writeState) CompressionType valueCompressionType = blockBuffers->valueCompressionType; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; - blockSkipNode->valueBlockOffset = writeState->currentStripeOffset; + blockSkipNode->valueBlockOffset = stripeSize; blockSkipNode->valueLength = valueBufferSize; blockSkipNode->valueCompressionType = valueCompressionType; - writeState->currentStripeOffset += valueBufferSize; + stripeSize += valueBufferSize; } } - - /* create skip list and footer buffers */ - SaveStripeSkipList(writeState->relationId, writeState->currentStripeId, - stripeSkipList, tupleDescriptor); - stripeFooter = CreateStripeFooter(stripeSkipList); - /* * Each stripe has only one section: * Data section, in which we store data for each column continuously. 
@@ -557,17 +542,9 @@ FlushStripe(TableWriteState *writeState) } } - /* finally, we flush the footer buffer */ - SaveStripeFooter(writeState->relationId, - writeState->currentStripeId, - stripeFooter); - - /* set stripe metadata */ - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - dataLength += stripeFooter->existsSizeArray[columnIndex]; - dataLength += stripeFooter->valueSizeArray[columnIndex]; - } + /* create skip list and footer buffers */ + SaveStripeSkipList(writeState->relationId, writeState->currentStripeId, + stripeSkipList, tupleDescriptor); for (blockIndex = 0; blockIndex < blockCount; blockIndex++) { @@ -576,47 +553,16 @@ FlushStripe(TableWriteState *writeState) } stripeMetadata.fileOffset = initialFileOffset; - stripeMetadata.dataLength = dataLength; + stripeMetadata.dataLength = stripeSize; stripeMetadata.id = writeState->currentStripeId; stripeMetadata.blockCount = blockCount; stripeMetadata.blockRowCount = writeState->blockRowCount; + stripeMetadata.columnCount = columnCount; return stripeMetadata; } -/* Creates and returns the footer for given stripe. 
*/ -static StripeFooter * -CreateStripeFooter(StripeSkipList *stripeSkipList) -{ - StripeFooter *stripeFooter = NULL; - uint32 columnIndex = 0; - uint32 columnCount = stripeSkipList->columnCount; - uint64 *existsSizeArray = palloc0(columnCount * sizeof(uint64)); - uint64 *valueSizeArray = palloc0(columnCount * sizeof(uint64)); - - for (columnIndex = 0; columnIndex < columnCount; columnIndex++) - { - ColumnBlockSkipNode *blockSkipNodeArray = - stripeSkipList->blockSkipNodeArray[columnIndex]; - uint32 blockIndex = 0; - - for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) - { - existsSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].existsLength; - valueSizeArray[columnIndex] += blockSkipNodeArray[blockIndex].valueLength; - } - } - - stripeFooter = palloc0(sizeof(StripeFooter)); - stripeFooter->columnCount = columnCount; - stripeFooter->existsSizeArray = existsSizeArray; - stripeFooter->valueSizeArray = valueSizeArray; - - return stripeFooter; -} - - /* * SerializeBoolArray serializes the given boolean array and returns the result * as a StringInfo. This function packs every 8 boolean values into one byte. 
From 7714b60e5e69665b7cc9bf8f14d4fb104c98a3f2 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 23 Sep 2020 22:52:44 -0700 Subject: [PATCH 059/124] reset memory context at end of execution --- cstore_tableam.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 312e10981..0e630d653 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -58,7 +58,7 @@ CStoreTableAMGetOptions(void) static MemoryContext -GetCStoreMemoryContext(void) +GetCStoreMemoryContext() { if (CStoreContext == NULL) { @@ -69,6 +69,16 @@ GetCStoreMemoryContext(void) } +static void +ResetCStoreMemoryContext() +{ + if (CStoreContext != NULL) + { + MemoryContextReset(CStoreContext); + } +} + + static void cstore_init_write_state(Relation relation) { @@ -591,6 +601,7 @@ CStoreExecutorEnd(QueryDesc *queryDesc) { standard_ExecutorEnd(queryDesc); } + ResetCStoreMemoryContext(); } From 1d69519bd88495227d32a8c4309345456866f0fb Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Fri, 25 Sep 2020 13:03:34 -0700 Subject: [PATCH 060/124] Delete autogenerated expected files --- expected/am_block_filtering.out | 120 -------------------------------- expected/am_copyto.out | 23 ------ expected/am_data_types.out | 78 --------------------- expected/am_load.out | 42 ----------- 4 files changed, 263 deletions(-) delete mode 100644 expected/am_block_filtering.out delete mode 100644 expected/am_copyto.out delete mode 100644 expected/am_data_types.out delete mode 100644 expected/am_load.out diff --git a/expected/am_block_filtering.out b/expected/am_block_filtering.out deleted file mode 100644 index 005b42e64..000000000 --- a/expected/am_block_filtering.out +++ /dev/null @@ -1,120 +0,0 @@ --- --- Test block filtering in cstore_fdw using min/max values in stripe skip lists. --- --- --- filtered_row_count returns number of rows filtered by the WHERE clause. 
--- If blocks get filtered by cstore_fdw, less rows are passed to WHERE --- clause, so this function should return a lower number. --- -CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS -$$ - DECLARE - result bigint; - rec text; - BEGIN - result := 0; - - FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP - IF rec ~ '^\s+Rows Removed by Filter' then - result := regexp_replace(rec, '[^0-9]*', '', 'g'); - END IF; - END LOOP; - - RETURN result; - END; -$$ LANGUAGE PLPGSQL; --- Create and load data --- block_row_count '1000', stripe_row_count '2000' -set cstore.stripe_row_count = 2000; -set cstore.block_row_count = 1000; -CREATE TABLE test_block_filtering (a int) - USING cstore_tableam; -COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; --- Verify that filtered_row_count is less than 1000 for the following queries -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering'); - filtered_row_count --------------------- - 0 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); - filtered_row_count --------------------- - 801 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 200'); - filtered_row_count --------------------- - 200 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 9900'); - filtered_row_count --------------------- - 101 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a > 9900'); - filtered_row_count --------------------- - 900 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); - filtered_row_count --------------------- - 0 -(1 row) - --- Verify that filtered_row_count is less than 2000 for the following queries -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 1 AND 10'); - filtered_row_count --------------------- - 990 -(1 row) - 
-SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); - filtered_row_count --------------------- - 1979 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN -10 AND 0'); - filtered_row_count --------------------- - 0 -(1 row) - --- Load data for second time and verify that filtered_row_count is exactly twice as before -COPY test_block_filtering FROM '/Users/jefdavi/wd/cstore2/data/block_filtering.csv' WITH CSV; -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 200'); - filtered_row_count --------------------- - 1602 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a < 0'); - filtered_row_count --------------------- - 0 -(1 row) - -SELECT filtered_row_count('SELECT count(*) FROM test_block_filtering WHERE a BETWEEN 990 AND 2010'); - filtered_row_count --------------------- - 3958 -(1 row) - -set cstore.stripe_row_count to default; -set cstore.block_row_count to default; --- Verify that we are fine with collations which use a different alphabet order -CREATE TABLE collation_block_filtering_test(A text collate "da_DK") - USING cstore_tableam; -COPY collation_block_filtering_test FROM STDIN; -SELECT * FROM collation_block_filtering_test WHERE A > 'B'; - a ---- - Å -(1 row) - diff --git a/expected/am_copyto.out b/expected/am_copyto.out deleted file mode 100644 index c8a5f676b..000000000 --- a/expected/am_copyto.out +++ /dev/null @@ -1,23 +0,0 @@ --- --- Test copying data from cstore_fdw tables. --- -CREATE TABLE test_contestant(handle TEXT, birthdate DATE, rating INT, - percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam; --- load table data from file -COPY test_contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; --- export using COPY table TO ... 
-COPY test_contestant TO STDOUT; -a 01-10-1990 2090 97.1 XA {a} -b 11-01-1990 2203 98.1 XA {a,b} -c 11-01-1988 2907 99.4 XB {w,y} -d 05-05-1985 2314 98.3 XB {} -e 05-05-1995 2236 98.2 XC {a} --- export using COPY (SELECT * FROM table) TO ... -COPY (select * from test_contestant) TO STDOUT; -a 01-10-1990 2090 97.1 XA {a} -b 11-01-1990 2203 98.1 XA {a,b} -c 11-01-1988 2907 99.4 XB {w,y} -d 05-05-1985 2314 98.3 XB {} -e 05-05-1995 2236 98.2 XC {a} -DROP TABLE test_contestant CASCADE; diff --git a/expected/am_data_types.out b/expected/am_data_types.out deleted file mode 100644 index a597ff8de..000000000 --- a/expected/am_data_types.out +++ /dev/null @@ -1,78 +0,0 @@ --- --- Test loading and reading different data types to/from cstore_fdw foreign tables. --- --- Settings to make the result deterministic -SET datestyle = "ISO, YMD"; -SET timezone to 'GMT'; -SET intervalstyle TO 'POSTGRES_VERBOSE'; --- Test array types -CREATE TABLE test_array_types (int_array int[], bigint_array bigint[], - text_array text[]) USING cstore_tableam; -COPY test_array_types FROM '/Users/jefdavi/wd/cstore2/data/array_types.csv' WITH CSV; -SELECT * FROM test_array_types; - int_array | bigint_array | text_array ---------------------------+--------------------------------------------+------------ - {1,2,3} | {1,2,3} | {a,b,c} - {} | {} | {} - {-2147483648,2147483647} | {-9223372036854775808,9223372036854775807} | {""} -(3 rows) - --- Test date/time types -CREATE TABLE test_datetime_types (timestamp timestamp, - timestamp_with_timezone timestamp with time zone, date date, time time, - interval interval) USING cstore_tableam; -COPY test_datetime_types FROM '/Users/jefdavi/wd/cstore2/data/datetime_types.csv' WITH CSV; -SELECT * FROM test_datetime_types; - timestamp | timestamp_with_timezone | date | time | interval ----------------------+-------------------------+------------+----------+----------- - 2000-01-02 04:05:06 | 1999-01-08 12:05:06+00 | 2000-01-02 | 04:05:06 | @ 4 hours - 1970-01-01 
00:00:00 | infinity | -infinity | 00:00:00 | @ 0 -(2 rows) - --- Test enum and composite types -CREATE TYPE enum_type AS ENUM ('a', 'b', 'c'); -CREATE TYPE composite_type AS (a int, b text); -CREATE TABLE test_enum_and_composite_types (enum enum_type, - composite composite_type) USING cstore_tableam; -COPY test_enum_and_composite_types FROM - '/Users/jefdavi/wd/cstore2/data/enum_and_composite_types.csv' WITH CSV; -SELECT * FROM test_enum_and_composite_types; - enum | composite -------+----------- - a | (2,b) - b | (3,c) -(2 rows) - --- Test range types -CREATE TABLE test_range_types (int4range int4range, int8range int8range, - numrange numrange, tsrange tsrange) USING cstore_tableam; -COPY test_range_types FROM '/Users/jefdavi/wd/cstore2/data/range_types.csv' WITH CSV; -SELECT * FROM test_range_types; - int4range | int8range | numrange | tsrange ------------+-----------+----------+----------------------------------------------- - [1,3) | [1,3) | [1,3) | ["2000-01-02 00:30:00","2010-02-03 12:30:00") - empty | [1,) | (,) | empty -(2 rows) - --- Test other types -CREATE TABLE test_other_types (bool boolean, bytea bytea, money money, - inet inet, bitstring bit varying(5), uuid uuid, json json) USING cstore_tableam; -COPY test_other_types FROM '/Users/jefdavi/wd/cstore2/data/other_types.csv' WITH CSV; -SELECT * FROM test_other_types; - bool | bytea | money | inet | bitstring | uuid | json -------+------------+-------+-------------+-----------+--------------------------------------+------------------ - f | \xdeadbeef | $1.00 | 192.168.1.2 | 10101 | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | {"key": "value"} - t | \xcdb0 | $1.50 | 127.0.0.1 | | a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11 | [] -(2 rows) - --- Test null values -CREATE TABLE test_null_values (a int, b int[], c composite_type) - USING cstore_tableam; -COPY test_null_values FROM '/Users/jefdavi/wd/cstore2/data/null_values.csv' WITH CSV; -SELECT * FROM test_null_values; - a | b | c ----+--------+----- - | {NULL} | (,) - | 
| -(2 rows) - diff --git a/expected/am_load.out b/expected/am_load.out deleted file mode 100644 index 02cff343a..000000000 --- a/expected/am_load.out +++ /dev/null @@ -1,42 +0,0 @@ --- --- Test loading data into cstore_fdw tables. --- --- COPY with incorrect delimiter -COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' - WITH DELIMITER '|'; -- ERROR -ERROR: missing data for column "birthdate" -CONTEXT: COPY contestant, line 1: "a,1990-01-10,2090,97.1,XA ,{a}" --- COPY with invalid program -COPY contestant FROM PROGRAM 'invalid_program' WITH CSV; -- ERROR -ERROR: program "invalid_program" failed -DETAIL: command not found --- COPY into uncompressed table from file -COPY contestant FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; --- COPY into uncompressed table from program -COPY contestant FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' WITH CSV; --- COPY into compressed table -set cstore.compression = 'pglz'; -COPY contestant_compressed FROM '/Users/jefdavi/wd/cstore2/data/contestants.1.csv' WITH CSV; --- COPY into uncompressed table from program -COPY contestant_compressed FROM PROGRAM 'cat /Users/jefdavi/wd/cstore2/data/contestants.2.csv' - WITH CSV; -set cstore.compression to default; --- Test column list -CREATE TABLE famous_constants (id int, name text, value real) - USING cstore_tableam; -COPY famous_constants (value, name, id) FROM STDIN WITH CSV; -COPY famous_constants (name, value) FROM STDIN WITH CSV; -SELECT * FROM famous_constants ORDER BY id, name; - id | name | value -----+----------------+----------- - 1 | pi | 3.141 - 2 | e | 2.718 - 3 | gamma | 0.577 - 4 | bohr radius | 5.291e-11 - | avagadro | 6.022e+23 - | electron mass | 9.109e-31 - | proton mass | 1.672e-27 - | speed of light | 2.997e+08 -(8 rows) - -DROP TABLE famous_constants; From 5a077f2308e29f616da3f4be0d85026fbf8d3912 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Fri, 25 Sep 2020 13:10:32 -0700 Subject: [PATCH 061/124] 
Remove the unused drop event trigger --- cstore_fdw--1.7.sql | 27 --------------------------- cstore_fdw.c | 25 ------------------------- 2 files changed, 52 deletions(-) diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index 84b69be07..fa8b558e0 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -31,33 +31,6 @@ RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C STRICT; -CREATE OR REPLACE FUNCTION cstore_clean_table_resources(oid) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION cstore_drop_trigger() - RETURNS event_trigger - LANGUAGE plpgsql - AS $csdt$ -DECLARE v_obj record; -BEGIN - FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP - - IF v_obj.object_type NOT IN ('table', 'foreign table') THEN - CONTINUE; - END IF; - - PERFORM cstore.cstore_clean_table_resources(v_obj.objid); - - END LOOP; -END; -$csdt$; - -CREATE EVENT TRIGGER cstore_drop_event - ON SQL_DROP - EXECUTE PROCEDURE cstore_drop_trigger(); - CREATE TABLE cstore_tables ( relid oid NOT NULL, block_row_count int NOT NULL, diff --git a/cstore_fdw.c b/cstore_fdw.c index a66ba1d80..9a8882697 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -196,7 +196,6 @@ PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); PG_FUNCTION_INFO_V1(cstore_table_size); PG_FUNCTION_INFO_V1(cstore_fdw_handler); PG_FUNCTION_INFO_V1(cstore_fdw_validator); -PG_FUNCTION_INFO_V1(cstore_clean_table_resources); /* saved hook value in case of unload */ @@ -1179,30 +1178,6 @@ cstore_fdw_validator(PG_FUNCTION_ARGS) } -/* - * cstore_clean_table_resources cleans up table data and metadata with provided - * relation id. The function is meant to be called from drop_event_trigger. It - * has no way of knowing if the provided relation id belongs to a cstore table. - * Therefore it first checks if data file exists at default location before - * attempting to remove data and footer files. If the table is created at a - * custom path than its resources would not be removed. 
- */ -Datum -cstore_clean_table_resources(PG_FUNCTION_ARGS) -{ - /* - * TODO: Event triggers do not offer the relfilenode of the - * dropped table, and by the time the sql_drop event trigger - * is called, the object is already gone so we can't look it - * up. Therefore, we can't drop the Smgr storage here, which - * means that cascaded drops of cstore foreign tables will - * leak storage. - */ - - PG_RETURN_VOID(); -} - - /* * OptionNamesString finds all options that are valid for the current context, * and concatenates these option names in a comma separated string. The function From ec1e277e8ee7bb57b66e57476a8c707e33bec7a9 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Sat, 26 Sep 2020 23:50:23 -0700 Subject: [PATCH 062/124] Initial implementation of ANALYZE --- Makefile | 4 ++-- cstore_tableam.c | 27 ++++++++++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index ea5a858bf..483aebc35 100644 --- a/Makefile +++ b/Makefile @@ -48,11 +48,11 @@ ifeq ($(USE_FDW),yes) fdw_copyto fdw_alter fdw_truncate fdw_clean endif -# disabled tests: am_block_filtering am_analyze +# disabled tests: am_block_filtering ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o - REGRESS += am_create am_load am_query am_data_types am_functions \ + REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_truncate am_clean endif diff --git a/cstore_tableam.c b/cstore_tableam.c index 312e10981..3f8c37db9 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -470,8 +470,13 @@ static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) { - /* TODO */ - return false; + /* + * Our access method is not pages based, i.e. tuples are not confined + * to pages boundaries. So not much to do here. 
We return true anyway + * so acquire_sample_rows() in analyze.c would call our + * cstore_scan_analyze_next_tuple() callback. + */ + return true; } @@ -480,7 +485,23 @@ cstore_scan_analyze_next_tuple(TableScanDesc scan, TransactionId OldestXmin, double *liverows, double *deadrows, TupleTableSlot *slot) { - /* TODO */ + /* + * Currently we don't do anything smart to reduce number of rows returned + * for ANALYZE. The TableAM API's ANALYZE functions are designed for page + * based access methods where it chooses random pages, and then reads + * tuples from those pages. + * + * We could do something like that here by choosing sample stripes or blocks, + * but getting that correct might need quite some work. Since cstore_fdw's + * ANALYZE scanned all rows, as a starter we do the same here and scan all + * rows. + */ + if (cstore_getnextslot(scan, ForwardScanDirection, slot)) + { + (*liverows)++; + return true; + } + return false; } From cf0ba6103ed1d8e91a6976b5c001be5d8a7b6f7e Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Sat, 26 Sep 2020 12:23:14 -0700 Subject: [PATCH 063/124] Associate metadata with rel filenode --- cstore.c | 23 ++------- cstore.h | 26 +++++----- cstore_fdw--1.7.sql | 16 +++--- cstore_fdw.c | 47 +++++++++--------- cstore_metadata_tables.c | 103 +++++++++++++++++++++++++-------------- cstore_reader.c | 11 +++-- cstore_tableam.c | 13 ++--- cstore_writer.c | 16 +++--- 8 files changed, 134 insertions(+), 121 deletions(-) diff --git a/cstore.c b/cstore.c index f5846a029..d6b6751e2 100644 --- a/cstore.c +++ b/cstore.c @@ -102,26 +102,11 @@ ParseCompressionType(const char *compressionTypeString) /* - * InitializeCStoreTableFile creates data and footer file for a cstore table. - * The function assumes data and footer files do not exist, therefore - * it should be called on empty or non-existing table. Notice that the caller - * is expected to acquire AccessExclusiveLock on the relation. 
+ * InitializeCStoreTableFile initializes metadata for the given relation + * file node. */ void -InitializeCStoreTableFile(Oid relationId, Relation relation, CStoreOptions *cstoreOptions) +InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions) { - TableWriteState *writeState = NULL; - TupleDesc tupleDescriptor = RelationGetDescr(relation); - - InitCStoreTableMetadata(relationId, cstoreOptions->blockRowCount); - - /* - * Initialize state to write to the cstore file. This creates an - * empty data file and a valid footer file for the table. - */ - writeState = CStoreBeginWrite(relationId, - cstoreOptions->compressionType, - cstoreOptions->stripeRowCount, - cstoreOptions->blockRowCount, tupleDescriptor); - CStoreEndWrite(writeState); + InitCStoreTableMetadata(relNode, cstoreOptions->blockRowCount); } diff --git a/cstore.h b/cstore.h index 96fa1ed53..dd5f9e6e1 100644 --- a/cstore.h +++ b/cstore.h @@ -16,7 +16,9 @@ #include "fmgr.h" #include "lib/stringinfo.h" +#include "nodes/parsenodes.h" #include "storage/bufpage.h" +#include "storage/lockdefs.h" #include "utils/relcache.h" /* Defines for valid option names */ @@ -190,8 +192,6 @@ typedef struct StripeBuffers /* TableReadState represents state of a cstore file read operation. */ typedef struct TableReadState { - Oid relationId; - TableMetadata *tableMetadata; StripeMetadata *currentStripeMetadata; TupleDesc tupleDescriptor; @@ -217,7 +217,6 @@ typedef struct TableReadState /* TableWriteState represents state of a cstore file write operation. 
*/ typedef struct TableWriteState { - Oid relationId; TableMetadata *tableMetadata; CompressionType compressionType; TupleDesc tupleDescriptor; @@ -249,11 +248,12 @@ extern int cstore_block_row_count; extern void cstore_init(void); extern CompressionType ParseCompressionType(const char *compressionTypeString); -extern void InitializeCStoreTableFile(Oid relationId, Relation relation, - CStoreOptions *cstoreOptions); +extern void InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions); +extern bool IsCStoreFdwTable(Oid relationId); +extern Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); /* Function declarations for writing to a cstore file */ -extern TableWriteState * CStoreBeginWrite(Oid relationId, +extern TableWriteState * CStoreBeginWrite(Relation relation, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, @@ -263,7 +263,7 @@ extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, extern void CStoreEndWrite(TableWriteState *state); /* Function declarations for reading from a cstore file */ -extern TableReadState * CStoreBeginRead(Oid relationId, +extern TableReadState * CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, List *projectedColumnList, List *qualConditions); extern bool CStoreReadFinished(TableReadState *state); @@ -283,12 +283,14 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); /* cstore_metadata_tables.c */ -extern void InitCStoreTableMetadata(Oid relid, int blockRowCount); -extern void InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe); -extern TableMetadata * ReadTableMetadata(Oid relid); -extern void SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, +extern void DeleteTableMetadataRowIfExists(Oid relfilenode); +extern void InitCStoreTableMetadata(Oid relfilenode, int blockRowCount); +extern void 
InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); +extern TableMetadata * ReadTableMetadata(Oid relfilenode); +extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, + StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); -extern StripeSkipList * ReadStripeSkipList(Oid relid, uint64 stripe, +extern StripeSkipList * ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 blockCount); diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index fa8b558e0..b3470b6a5 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -32,17 +32,17 @@ AS 'MODULE_PATHNAME' LANGUAGE C STRICT; CREATE TABLE cstore_tables ( - relid oid NOT NULL, + relfilenode oid NOT NULL, block_row_count int NOT NULL, version_major bigint NOT NULL, version_minor bigint NOT NULL, - PRIMARY KEY (relid) + PRIMARY KEY (relfilenode) ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_tables IS 'CStore table wide metadata'; CREATE TABLE cstore_stripes ( - relid oid NOT NULL, + relfilenode oid NOT NULL, stripe bigint NOT NULL, file_offset bigint NOT NULL, data_length bigint NOT NULL, @@ -50,14 +50,14 @@ CREATE TABLE cstore_stripes ( block_count int NOT NULL, block_row_count int NOT NULL, row_count bigint NOT NULL, - PRIMARY KEY (relid, stripe), - FOREIGN KEY (relid) REFERENCES cstore_tables(relid) ON DELETE CASCADE INITIALLY DEFERRED + PRIMARY KEY (relfilenode, stripe), + FOREIGN KEY (relfilenode) REFERENCES cstore_tables(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_tables IS 'CStore per stripe metadata'; CREATE TABLE cstore_skipnodes ( - relid oid NOT NULL, + relfilenode oid NOT NULL, stripe bigint NOT NULL, attr int NOT NULL, block int NOT NULL, @@ -69,8 +69,8 @@ CREATE TABLE cstore_skipnodes ( exists_stream_offset bigint NOT NULL, exists_stream_length bigint NOT NULL, value_compression_type int NOT NULL, - PRIMARY KEY (relid, stripe, attr, block), - FOREIGN KEY (relid, 
stripe) REFERENCES cstore_stripes(relid, stripe) ON DELETE CASCADE INITIALLY DEFERRED + PRIMARY KEY (relfilenode, stripe, attr, block), + FOREIGN KEY (relfilenode, stripe) REFERENCES cstore_stripes(relfilenode, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_tables IS 'CStore per block metadata'; diff --git a/cstore_fdw.c b/cstore_fdw.c index 9a8882697..f9f886f79 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -131,7 +131,6 @@ static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void FdwNewRelFileNode(Relation relation); static void TruncateCStoreTables(List *cstoreRelationList); -static bool CStoreTable(Oid relationId); static bool CStoreServer(ForeignServer *server); static bool DistributedTable(Oid relationId); static bool DistributedWorkerCopy(CopyStmt *copyStatement); @@ -189,7 +188,6 @@ static bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); #endif static void cstore_fdw_initrel(Relation rel); -static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); static Relation cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode); PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); @@ -267,7 +265,8 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) * We have no chance to hook into server creation to create data * directory for it during database creation time. 
*/ - InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); + InitializeCStoreTableFile(relation->rd_node.relNode, + CStoreGetOptions(relationId)); heap_close(relation, AccessExclusiveLock); } } @@ -403,7 +402,7 @@ CopyCStoreTableStatement(CopyStmt *copyStatement) { Oid relationId = RangeVarGetRelid(copyStatement->relation, AccessShareLock, true); - bool cstoreTable = CStoreTable(relationId); + bool cstoreTable = IsCStoreFdwTable(relationId); if (cstoreTable) { bool distributedTable = DistributedTable(relationId); @@ -558,12 +557,11 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) #endif /* init state to write to the cstore file */ - writeState = CStoreBeginWrite(relationId, + writeState = CStoreBeginWrite(relation, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); - writeState->relation = relation; while (nextRowFound) { @@ -686,7 +684,7 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) } relationId = RangeVarGetRelid(relationRangeVar, AccessShareLock, true); - if (!CStoreTable(relationId)) + if (!IsCStoreFdwTable(relationId)) { return; } @@ -765,7 +763,7 @@ FindCStoreTables(List *tableList) { RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); - if (CStoreTable(relationId) && !DistributedTable(relationId)) + if (IsCStoreFdwTable(relationId) && !DistributedTable(relationId)) { cstoreTableList = lappend(cstoreTableList, rangeVar); } @@ -825,10 +823,11 @@ TruncateCStoreTables(List *cstoreRelationList) Relation relation = (Relation) lfirst(relationCell); Oid relationId = relation->rd_id; - Assert(CStoreTable(relationId)); + Assert(IsCStoreFdwTable(relationId)); FdwNewRelFileNode(relation); - InitializeCStoreTableFile(relationId, relation, CStoreGetOptions(relationId)); + InitializeCStoreTableFile(relation->rd_node.relNode, + CStoreGetOptions(relationId)); } } @@ 
-861,7 +860,6 @@ FdwNewRelFileNode(Relation relation) Relation tmprel; Oid tablespace; Oid filenode; - RelFileNode newrnode; /* * Upgrade to AccessExclusiveLock, and hold until the end of the @@ -887,10 +885,6 @@ FdwNewRelFileNode(Relation relation) filenode = GetNewRelFileNode(tablespace, NULL, persistence); - newrnode.spcNode = tablespace; - newrnode.dbNode = MyDatabaseId; - newrnode.relNode = filenode; - classform->relfilenode = filenode; classform->relpages = 0; /* it's empty until further notice */ classform->reltuples = 0; @@ -900,6 +894,10 @@ FdwNewRelFileNode(Relation relation) CatalogTupleUpdate(pg_class, &tuple->t_self, tuple); CommandCounterIncrement(); + + relation->rd_node.spcNode = tablespace; + relation->rd_node.dbNode = MyDatabaseId; + relation->rd_node.relNode = filenode; } heap_freetuple(tuple); @@ -928,11 +926,11 @@ FdwCreateStorage(Relation relation) /* - * CStoreTable checks if the given table name belongs to a foreign columnar store + * IsCStoreFdwTable checks if the given table name belongs to a foreign columnar store * table. If it does, the function returns true. Otherwise, it returns false. 
*/ -static bool -CStoreTable(Oid relationId) +bool +IsCStoreFdwTable(Oid relationId) { bool cstoreTable = false; char relationKind = 0; @@ -1055,7 +1053,7 @@ Datum cstore_table_size(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); - bool cstoreTable = CStoreTable(relationId); + bool cstoreTable = IsCStoreFdwTable(relationId); Relation relation; BlockNumber nblocks; @@ -1705,6 +1703,7 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) ForeignScan *foreignScan = NULL; List *foreignPrivateList = NIL; List *whereClauseList = NIL; + Relation relation = NULL; cstore_fdw_initrel(currentRelation); @@ -1721,9 +1720,8 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) whereClauseList = foreignScan->scan.plan.qual; columnList = (List *) linitial(foreignPrivateList); - readState = CStoreBeginRead(foreignTableId, - tupleDescriptor, columnList, whereClauseList); - readState->relation = cstore_fdw_open(foreignTableId, AccessShareLock); + relation = cstore_fdw_open(foreignTableId, AccessShareLock); + readState = CStoreBeginRead(relation, tupleDescriptor, columnList, whereClauseList); scanState->fdw_state = (void *) readState; } @@ -2067,13 +2065,12 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela cstoreOptions = CStoreGetOptions(foreignTableOid); tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); - writeState = CStoreBeginWrite(foreignTableOid, + writeState = CStoreBeginWrite(relation, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); - writeState->relation = relation; relationInfo->ri_FdwState = (void *) writeState; } @@ -2196,7 +2193,7 @@ cstore_fdw_initrel(Relation rel) } -static Relation +Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode) { Relation rel = heap_open(relationId, lockmode); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 690e9eba9..4459d3009 100644 --- 
a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -50,8 +50,7 @@ static Oid CStoreTablesIndexRelationId(void); static Oid CStoreSkipNodesRelationId(void); static Oid CStoreSkipNodesIndexRelationId(void); static Oid CStoreNamespaceId(void); -static int TableBlockRowCount(Oid relid); -static void DeleteTableMetadataRowIfExists(Oid relid); +static bool ReadCStoreTables(Oid relfilenode, uint64 *blockRowCount); static ModifyState * StartModifyRelation(Relation rel); static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls); @@ -63,14 +62,14 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* constants for cstore_table */ #define Natts_cstore_tables 4 -#define Anum_cstore_tables_relid 1 +#define Anum_cstore_tables_relfilenode 1 #define Anum_cstore_tables_block_row_count 2 #define Anum_cstore_tables_version_major 3 #define Anum_cstore_tables_version_minor 4 /* constants for cstore_stripe */ #define Natts_cstore_stripes 8 -#define Anum_cstore_stripes_relid 1 +#define Anum_cstore_stripes_relfilenode 1 #define Anum_cstore_stripes_stripe 2 #define Anum_cstore_stripes_file_offset 3 #define Anum_cstore_stripes_data_length 4 @@ -81,7 +80,7 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* constants for cstore_skipnodes */ #define Natts_cstore_skipnodes 12 -#define Anum_cstore_skipnodes_relid 1 +#define Anum_cstore_skipnodes_relfilenode 1 #define Anum_cstore_skipnodes_stripe 2 #define Anum_cstore_skipnodes_attr 3 #define Anum_cstore_skipnodes_block 4 @@ -99,7 +98,7 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); * InitCStoreTableMetadata adds a record for the given relation in cstore_table. 
*/ void -InitCStoreTableMetadata(Oid relid, int blockRowCount) +InitCStoreTableMetadata(Oid relfilenode, int blockRowCount) { Oid cstoreTablesOid = InvalidOid; Relation cstoreTables = NULL; @@ -107,13 +106,13 @@ InitCStoreTableMetadata(Oid relid, int blockRowCount) bool nulls[Natts_cstore_tables] = { 0 }; Datum values[Natts_cstore_tables] = { - ObjectIdGetDatum(relid), + ObjectIdGetDatum(relfilenode), Int32GetDatum(blockRowCount), Int32GetDatum(CSTORE_VERSION_MAJOR), Int32GetDatum(CSTORE_VERSION_MINOR) }; - DeleteTableMetadataRowIfExists(relid); + DeleteTableMetadataRowIfExists(relfilenode); cstoreTablesOid = CStoreTablesRelationId(); cstoreTables = heap_open(cstoreTablesOid, RowExclusiveLock); @@ -133,7 +132,7 @@ InitCStoreTableMetadata(Oid relid, int blockRowCount) * of cstore_skipnodes. */ void -SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, +SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor) { uint32 columnIndex = 0; @@ -155,7 +154,7 @@ SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, &stripeSkipList->blockSkipNodeArray[columnIndex][blockIndex]; Datum values[Natts_cstore_skipnodes] = { - ObjectIdGetDatum(relid), + ObjectIdGetDatum(relfilenode), Int64GetDatum(stripe), Int32GetDatum(columnIndex + 1), Int32GetDatum(blockIndex), @@ -201,7 +200,7 @@ SaveStripeSkipList(Oid relid, uint64 stripe, StripeSkipList *stripeSkipList, * ReadStripeSkipList fetches StripeSkipList for a given stripe. 
*/ StripeSkipList * -ReadStripeSkipList(Oid relid, uint64 stripe, TupleDesc tupleDescriptor, +ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 blockCount) { StripeSkipList *skipList = NULL; @@ -218,8 +217,8 @@ ReadStripeSkipList(Oid relid, uint64 stripe, TupleDesc tupleDescriptor, cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock); index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock); - ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relid, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe)); @@ -311,11 +310,11 @@ ReadStripeSkipList(Oid relid, uint64 stripe, TupleDesc tupleDescriptor, * InsertStripeMetadataRow adds a row to cstore_stripes. */ void -InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) +InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) { bool nulls[Natts_cstore_stripes] = { 0 }; Datum values[Natts_cstore_stripes] = { - ObjectIdGetDatum(relid), + ObjectIdGetDatum(relfilenode), Int64GetDatum(stripe->id), Int64GetDatum(stripe->fileOffset), Int64GetDatum(stripe->dataLength), @@ -339,11 +338,11 @@ InsertStripeMetadataRow(Oid relid, StripeMetadata *stripe) /* - * ReadTableMetadata constructs TableMetadata for a given relid by reading + * ReadTableMetadata constructs TableMetadata for a given relfilenode by reading * from cstore_tables and cstore_stripes. 
*/ TableMetadata * -ReadTableMetadata(Oid relid) +ReadTableMetadata(Oid relfilenode) { Oid cstoreStripesOid = InvalidOid; Relation cstoreStripes = NULL; @@ -352,12 +351,18 @@ ReadTableMetadata(Oid relid) ScanKeyData scanKey[1]; SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple; + bool found = false; TableMetadata *tableMetadata = palloc0(sizeof(TableMetadata)); - tableMetadata->blockRowCount = TableBlockRowCount(relid); + found = ReadCStoreTables(relfilenode, &tableMetadata->blockRowCount); + if (!found) + { + ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.", + relfilenode))); + } - ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relid, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); cstoreStripesOid = CStoreStripesRelationId(); cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock); @@ -402,12 +407,13 @@ ReadTableMetadata(Oid relid) /* - * TableBlockRowCount returns block_row_count column from cstore_tables for a given relid. + * ReadCStoreTables reads corresponding record from cstore_tables. Returns false if + * table was not found in cstore_tables. 
*/ -static int -TableBlockRowCount(Oid relid) +static bool +ReadCStoreTables(Oid relfilenode, uint64 *blockRowCount) { - int blockRowCount = 0; + bool found = false; Oid cstoreTablesOid = InvalidOid; Relation cstoreTables = NULL; Relation index = NULL; @@ -416,12 +422,29 @@ TableBlockRowCount(Oid relid) SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple = NULL; - ScanKeyInit(&scanKey[0], Anum_cstore_tables_relid, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[0], Anum_cstore_tables_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = heap_open(cstoreTablesOid, AccessShareLock); - index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); + cstoreTables = try_relation_open(cstoreTablesOid, AccessShareLock); + if (cstoreTables == NULL) + { + /* + * Extension has been dropped. This can be called while + * dropping extension or database via ObjectAccess(). + */ + return false; + } + + index = try_relation_open(CStoreTablesIndexRelationId(), AccessShareLock); + if (index == NULL) + { + heap_close(cstoreTables, NoLock); + + /* extension has been dropped */ + return false; + } + tupleDescriptor = RelationGetDescr(cstoreTables); scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); @@ -432,22 +455,24 @@ TableBlockRowCount(Oid relid) Datum datumArray[Natts_cstore_tables]; bool isNullArray[Natts_cstore_tables]; heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); - blockRowCount = DatumGetInt32(datumArray[Anum_cstore_tables_block_row_count - 1]); + *blockRowCount = DatumGetInt32(datumArray[Anum_cstore_tables_block_row_count - + 1]); + found = true; } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); heap_close(cstoreTables, NoLock); - return blockRowCount; + return found; } /* - * DeleteTableMetadataRowIfExists removes the row with given relid from cstore_stripes. 
+ * DeleteTableMetadataRowIfExists removes the row with given relfilenode from cstore_stripes. */ -static void -DeleteTableMetadataRowIfExists(Oid relid) +void +DeleteTableMetadataRowIfExists(Oid relfilenode) { Oid cstoreTablesOid = InvalidOid; Relation cstoreTables = NULL; @@ -456,11 +481,17 @@ DeleteTableMetadataRowIfExists(Oid relid) SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple = NULL; - ScanKeyInit(&scanKey[0], Anum_cstore_tables_relid, - BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relid)); + ScanKeyInit(&scanKey[0], Anum_cstore_tables_relfilenode, + BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = heap_open(cstoreTablesOid, AccessShareLock); + cstoreTables = try_relation_open(cstoreTablesOid, AccessShareLock); + if (cstoreTables == NULL) + { + /* extension has been dropped */ + return; + } + index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); diff --git a/cstore_reader.c b/cstore_reader.c index 25702b272..2ee4101c0 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -80,14 +80,15 @@ static StringInfo ReadFromSmgr(Relation rel, uint64 offset, uint32 size); * read handle that's used during reading rows and finishing the read operation. 
*/ TableReadState * -CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, +CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { TableReadState *readState = NULL; TableMetadata *tableMetadata = NULL; MemoryContext stripeReadContext = NULL; + Oid relNode = relation->rd_node.relNode; - tableMetadata = ReadTableMetadata(relationId); + tableMetadata = ReadTableMetadata(relNode); /* * We allocate all stripe specific data in the stripeReadContext, and reset @@ -99,7 +100,7 @@ CStoreBeginRead(Oid relationId, TupleDesc tupleDescriptor, ALLOCSET_DEFAULT_SIZES); readState = palloc0(sizeof(TableReadState)); - readState->relationId = relationId; + readState->relation = relation; readState->tableMetadata = tableMetadata; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; @@ -308,7 +309,7 @@ CStoreTableRowCount(Relation relation) ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - tableMetadata = ReadTableMetadata(relation->rd_id); + tableMetadata = ReadTableMetadata(relation->rd_node.relNode); foreach(stripeMetadataCell, tableMetadata->stripeMetadataList) { @@ -337,7 +338,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, bool *projectedColumnMask = ProjectedColumnMask(columnCount, projectedColumnList); - StripeSkipList *stripeSkipList = ReadStripeSkipList(RelationGetRelid(relation), + StripeSkipList *stripeSkipList = ReadStripeSkipList(relation->rd_node.relNode, stripeMetadata->id, tupleDescriptor, stripeMetadata->blockCount); diff --git a/cstore_tableam.c b/cstore_tableam.c index 243cbcb3e..d091916cf 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -97,13 +97,11 @@ cstore_init_write_state(Relation relation) TupleDesc tupdesc = RelationGetDescr(relation); elog(LOG, "initializing write state for relation %d", relation->rd_id); - CStoreWriteState = CStoreBeginWrite(relation->rd_id, + CStoreWriteState = 
CStoreBeginWrite(relation, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupdesc); - - CStoreWriteState->relation = relation; } } @@ -134,16 +132,12 @@ cstore_beginscan(Relation relation, Snapshot snapshot, ParallelTableScanDesc parallel_scan, uint32 flags) { - Oid relid = relation->rd_id; TupleDesc tupdesc = relation->rd_att; - CStoreOptions *cstoreOptions = NULL; TableReadState *readState = NULL; CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *columnList = NIL; MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); - cstoreOptions = CStoreTableAMGetOptions(); - scan->cs_base.rs_rd = relation; scan->cs_base.rs_snapshot = snapshot; scan->cs_base.rs_nkeys = nkeys; @@ -171,8 +165,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, columnList = lappend(columnList, var); } - readState = CStoreBeginRead(relid, tupdesc, columnList, NULL); - readState->relation = relation; + readState = CStoreBeginRead(relation, tupdesc, columnList, NULL); scan->cs_readState = readState; @@ -443,7 +436,7 @@ cstore_relation_set_new_filenode(Relation rel, *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - InitializeCStoreTableFile(rel->rd_id, rel, CStoreTableAMGetOptions()); + InitializeCStoreTableFile(newrnode->relNode, CStoreTableAMGetOptions()); smgrclose(srel); } diff --git a/cstore_writer.c b/cstore_writer.c index 91e73ffa8..728c855b4 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -58,7 +58,7 @@ static StringInfo CopyStringInfo(StringInfo sourceString); * will be added. 
*/ TableWriteState * -CStoreBeginWrite(Oid relationId, +CStoreBeginWrite(Relation relation, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, TupleDesc tupleDescriptor) @@ -73,8 +73,9 @@ CStoreBeginWrite(Oid relationId, bool *columnMaskArray = NULL; BlockData *blockData = NULL; uint64 currentStripeId = 0; + Oid relNode = relation->rd_node.relNode; - tableMetadata = ReadTableMetadata(relationId); + tableMetadata = ReadTableMetadata(relNode); /* * If stripeMetadataList is not empty, jump to the position right after @@ -127,7 +128,7 @@ CStoreBeginWrite(Oid relationId, blockData = CreateEmptyBlockData(columnCount, columnMaskArray, blockRowCount); writeState = palloc0(sizeof(TableWriteState)); - writeState->relationId = relationId; + writeState->relation = relation; writeState->tableMetadata = tableMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; @@ -251,7 +252,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul * doesn't free it. 
*/ MemoryContextSwitchTo(oldContext); - InsertStripeMetadataRow(writeState->relationId, &stripeMetadata); + InsertStripeMetadataRow(writeState->relation->rd_node.relNode, + &stripeMetadata); AppendStripeMetadata(tableMetadata, stripeMetadata); } else @@ -280,7 +282,8 @@ CStoreEndWrite(TableWriteState *writeState) MemoryContextReset(writeState->stripeWriteContext); MemoryContextSwitchTo(oldContext); - InsertStripeMetadataRow(writeState->relationId, &stripeMetadata); + InsertStripeMetadataRow(writeState->relation->rd_node.relNode, + &stripeMetadata); AppendStripeMetadata(writeState->tableMetadata, stripeMetadata); } @@ -543,7 +546,8 @@ FlushStripe(TableWriteState *writeState) } /* create skip list and footer buffers */ - SaveStripeSkipList(writeState->relationId, writeState->currentStripeId, + SaveStripeSkipList(writeState->relation->rd_node.relNode, + writeState->currentStripeId, stripeSkipList, tupleDescriptor); for (blockIndex = 0; blockIndex < blockCount; blockIndex++) From d37c717e143fdd07275393f5e81ebbc6780fc069 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Sat, 26 Sep 2020 12:39:16 -0700 Subject: [PATCH 064/124] Clean-up resources on drop --- cstore.c | 80 ++++++++++++++++++++++++++++++++++++++++ cstore.h | 1 + cstore_fdw.c | 50 ------------------------- cstore_metadata_tables.c | 11 ++++++ expected/am_drop.out | 15 ++++++++ expected/fdw_drop.out | 15 ++++++++ sql/am_drop.sql | 8 ++++ sql/fdw_drop.sql | 8 ++++ 8 files changed, 138 insertions(+), 50 deletions(-) diff --git a/cstore.c b/cstore.c index d6b6751e2..1d6e414ae 100644 --- a/cstore.c +++ b/cstore.c @@ -16,9 +16,13 @@ #include #include +#include "access/heapam.h" +#include "catalog/objectaccess.h" +#include "catalog/storage.h" #include "miscadmin.h" #include "utils/guc.h" #include "utils/rel.h" +#include "utils/relcache.h" #include "cstore.h" @@ -38,6 +42,11 @@ static const struct config_enum_entry cstore_compression_options[] = { NULL, 0, false } }; +static object_access_hook_type 
prevObjectAccess = NULL; + +static void ObjectAccess(ObjectAccessType access, Oid classId, Oid objectId, int subId, + void *arg); + void cstore_init() { @@ -78,6 +87,9 @@ cstore_init() NULL, NULL, NULL); + + prevObjectAccess = object_access_hook; + object_access_hook = ObjectAccess; } @@ -110,3 +122,71 @@ InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions) { InitCStoreTableMetadata(relNode, cstoreOptions->blockRowCount); } + + +/* + * Implements object_access_hook. One of the places this is called is just + * before dropping an object, which allows us to clean-up resources for + * cstore tables while the pg_class record for the table is still there. + */ +static void +ObjectAccess(ObjectAccessType access, Oid classId, Oid objectId, int subId, void *arg) +{ + if (prevObjectAccess) + { + prevObjectAccess(access, classId, objectId, subId, arg); + } + + /* + * Do nothing if this is not a DROP relation command. + */ + if (access != OAT_DROP || classId != RelationRelationId || OidIsValid(subId)) + { + return; + } + + if (IsCStoreFdwTable(objectId)) + { + /* + * Drop both metadata and storage. We need to drop storage here since + * we manage relfilenode for FDW tables in the extension. + */ + Relation rel = cstore_fdw_open(objectId, AccessExclusiveLock); + RelationOpenSmgr(rel); + RelationDropStorage(rel); + DeleteTableMetadataRowIfExists(rel->rd_node.relNode); + + /* keep the lock since we did physical changes to the relation */ + relation_close(rel, NoLock); + } + else + { + Oid relNode = InvalidOid; + Relation rel = try_relation_open(objectId, AccessExclusiveLock); + if (rel == NULL) + { + return; + } + + relNode = rel->rd_node.relNode; + if (IsCStoreStorage(relNode)) + { + /* + * Drop only metadata for table am cstore tables. Postgres manages + * storage for these tables, so we don't need to drop that. 
+ */ + DeleteTableMetadataRowIfExists(relNode); + + /* keep the lock since we did physical changes to the relation */ + relation_close(rel, NoLock); + } + else + { + /* + * For non-cstore tables, we do nothing. + * Release the lock since we haven't changed the relation. + */ + relation_close(rel, AccessExclusiveLock); + } + } +} diff --git a/cstore.h b/cstore.h index dd5f9e6e1..919352c6c 100644 --- a/cstore.h +++ b/cstore.h @@ -283,6 +283,7 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); /* cstore_metadata_tables.c */ +extern bool IsCStoreStorage(Oid relfilenode); extern void DeleteTableMetadataRowIfExists(Oid relfilenode); extern void InitCStoreTableMetadata(Oid relfilenode, int blockRowCount); extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); diff --git a/cstore_fdw.c b/cstore_fdw.c index f9f886f79..d4c5c1ec1 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -126,7 +126,6 @@ static uint64 CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString); static uint64 CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString); static void CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement); -static List * DroppedCStoreRelidList(DropStmt *dropStatement); static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void FdwNewRelFileNode(Relation relation); @@ -315,25 +314,6 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, destReceiver, completionTag); } } - else if (nodeTag(parseTree) == T_DropStmt) - { - List *dropRelids = DroppedCStoreRelidList((DropStmt *) parseTree); - ListCell *lc = NULL; - - /* drop smgr storage */ - foreach(lc, dropRelids) - { - Oid relid = lfirst_oid(lc); - Relation relation = cstore_fdw_open(relid, AccessExclusiveLock); - - RelationOpenSmgr(relation); - RelationDropStorage(relation); - 
heap_close(relation, AccessExclusiveLock); - } - - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); - } else if (nodeTag(parseTree) == T_TruncateStmt) { TruncateStmt *truncateStatement = (TruncateStmt *) parseTree; @@ -723,36 +703,6 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) } -/* - * DropppedCStoreRelidList extracts and returns the list of cstore relids - * from DROP table statement - */ -static List * -DroppedCStoreRelidList(DropStmt *dropStatement) -{ - List *droppedCStoreRelidList = NIL; - - if (dropStatement->removeType == OBJECT_FOREIGN_TABLE) - { - ListCell *dropObjectCell = NULL; - foreach(dropObjectCell, dropStatement->objects) - { - List *tableNameList = (List *) lfirst(dropObjectCell); - RangeVar *rangeVar = makeRangeVarFromNameList(tableNameList); - - Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); - if (CStoreTable(relationId)) - { - droppedCStoreRelidList = lappend_oid(droppedCStoreRelidList, - relationId); - } - } - } - - return droppedCStoreRelidList; -} - - /* FindCStoreTables returns list of CStore tables from given table list */ static List * FindCStoreTables(List *tableList) diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 4459d3009..e1f1caedf 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -94,6 +94,17 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); #define Anum_cstore_skipnodes_value_compression_type 12 +/* + * IsCStoreStorage returns if relfilenode belongs to a cstore table. + */ +bool +IsCStoreStorage(Oid relfilenode) +{ + uint64 blockRowCount = 0; + return ReadCStoreTables(relfilenode, &blockRowCount); +} + + /* * InitCStoreTableMetadata adds a record for the given relation in cstore_table. 
*/ diff --git a/expected/am_drop.out b/expected/am_drop.out index e1c634d7f..c1fc60519 100644 --- a/expected/am_drop.out +++ b/expected/am_drop.out @@ -12,14 +12,29 @@ -- 'postgres' directory is excluded from comparison to have the same result. -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset -- DROP cstore_fdw tables DROP TABLE contestant; DROP TABLE contestant_compressed; +-- make sure DROP deletes metadata +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + ?column? +---------- + 2 +(1 row) + -- Create a cstore_fdw table under a schema and drop it. CREATE SCHEMA test_schema; CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to table test_schema.test_table +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + ?column? +---------- + 1 +(1 row) + SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop diff --git a/expected/fdw_drop.out b/expected/fdw_drop.out index 926f69337..24c0f518d 100644 --- a/expected/fdw_drop.out +++ b/expected/fdw_drop.out @@ -12,14 +12,29 @@ -- 'postgres' directory is excluded from comparison to have the same result. -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; +-- make sure DROP deletes metadata +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + ?column? +---------- + 2 +(1 row) + -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to foreign table test_schema.test_table +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + ?column? +---------- + 1 +(1 row) + SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop diff --git a/sql/am_drop.sql b/sql/am_drop.sql index f92f90b9d..06873aa6e 100644 --- a/sql/am_drop.sql +++ b/sql/am_drop.sql @@ -15,14 +15,22 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset + -- DROP cstore_fdw tables DROP TABLE contestant; DROP TABLE contestant_compressed; +-- make sure DROP deletes metadata +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + -- Create a cstore_fdw table under a schema and drop it. CREATE SCHEMA test_schema; CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; + +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset DROP SCHEMA test_schema CASCADE; +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; SELECT current_database() datname \gset diff --git a/sql/fdw_drop.sql b/sql/fdw_drop.sql index c64b5c99b..7c6dd5c6e 100644 --- a/sql/fdw_drop.sql +++ b/sql/fdw_drop.sql @@ -15,14 +15,22 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset + -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; +-- make sure DROP deletes metadata +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; + -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; + +SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset DROP SCHEMA test_schema CASCADE; +SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; SELECT current_database() datname \gset From a87c15a1e1078343e6cef85127b0e3eff7f4d1ca Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 1 Oct 2020 21:09:47 -0700 Subject: [PATCH 065/124] Address feedback --- cstore.c | 80 ------------------------------------ cstore.h | 2 - cstore_fdw.c | 82 +++++++++++++++++++++++++++++++++---- cstore_metadata_tables.c | 11 ----- cstore_tableam.c | 87 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 162 insertions(+), 100 deletions(-) diff --git a/cstore.c b/cstore.c index 1d6e414ae..d6b6751e2 100644 --- a/cstore.c +++ b/cstore.c @@ -16,13 +16,9 @@ #include #include -#include "access/heapam.h" -#include "catalog/objectaccess.h" -#include "catalog/storage.h" #include "miscadmin.h" #include "utils/guc.h" #include "utils/rel.h" -#include "utils/relcache.h" #include "cstore.h" @@ -42,11 +38,6 @@ static const struct config_enum_entry cstore_compression_options[] = { NULL, 0, false } }; -static object_access_hook_type prevObjectAccess = NULL; - -static void ObjectAccess(ObjectAccessType access, Oid classId, Oid objectId, int subId, - void *arg); - void cstore_init() { @@ -87,9 +78,6 @@ cstore_init() NULL, NULL, NULL); - - prevObjectAccess = object_access_hook; - object_access_hook = ObjectAccess; } @@ -122,71 +110,3 @@ InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions) { InitCStoreTableMetadata(relNode, cstoreOptions->blockRowCount); } - - -/* - * Implements object_access_hook. One of the places this is called is just - * before dropping an object, which allows us to clean-up resources for - * cstore tables while the pg_class record for the table is still there. 
- */ -static void -ObjectAccess(ObjectAccessType access, Oid classId, Oid objectId, int subId, void *arg) -{ - if (prevObjectAccess) - { - prevObjectAccess(access, classId, objectId, subId, arg); - } - - /* - * Do nothing if this is not a DROP relation command. - */ - if (access != OAT_DROP || classId != RelationRelationId || OidIsValid(subId)) - { - return; - } - - if (IsCStoreFdwTable(objectId)) - { - /* - * Drop both metadata and storage. We need to drop storage here since - * we manage relfilenode for FDW tables in the extension. - */ - Relation rel = cstore_fdw_open(objectId, AccessExclusiveLock); - RelationOpenSmgr(rel); - RelationDropStorage(rel); - DeleteTableMetadataRowIfExists(rel->rd_node.relNode); - - /* keep the lock since we did physical changes to the relation */ - relation_close(rel, NoLock); - } - else - { - Oid relNode = InvalidOid; - Relation rel = try_relation_open(objectId, AccessExclusiveLock); - if (rel == NULL) - { - return; - } - - relNode = rel->rd_node.relNode; - if (IsCStoreStorage(relNode)) - { - /* - * Drop only metadata for table am cstore tables. Postgres manages - * storage for these tables, so we don't need to drop that. - */ - DeleteTableMetadataRowIfExists(relNode); - - /* keep the lock since we did physical changes to the relation */ - relation_close(rel, NoLock); - } - else - { - /* - * For non-cstore tables, we do nothing. - * Release the lock since we haven't changed the relation. 
- */ - relation_close(rel, AccessExclusiveLock); - } - } -} diff --git a/cstore.h b/cstore.h index 919352c6c..ef937ba3c 100644 --- a/cstore.h +++ b/cstore.h @@ -249,8 +249,6 @@ extern void cstore_init(void); extern CompressionType ParseCompressionType(const char *compressionTypeString); extern void InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions); -extern bool IsCStoreFdwTable(Oid relationId); -extern Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); /* Function declarations for writing to a cstore file */ extern TableWriteState * CStoreBeginWrite(Relation relation, diff --git a/cstore_fdw.c b/cstore_fdw.c index d4c5c1ec1..406a153c4 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -25,6 +25,7 @@ #include "catalog/catalog.h" #include "catalog/indexing.h" #include "catalog/namespace.h" +#include "catalog/objectaccess.h" #include "catalog/pg_foreign_table.h" #include "catalog/pg_namespace.h" #include "catalog/storage.h" @@ -54,6 +55,7 @@ #include "parser/parser.h" #include "parser/parse_coerce.h" #include "parser/parse_type.h" +#include "storage/lmgr.h" #include "storage/smgr.h" #include "tcop/utility.h" #include "utils/builtins.h" @@ -105,6 +107,8 @@ static const CStoreValidOption ValidOptionArray[] = { OPTION_NAME_BLOCK_ROW_COUNT, ForeignTableRelationId } }; +static object_access_hook_type prevObjectAccessHook = NULL; + /* local functions forward declarations */ #if PG_VERSION_NUM >= 100000 static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, @@ -130,7 +134,8 @@ static List * FindCStoreTables(List *tableList); static List * OpenRelationsForTruncate(List *cstoreTableList); static void FdwNewRelFileNode(Relation relation); static void TruncateCStoreTables(List *cstoreRelationList); -static bool CStoreServer(ForeignServer *server); +static bool IsCStoreFdwTable(Oid relationId); +static bool IsCStoreServer(ForeignServer *server); static bool DistributedTable(Oid relationId); static bool 
DistributedWorkerCopy(CopyStmt *copyStatement); static StringInfo OptionNamesString(Oid currentContextId); @@ -187,7 +192,11 @@ static bool CStoreIsForeignScanParallelSafe(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); #endif static void cstore_fdw_initrel(Relation rel); +static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode); static Relation cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode); +static void CStoreFdwObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, + int subId, + void *arg); PG_FUNCTION_INFO_V1(cstore_ddl_event_end_trigger); PG_FUNCTION_INFO_V1(cstore_table_size); @@ -209,6 +218,8 @@ cstore_fdw_init() { PreviousProcessUtilityHook = ProcessUtility_hook; ProcessUtility_hook = CStoreProcessUtility; + prevObjectAccessHook = object_access_hook; + object_access_hook = CStoreFdwObjectAccessHook; } @@ -251,7 +262,7 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) bool missingOK = false; ForeignServer *server = GetForeignServerByName(serverName, missingOK); - if (CStoreServer(server)) + if (IsCStoreServer(server)) { Oid relationId = RangeVarGetRelid(createStatement->base.relation, AccessShareLock, false); @@ -358,7 +369,6 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); } - /* handle other utility statements */ else { @@ -895,7 +905,7 @@ IsCStoreFdwTable(Oid relationId) { ForeignTable *foreignTable = GetForeignTable(relationId); ForeignServer *server = GetForeignServer(foreignTable->serverid); - if (CStoreServer(server)) + if (IsCStoreServer(server)) { cstoreTable = true; } @@ -906,11 +916,11 @@ IsCStoreFdwTable(Oid relationId) /* - * CStoreServer checks if the given foreign server belongs to cstore_fdw. If it + * IsCStoreServer checks if the given foreign server belongs to cstore_fdw. If it * does, the function returns true. Otherwise, it returns false. 
*/ static bool -CStoreServer(ForeignServer *server) +IsCStoreServer(ForeignServer *server) { ForeignDataWrapper *foreignDataWrapper = GetForeignDataWrapper(server->fdwid); bool cstoreServer = false; @@ -2143,7 +2153,7 @@ cstore_fdw_initrel(Relation rel) } -Relation +static Relation cstore_fdw_open(Oid relationId, LOCKMODE lockmode) { Relation rel = heap_open(relationId, lockmode); @@ -2163,3 +2173,61 @@ cstore_fdw_openrv(RangeVar *relation, LOCKMODE lockmode) return rel; } + + +/* + * Implements object_access_hook. One of the places this is called is just + * before dropping an object, which allows us to clean-up resources for + * cstore tables. + * + * When cleaning up resources, we need to have access to the pg_class record + * for the table so we can indentify the relfilenode belonging to the relation. + * We don't have access to this information in sql_drop event triggers, since + * the relation has already been dropped there. object_access_hook is called + * __before__ dropping tables, so we still have access to the pg_class + * entry here. + * + * Note that the utility hook is called once per __command__, and not for + * every object dropped, and since a drop can cascade to other objects, it + * is difficult to get full set of dropped objects in the utility hook. + * But object_access_hook is called once per dropped object, so it is + * much easier to clean-up all dropped objects here. + */ +static void +CStoreFdwObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, + int subId, void *arg) +{ + if (prevObjectAccessHook) + { + prevObjectAccessHook(access, classId, objectId, subId, arg); + } + + /* + * Do nothing if this is not a DROP relation command. + */ + if (access != OAT_DROP || classId != RelationRelationId || OidIsValid(subId)) + { + return; + } + + /* + * Lock relation to prevent it from being dropped and to avoid + * race conditions in the next if block. 
+ */ + LockRelationOid(objectId, AccessShareLock); + + if (IsCStoreFdwTable(objectId)) + { + /* + * Drop both metadata and storage. We need to drop storage here since + * we manage relfilenode for FDW tables in the extension. + */ + Relation rel = cstore_fdw_open(objectId, AccessExclusiveLock); + RelationOpenSmgr(rel); + RelationDropStorage(rel); + DeleteTableMetadataRowIfExists(rel->rd_node.relNode); + + /* keep the lock since we did physical changes to the relation */ + relation_close(rel, NoLock); + } +} diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index e1f1caedf..4459d3009 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -94,17 +94,6 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); #define Anum_cstore_skipnodes_value_compression_type 12 -/* - * IsCStoreStorage returns if relfilenode belongs to a cstore table. - */ -bool -IsCStoreStorage(Oid relfilenode) -{ - uint64 blockRowCount = 0; - return ReadCStoreTables(relfilenode, &blockRowCount); -} - - /* * InitCStoreTableMetadata adds a record for the given relation in cstore_table. 
*/ diff --git a/cstore_tableam.c b/cstore_tableam.c index d091916cf..6d02ebe24 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -14,6 +14,8 @@ #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/index.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_am.h" #include "catalog/storage.h" #include "catalog/storage_xlog.h" #include "commands/progress.h" @@ -30,10 +32,13 @@ #include "storage/smgr.h" #include "utils/builtins.h" #include "utils/rel.h" +#include "utils/syscache.h" #include "cstore.h" #include "cstore_tableam.h" +#define CSTORE_TABLEAM_NAME "cstore_tableam" + typedef struct CStoreScanDescData { TableScanDescData cs_base; @@ -45,6 +50,13 @@ typedef struct CStoreScanDescData *CStoreScanDesc; static TableWriteState *CStoreWriteState = NULL; static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; static MemoryContext CStoreContext = NULL; +static object_access_hook_type prevObjectAccessHook = NULL; + +/* forward declaration for static functions */ +static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid + objectId, int subId, + void *arg); +static bool IsCStoreTableAmTable(Oid relationId); static CStoreOptions * CStoreTableAMGetOptions(void) @@ -624,6 +636,8 @@ cstore_tableam_init() { PreviousExecutorEndHook = ExecutorEnd_hook; ExecutorEnd_hook = CStoreExecutorEnd; + prevObjectAccessHook = object_access_hook; + object_access_hook = CStoreTableAMObjectAccessHook; } @@ -634,6 +648,79 @@ cstore_tableam_finish() } +/* + * Implements object_access_hook. One of the places this is called is just + * before dropping an object, which allows us to clean-up resources for + * cstore tables. + * + * See the comments for CStoreFdwObjectAccessHook for more details. 
+ */ +static void +CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, int + subId, + void *arg) +{ + if (prevObjectAccessHook) + { + prevObjectAccessHook(access, classId, objectId, subId, arg); + } + + /* + * Do nothing if this is not a DROP relation command. + */ + if (access != OAT_DROP || classId != RelationRelationId || OidIsValid(subId)) + { + return; + } + + /* + * Lock relation to prevent it from being dropped and to avoid + * race conditions in the next if block. + */ + LockRelationOid(objectId, AccessShareLock); + + if (IsCStoreTableAmTable(objectId)) + { + /* + * Drop metadata. No need to drop storage here since for + * tableam tables storage is managed by postgres. + */ + Relation rel = table_open(objectId, AccessExclusiveLock); + DeleteTableMetadataRowIfExists(rel->rd_node.relNode); + + /* keep the lock since we did physical changes to the relation */ + table_close(rel, NoLock); + } +} + + +/* + * IsCStoreTableAmTable returns true if relation has cstore_tableam + * access method. This can be called before extension creation. + */ +static bool +IsCStoreTableAmTable(Oid relationId) +{ + bool result; + Relation rel; + + if (!OidIsValid(relationId)) + { + return false; + } + + /* + * Lock relation to prevent it from being dropped & + * avoid race conditions. 
+ */ + rel = relation_open(relationId, AccessShareLock); + result = rel->rd_tableam == GetCstoreTableAmRoutine(); + relation_close(rel, NoLock); + + return result; +} + + static const TableAmRoutine cstore_am_methods = { .type = T_TableAmRoutine, From a70b0c362e71d06db62bcc65e0ae973dbcb0278e Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 1 Oct 2020 21:23:06 -0700 Subject: [PATCH 066/124] Rename cstore_tables to cstore_data_files --- cstore.c | 11 ---- cstore.h | 18 +++--- cstore_fdw--1.7.sql | 10 +-- cstore_fdw.c | 19 ++---- cstore_metadata_tables.c | 128 ++++++++++++++++++++------------------- cstore_reader.c | 18 +++--- cstore_tableam.c | 5 +- cstore_writer.c | 26 ++++---- expected/am_drop.out | 8 +-- expected/fdw_drop.out | 8 +-- sql/am_drop.sql | 8 +-- sql/fdw_drop.sql | 8 +-- 12 files changed, 125 insertions(+), 142 deletions(-) diff --git a/cstore.c b/cstore.c index d6b6751e2..a724a62a0 100644 --- a/cstore.c +++ b/cstore.c @@ -99,14 +99,3 @@ ParseCompressionType(const char *compressionTypeString) return compressionType; } - - -/* - * InitializeCStoreTableFile initializes metadata for the given relation - * file node. - */ -void -InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions) -{ - InitCStoreTableMetadata(relNode, cstoreOptions->blockRowCount); -} diff --git a/cstore.h b/cstore.h index ef937ba3c..8efb0e6af 100644 --- a/cstore.h +++ b/cstore.h @@ -87,12 +87,12 @@ typedef struct StripeMetadata } StripeMetadata; -/* TableMetadata represents the metadata of a cstore file. */ -typedef struct TableMetadata +/* DataFileMetadata represents the metadata of a cstore file. */ +typedef struct DataFileMetadata { List *stripeMetadataList; uint64 blockRowCount; -} TableMetadata; +} DataFileMetadata; /* ColumnBlockSkipNode contains statistics for a ColumnBlockData. */ @@ -192,7 +192,7 @@ typedef struct StripeBuffers /* TableReadState represents state of a cstore file read operation. 
*/ typedef struct TableReadState { - TableMetadata *tableMetadata; + DataFileMetadata *datafileMetadata; StripeMetadata *currentStripeMetadata; TupleDesc tupleDescriptor; Relation relation; @@ -217,7 +217,7 @@ typedef struct TableReadState /* TableWriteState represents state of a cstore file write operation. */ typedef struct TableWriteState { - TableMetadata *tableMetadata; + DataFileMetadata *datafileMetadata; CompressionType compressionType; TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; @@ -248,7 +248,6 @@ extern int cstore_block_row_count; extern void cstore_init(void); extern CompressionType ParseCompressionType(const char *compressionTypeString); -extern void InitializeCStoreTableFile(Oid relNode, CStoreOptions *cstoreOptions); /* Function declarations for writing to a cstore file */ extern TableWriteState * CStoreBeginWrite(Relation relation, @@ -281,11 +280,10 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); /* cstore_metadata_tables.c */ -extern bool IsCStoreStorage(Oid relfilenode); -extern void DeleteTableMetadataRowIfExists(Oid relfilenode); -extern void InitCStoreTableMetadata(Oid relfilenode, int blockRowCount); +extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); +extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); -extern TableMetadata * ReadTableMetadata(Oid relfilenode); +extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode); extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index b3470b6a5..c19bb1449 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -31,7 +31,7 @@ RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C STRICT; -CREATE TABLE cstore_tables ( +CREATE 
TABLE cstore_data_files ( relfilenode oid NOT NULL, block_row_count int NOT NULL, version_major bigint NOT NULL, @@ -39,7 +39,7 @@ CREATE TABLE cstore_tables ( PRIMARY KEY (relfilenode) ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore_tables IS 'CStore table wide metadata'; +COMMENT ON TABLE cstore_data_files IS 'CStore data file wide metadata'; CREATE TABLE cstore_stripes ( relfilenode oid NOT NULL, @@ -51,10 +51,10 @@ CREATE TABLE cstore_stripes ( block_row_count int NOT NULL, row_count bigint NOT NULL, PRIMARY KEY (relfilenode, stripe), - FOREIGN KEY (relfilenode) REFERENCES cstore_tables(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED + FOREIGN KEY (relfilenode) REFERENCES cstore_data_files(relfilenode) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore_tables IS 'CStore per stripe metadata'; +COMMENT ON TABLE cstore_stripes IS 'CStore per stripe metadata'; CREATE TABLE cstore_skipnodes ( relfilenode oid NOT NULL, @@ -73,4 +73,4 @@ CREATE TABLE cstore_skipnodes ( FOREIGN KEY (relfilenode, stripe) REFERENCES cstore_stripes(relfilenode, stripe) ON DELETE CASCADE INITIALLY DEFERRED ) WITH (user_catalog_table = true); -COMMENT ON TABLE cstore_tables IS 'CStore per block metadata'; +COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata'; diff --git a/cstore_fdw.c b/cstore_fdw.c index 406a153c4..33a29ad32 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -267,16 +267,8 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) Oid relationId = RangeVarGetRelid(createStatement->base.relation, AccessShareLock, false); Relation relation = cstore_fdw_open(relationId, AccessExclusiveLock); - - /* - * Make sure database directory exists before creating a table. - * This is necessary when a foreign server is created inside - * a template database and a new database is created out of it. - * We have no chance to hook into server creation to create data - * directory for it during database creation time. 
- */ - InitializeCStoreTableFile(relation->rd_node.relNode, - CStoreGetOptions(relationId)); + CStoreOptions *options = CStoreGetOptions(relationId); + InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount); heap_close(relation, AccessExclusiveLock); } } @@ -369,6 +361,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, completionTag); } + /* handle other utility statements */ else { @@ -782,12 +775,12 @@ TruncateCStoreTables(List *cstoreRelationList) { Relation relation = (Relation) lfirst(relationCell); Oid relationId = relation->rd_id; + CStoreOptions *options = CStoreGetOptions(relationId); Assert(IsCStoreFdwTable(relationId)); FdwNewRelFileNode(relation); - InitializeCStoreTableFile(relation->rd_node.relNode, - CStoreGetOptions(relationId)); + InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount); } } @@ -2225,7 +2218,7 @@ CStoreFdwObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, Relation rel = cstore_fdw_open(objectId, AccessExclusiveLock); RelationOpenSmgr(rel); RelationDropStorage(rel); - DeleteTableMetadataRowIfExists(rel->rd_node.relNode); + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); /* keep the lock since we did physical changes to the relation */ relation_close(rel, NoLock); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 4459d3009..3ebee02e7 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -45,12 +45,12 @@ typedef struct static Oid CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); -static Oid CStoreTablesRelationId(void); -static Oid CStoreTablesIndexRelationId(void); +static Oid CStoreDataFilesRelationId(void); +static Oid CStoreDataFilesIndexRelationId(void); static Oid CStoreSkipNodesRelationId(void); static Oid CStoreSkipNodesIndexRelationId(void); static Oid CStoreNamespaceId(void); -static bool 
ReadCStoreTables(Oid relfilenode, uint64 *blockRowCount); +static bool ReadCStoreDataFiles(Oid relfilenode, uint64 *blockRowCount); static ModifyState * StartModifyRelation(Relation rel); static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls); @@ -61,11 +61,11 @@ static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* constants for cstore_table */ -#define Natts_cstore_tables 4 -#define Anum_cstore_tables_relfilenode 1 -#define Anum_cstore_tables_block_row_count 2 -#define Anum_cstore_tables_version_major 3 -#define Anum_cstore_tables_version_minor 4 +#define Natts_cstore_data_files 4 +#define Anum_cstore_data_files_relfilenode 1 +#define Anum_cstore_data_files_block_row_count 2 +#define Anum_cstore_data_files_version_major 3 +#define Anum_cstore_data_files_version_minor 4 /* constants for cstore_stripe */ #define Natts_cstore_stripes 8 @@ -95,35 +95,36 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* - * InitCStoreTableMetadata adds a record for the given relation in cstore_table. + * InitCStoreDataFileMetadata adds a record for the given relfilenode + * in cstore_data_files. 
*/ void -InitCStoreTableMetadata(Oid relfilenode, int blockRowCount) +InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount) { - Oid cstoreTablesOid = InvalidOid; - Relation cstoreTables = NULL; + Oid cstoreDataFilesOid = InvalidOid; + Relation cstoreDataFiles = NULL; ModifyState *modifyState = NULL; - bool nulls[Natts_cstore_tables] = { 0 }; - Datum values[Natts_cstore_tables] = { + bool nulls[Natts_cstore_data_files] = { 0 }; + Datum values[Natts_cstore_data_files] = { ObjectIdGetDatum(relfilenode), Int32GetDatum(blockRowCount), Int32GetDatum(CSTORE_VERSION_MAJOR), Int32GetDatum(CSTORE_VERSION_MINOR) }; - DeleteTableMetadataRowIfExists(relfilenode); + DeleteDataFileMetadataRowIfExists(relfilenode); - cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = heap_open(cstoreTablesOid, RowExclusiveLock); + cstoreDataFilesOid = CStoreDataFilesRelationId(); + cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock); - modifyState = StartModifyRelation(cstoreTables); + modifyState = StartModifyRelation(cstoreDataFiles); InsertTupleAndEnforceConstraints(modifyState, values, nulls); FinishModifyRelation(modifyState); CommandCounterIncrement(); - heap_close(cstoreTables, NoLock); + heap_close(cstoreDataFiles, NoLock); } @@ -338,11 +339,11 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) /* - * ReadTableMetadata constructs TableMetadata for a given relfilenode by reading - * from cstore_tables and cstore_stripes. + * ReadDataFileMetadata constructs DataFileMetadata for a given relfilenode by reading + * from cstore_data_files and cstore_stripes. 
*/ -TableMetadata * -ReadTableMetadata(Oid relfilenode) +DataFileMetadata * +ReadDataFileMetadata(Oid relfilenode) { Oid cstoreStripesOid = InvalidOid; Relation cstoreStripes = NULL; @@ -353,8 +354,8 @@ ReadTableMetadata(Oid relfilenode) HeapTuple heapTuple; bool found = false; - TableMetadata *tableMetadata = palloc0(sizeof(TableMetadata)); - found = ReadCStoreTables(relfilenode, &tableMetadata->blockRowCount); + DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata)); + found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); if (!found) { ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.", @@ -394,40 +395,41 @@ ReadTableMetadata(Oid relfilenode) stripeMetadata->rowCount = DatumGetInt64( datumArray[Anum_cstore_stripes_row_count - 1]); - tableMetadata->stripeMetadataList = lappend(tableMetadata->stripeMetadataList, - stripeMetadata); + datafileMetadata->stripeMetadataList = lappend( + datafileMetadata->stripeMetadataList, + stripeMetadata); } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); heap_close(cstoreStripes, NoLock); - return tableMetadata; + return datafileMetadata; } /* - * ReadCStoreTables reads corresponding record from cstore_tables. Returns false if - * table was not found in cstore_tables. + * ReadCStoreDataFiles reads corresponding record from cstore_data_files. Returns + * false if table was not found in cstore_data_files. 
*/ static bool -ReadCStoreTables(Oid relfilenode, uint64 *blockRowCount) +ReadCStoreDataFiles(Oid relfilenode, uint64 *blockRowCount) { bool found = false; - Oid cstoreTablesOid = InvalidOid; - Relation cstoreTables = NULL; + Oid cstoreDataFilesOid = InvalidOid; + Relation cstoreDataFiles = NULL; Relation index = NULL; TupleDesc tupleDescriptor = NULL; ScanKeyData scanKey[1]; SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple = NULL; - ScanKeyInit(&scanKey[0], Anum_cstore_tables_relfilenode, + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); - cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = try_relation_open(cstoreTablesOid, AccessShareLock); - if (cstoreTables == NULL) + cstoreDataFilesOid = CStoreDataFilesRelationId(); + cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); + if (cstoreDataFiles == NULL) { /* * Extension has been dropped. This can be called while @@ -436,77 +438,77 @@ ReadCStoreTables(Oid relfilenode, uint64 *blockRowCount) return false; } - index = try_relation_open(CStoreTablesIndexRelationId(), AccessShareLock); + index = try_relation_open(CStoreDataFilesIndexRelationId(), AccessShareLock); if (index == NULL) { - heap_close(cstoreTables, NoLock); + heap_close(cstoreDataFiles, NoLock); /* extension has been dropped */ return false; } - tupleDescriptor = RelationGetDescr(cstoreTables); + tupleDescriptor = RelationGetDescr(cstoreDataFiles); - scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); + scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, 1, scanKey); heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { - Datum datumArray[Natts_cstore_tables]; - bool isNullArray[Natts_cstore_tables]; + Datum datumArray[Natts_cstore_data_files]; + bool isNullArray[Natts_cstore_data_files]; heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); - 
*blockRowCount = DatumGetInt32(datumArray[Anum_cstore_tables_block_row_count - + *blockRowCount = DatumGetInt32(datumArray[Anum_cstore_data_files_block_row_count - 1]); found = true; } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); - heap_close(cstoreTables, NoLock); + heap_close(cstoreDataFiles, NoLock); return found; } /* - * DeleteTableMetadataRowIfExists removes the row with given relfilenode from cstore_stripes. + * DeleteDataFileMetadataRowIfExists removes the row with given relfilenode from cstore_stripes. */ void -DeleteTableMetadataRowIfExists(Oid relfilenode) +DeleteDataFileMetadataRowIfExists(Oid relfilenode) { - Oid cstoreTablesOid = InvalidOid; - Relation cstoreTables = NULL; + Oid cstoreDataFilesOid = InvalidOid; + Relation cstoreDataFiles = NULL; Relation index = NULL; ScanKeyData scanKey[1]; SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple = NULL; - ScanKeyInit(&scanKey[0], Anum_cstore_tables_relfilenode, + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); - cstoreTablesOid = CStoreTablesRelationId(); - cstoreTables = try_relation_open(cstoreTablesOid, AccessShareLock); - if (cstoreTables == NULL) + cstoreDataFilesOid = CStoreDataFilesRelationId(); + cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); + if (cstoreDataFiles == NULL) { /* extension has been dropped */ return; } - index = index_open(CStoreTablesIndexRelationId(), AccessShareLock); + index = index_open(CStoreDataFilesIndexRelationId(), AccessShareLock); - scanDescriptor = systable_beginscan_ordered(cstoreTables, index, NULL, 1, scanKey); + scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, 1, scanKey); heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { - ModifyState *modifyState = StartModifyRelation(cstoreTables); + ModifyState *modifyState = StartModifyRelation(cstoreDataFiles); 
DeleteTupleAndEnforceConstraints(modifyState, heapTuple); FinishModifyRelation(modifyState); } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); - heap_close(cstoreTables, NoLock); + heap_close(cstoreDataFiles, NoLock); } @@ -711,24 +713,24 @@ CStoreStripesIndexRelationId(void) /* - * CStoreTablesRelationId returns relation id of cstore_tables. + * CStoreDataFilesRelationId returns relation id of cstore_data_files. * TODO: should we cache this similar to citus? */ static Oid -CStoreTablesRelationId(void) +CStoreDataFilesRelationId(void) { - return get_relname_relid("cstore_tables", CStoreNamespaceId()); + return get_relname_relid("cstore_data_files", CStoreNamespaceId()); } /* - * CStoreTablesIndexRelationId returns relation id of cstore_tables_idx. + * CStoreDataFilesIndexRelationId returns relation id of cstore_data_files_pkey. * TODO: should we cache this similar to citus? */ static Oid -CStoreTablesIndexRelationId(void) +CStoreDataFilesIndexRelationId(void) { - return get_relname_relid("cstore_tables_pkey", CStoreNamespaceId()); + return get_relname_relid("cstore_data_files_pkey", CStoreNamespaceId()); } diff --git a/cstore_reader.c b/cstore_reader.c index 2ee4101c0..66807ad08 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -84,11 +84,11 @@ CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { TableReadState *readState = NULL; - TableMetadata *tableMetadata = NULL; + DataFileMetadata *datafileMetadata = NULL; MemoryContext stripeReadContext = NULL; Oid relNode = relation->rd_node.relNode; - tableMetadata = ReadTableMetadata(relNode); + datafileMetadata = ReadDataFileMetadata(relNode); /* * We allocate all stripe specific data in the stripeReadContext, and reset @@ -101,7 +101,7 @@ CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, readState = palloc0(sizeof(TableReadState)); readState->relation = relation; - readState->tableMetadata = tableMetadata; + 
readState->datafileMetadata = datafileMetadata; readState->projectedColumnList = projectedColumnList; readState->whereClauseList = whereClauseList; readState->stripeBuffers = NULL; @@ -139,7 +139,7 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu { StripeBuffers *stripeBuffers = NULL; StripeMetadata *stripeMetadata = NULL; - List *stripeMetadataList = readState->tableMetadata->stripeMetadataList; + List *stripeMetadataList = readState->datafileMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); /* if we have read all stripes, return false */ @@ -229,8 +229,8 @@ void CStoreEndRead(TableReadState *readState) { MemoryContextDelete(readState->stripeReadContext); - list_free_deep(readState->tableMetadata->stripeMetadataList); - pfree(readState->tableMetadata); + list_free_deep(readState->datafileMetadata->stripeMetadataList); + pfree(readState->datafileMetadata); pfree(readState); } @@ -305,13 +305,13 @@ FreeBlockData(BlockData *blockData) uint64 CStoreTableRowCount(Relation relation) { - TableMetadata *tableMetadata = NULL; + DataFileMetadata *datafileMetadata = NULL; ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - tableMetadata = ReadTableMetadata(relation->rd_node.relNode); + datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode); - foreach(stripeMetadataCell, tableMetadata->stripeMetadataList) + foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) { StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); totalRowCount += stripeMetadata->rowCount; diff --git a/cstore_tableam.c b/cstore_tableam.c index 6d02ebe24..4e7f22c31 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -443,12 +443,13 @@ cstore_relation_set_new_filenode(Relation rel, MultiXactId *minmulti) { SMgrRelation srel; + CStoreOptions *options = CStoreTableAMGetOptions(); Assert(persistence == RELPERSISTENCE_PERMANENT); *freezeXid = RecentXmin; *minmulti = 
GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - InitializeCStoreTableFile(newrnode->relNode, CStoreTableAMGetOptions()); + InitCStoreDataFileMetadata(newrnode->relNode, options->blockRowCount); smgrclose(srel); } @@ -686,7 +687,7 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId * tableam tables storage is managed by postgres. */ Relation rel = table_open(objectId, AccessExclusiveLock); - DeleteTableMetadataRowIfExists(rel->rd_node.relNode); + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); /* keep the lock since we did physical changes to the relation */ table_close(rel, NoLock); diff --git a/cstore_writer.c b/cstore_writer.c index 728c855b4..c70b448c7 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -45,7 +45,7 @@ static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, int columnTypeLength, Oid columnCollation, FmgrInfo *comparisonFunction); static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); -static void AppendStripeMetadata(TableMetadata *tableMetadata, +static void AppendStripeMetadata(DataFileMetadata *datafileMetadata, StripeMetadata stripeMetadata); static StringInfo CopyStringInfo(StringInfo sourceString); @@ -64,7 +64,7 @@ CStoreBeginWrite(Relation relation, TupleDesc tupleDescriptor) { TableWriteState *writeState = NULL; - TableMetadata *tableMetadata = NULL; + DataFileMetadata *datafileMetadata = NULL; FmgrInfo **comparisonFunctionArray = NULL; MemoryContext stripeWriteContext = NULL; uint64 currentFileOffset = 0; @@ -75,18 +75,18 @@ CStoreBeginWrite(Relation relation, uint64 currentStripeId = 0; Oid relNode = relation->rd_node.relNode; - tableMetadata = ReadTableMetadata(relNode); + datafileMetadata = ReadDataFileMetadata(relNode); /* * If stripeMetadataList is not empty, jump to the position right after * the last position. 
*/ - if (tableMetadata->stripeMetadataList != NIL) + if (datafileMetadata->stripeMetadataList != NIL) { StripeMetadata *lastStripe = NULL; uint64 lastStripeSize = 0; - lastStripe = llast(tableMetadata->stripeMetadataList); + lastStripe = llast(datafileMetadata->stripeMetadataList); lastStripeSize += lastStripe->dataLength; currentFileOffset = lastStripe->fileOffset + lastStripeSize; @@ -129,7 +129,7 @@ CStoreBeginWrite(Relation relation, writeState = palloc0(sizeof(TableWriteState)); writeState->relation = relation; - writeState->tableMetadata = tableMetadata; + writeState->datafileMetadata = datafileMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; writeState->blockRowCount = blockRowCount; @@ -164,7 +164,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; uint32 columnCount = writeState->tupleDescriptor->natts; - TableMetadata *tableMetadata = writeState->tableMetadata; + DataFileMetadata *datafileMetadata = writeState->datafileMetadata; const uint32 blockRowCount = writeState->blockRowCount; BlockData *blockData = writeState->blockData; MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); @@ -254,7 +254,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul MemoryContextSwitchTo(oldContext); InsertStripeMetadataRow(writeState->relation->rd_node.relNode, &stripeMetadata); - AppendStripeMetadata(tableMetadata, stripeMetadata); + AppendStripeMetadata(datafileMetadata, stripeMetadata); } else { @@ -284,11 +284,11 @@ CStoreEndWrite(TableWriteState *writeState) MemoryContextSwitchTo(oldContext); InsertStripeMetadataRow(writeState->relation->rd_node.relNode, &stripeMetadata); - AppendStripeMetadata(writeState->tableMetadata, stripeMetadata); + AppendStripeMetadata(writeState->datafileMetadata, 
stripeMetadata); } MemoryContextDelete(writeState->stripeWriteContext); - list_free_deep(writeState->tableMetadata->stripeMetadataList); + list_free_deep(writeState->datafileMetadata->stripeMetadataList); pfree(writeState->comparisonFunctionArray); FreeBlockData(writeState->blockData); pfree(writeState); @@ -791,13 +791,13 @@ DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength) * table footer's stripeMetadataList. */ static void -AppendStripeMetadata(TableMetadata *tableMetadata, StripeMetadata stripeMetadata) +AppendStripeMetadata(DataFileMetadata *datafileMetadata, StripeMetadata stripeMetadata) { StripeMetadata *stripeMetadataCopy = palloc0(sizeof(StripeMetadata)); memcpy(stripeMetadataCopy, &stripeMetadata, sizeof(StripeMetadata)); - tableMetadata->stripeMetadataList = lappend(tableMetadata->stripeMetadataList, - stripeMetadataCopy); + datafileMetadata->stripeMetadataList = lappend(datafileMetadata->stripeMetadataList, + stripeMetadataCopy); } diff --git a/expected/am_drop.out b/expected/am_drop.out index c1fc60519..26de328f6 100644 --- a/expected/am_drop.out +++ b/expected/am_drop.out @@ -12,12 +12,12 @@ -- 'postgres' directory is excluded from comparison to have the same result. -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset -- DROP cstore_fdw tables DROP TABLE contestant; DROP TABLE contestant_compressed; -- make sure DROP deletes metadata -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; ?column? ---------- 2 @@ -26,10 +26,10 @@ SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to table test_schema.test_table -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; ?column? ---------- 1 diff --git a/expected/fdw_drop.out b/expected/fdw_drop.out index 24c0f518d..e1ddf0fd0 100644 --- a/expected/fdw_drop.out +++ b/expected/fdw_drop.out @@ -12,12 +12,12 @@ -- 'postgres' directory is excluded from comparison to have the same result. -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; -- make sure DROP deletes metadata -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; ?column? ---------- 2 @@ -26,10 +26,10 @@ SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to foreign table test_schema.test_table -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; ?column? ---------- 1 diff --git a/sql/am_drop.sql b/sql/am_drop.sql index 06873aa6e..080712881 100644 --- a/sql/am_drop.sql +++ b/sql/am_drop.sql @@ -15,22 +15,22 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset -- DROP cstore_fdw tables DROP TABLE contestant; DROP TABLE contestant_compressed; -- make sure DROP deletes metadata -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE TABLE test_schema.test_table(data int) USING cstore_tableam; -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset DROP SCHEMA test_schema CASCADE; -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; SELECT current_database() datname \gset diff --git a/sql/fdw_drop.sql b/sql/fdw_drop.sql index 7c6dd5c6e..f89374a5a 100644 --- a/sql/fdw_drop.sql +++ b/sql/fdw_drop.sql @@ -15,22 +15,22 @@ -- store postgres database oid SELECT oid postgres_oid FROM pg_database WHERE datname = 'postgres' \gset -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset -- DROP cstore_fdw tables DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; -- make sure DROP deletes metadata -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; -- Create a cstore_fdw table under a schema and drop it. 
CREATE SCHEMA test_schema; CREATE FOREIGN TABLE test_schema.test_table(data int) SERVER cstore_server; -SELECT count(*) AS cstore_tables_before_drop FROM cstore.cstore_tables \gset +SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \gset DROP SCHEMA test_schema CASCADE; -SELECT :cstore_tables_before_drop - count(*) FROM cstore.cstore_tables; +SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; SELECT current_database() datname \gset From a8da9acc634bc8ed0e47ca1cf0fb2c6edd9fcffc Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 09:51:24 -0700 Subject: [PATCH 067/124] Fix writes after rollback --- Makefile | 4 +- cstore_writer.c | 14 ++++++- expected/am_rollback.out | 77 +++++++++++++++++++++++++++++++++++++++ expected/fdw_rollback.out | 77 +++++++++++++++++++++++++++++++++++++++ sql/am_rollback.sql | 41 +++++++++++++++++++++ sql/fdw_rollback.sql | 41 +++++++++++++++++++++ 6 files changed, 251 insertions(+), 3 deletions(-) create mode 100644 expected/am_rollback.out create mode 100644 expected/fdw_rollback.out create mode 100644 sql/am_rollback.sql create mode 100644 sql/fdw_rollback.sql diff --git a/Makefile b/Makefile index 483aebc35..ac7e15037 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ ifeq ($(USE_FDW),yes) OBJS += cstore_fdw.o REGRESS += fdw_create fdw_load fdw_query fdw_analyze fdw_data_types \ fdw_functions fdw_block_filtering fdw_drop fdw_insert \ - fdw_copyto fdw_alter fdw_truncate fdw_clean + fdw_copyto fdw_alter fdw_rollback fdw_truncate fdw_clean endif # disabled tests: am_block_filtering @@ -53,7 +53,7 @@ ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ - am_drop am_insert am_copyto am_alter am_truncate am_clean + am_drop am_insert am_copyto am_alter am_rollback am_truncate am_clean endif ifeq ($(enable_coverage),yes) diff --git a/cstore_writer.c b/cstore_writer.c 
index c70b448c7..1146bd0a3 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -403,7 +403,19 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) PageInit(page, BLCKSZ, 0); } - /* always appending */ + /* + * After a transaction has been rolled-back, we might be + * over-writing the rolledback write, so phdr->pd_lower can be + * different from addr.offset. + * + * We reset pd_lower to reset the rolledback write. + */ + if (phdr->pd_lower > addr.offset) + { + ereport(DEBUG1, (errmsg("over-writing page %u", addr.blockno), + errdetail("This can happen after a roll-back."))); + phdr->pd_lower = addr.offset; + } Assert(phdr->pd_lower == addr.offset); START_CRIT_SECTION(); diff --git a/expected/am_rollback.out b/expected/am_rollback.out new file mode 100644 index 000000000..130baaa3a --- /dev/null +++ b/expected/am_rollback.out @@ -0,0 +1,77 @@ +-- +-- Testing we handle rollbacks properly +-- +CREATE TABLE t(a int, b int) USING cstore_tableam; +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + count +------- + 0 +(1 row) + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 0 +(1 row) + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 1 +(1 row) + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 30 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +INSERT 
INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 2 +(1 row) + +DROP TABLE t; diff --git a/expected/fdw_rollback.out b/expected/fdw_rollback.out new file mode 100644 index 000000000..f50f9fd19 --- /dev/null +++ b/expected/fdw_rollback.out @@ -0,0 +1,77 @@ +-- +-- Testing we handle rollbacks properly +-- +CREATE FOREIGN TABLE t(a int, b int) SERVER cstore_server; +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + count +------- + 0 +(1 row) + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 0 +(1 row) + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 1 +(1 row) + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + count +------- + 30 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; + count +------- + 10 +(1 row) + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; +SELECT count(*) FROM t; + count +------- + 20 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + count +------- + 2 +(1 row) + +DROP FOREIGN TABLE t; diff --git a/sql/am_rollback.sql b/sql/am_rollback.sql new file mode 100644 index 000000000..da1cc8ce4 --- /dev/null +++ 
b/sql/am_rollback.sql @@ -0,0 +1,41 @@ +-- +-- Testing we handle rollbacks properly +-- + +CREATE TABLE t(a int, b int) USING cstore_tableam; + +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; + +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +DROP TABLE t; diff --git a/sql/fdw_rollback.sql b/sql/fdw_rollback.sql new file mode 100644 index 000000000..804868ac9 --- /dev/null +++ b/sql/fdw_rollback.sql @@ -0,0 +1,41 @@ +-- +-- Testing we handle rollbacks properly +-- + +CREATE FOREIGN TABLE t(a int, b int) SERVER cstore_server; + +BEGIN; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +ROLLBACK; +SELECT count(*) FROM t; + +-- check stripe metadata also have been rolled-back +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +-- savepoint rollback +BEGIN; +SAVEPOINT s0; +INSERT INTO t SELECT i, i+1 
FROM generate_series(1, 10) i; +SAVEPOINT s1; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s0; +SELECT count(*) FROM t; +INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +COMMIT; + +SELECT count(*) FROM t; + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; + +DROP FOREIGN TABLE t; From b72a4d8d1964401e02dbda8d8da3be2f2ce2df6a Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:08:26 -0700 Subject: [PATCH 068/124] Clean-up old metadata on TRUNCATE --- cstore_fdw.c | 1 + cstore_tableam.c | 3 +++ expected/am_truncate.out | 8 ++++++++ expected/fdw_truncate.out | 8 ++++++++ sql/am_truncate.sql | 5 +++++ sql/fdw_truncate.sql | 5 +++++ 6 files changed, 30 insertions(+) diff --git a/cstore_fdw.c b/cstore_fdw.c index 33a29ad32..2790efaca 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -825,6 +825,7 @@ FdwNewRelFileNode(Relation relation) if (OidIsValid(relation->rd_rel->relfilenode)) { RelationDropStorage(relation); + DeleteDataFileMetadataRowIfExists(relation->rd_rel->relfilenode); } if (OidIsValid(relation->rd_rel->reltablespace)) diff --git a/cstore_tableam.c b/cstore_tableam.c index 4e7f22c31..2744a9a76 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -445,6 +445,9 @@ cstore_relation_set_new_filenode(Relation rel, SMgrRelation srel; CStoreOptions *options = CStoreTableAMGetOptions(); + /* delete old relfilenode metadata */ + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); + Assert(persistence == RELPERSISTENCE_PERMANENT); *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); diff --git a/expected/am_truncate.out b/expected/am_truncate.out index 99db7fe72..951a77f04 100644 --- a/expected/am_truncate.out +++ b/expected/am_truncate.out @@ -15,6 +15,7 @@ CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; -- COMPRESSED 
CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; set cstore.compression = 'pglz'; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; @@ -145,6 +146,13 @@ SELECT * from cstore_truncate_test; ---+--- (0 rows) +-- make sure TRUNATE deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + ?column? +---------- + 0 +(1 row) + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/expected/fdw_truncate.out b/expected/fdw_truncate.out index c92c15559..f357c6358 100644 --- a/expected/fdw_truncate.out +++ b/expected/fdw_truncate.out @@ -14,6 +14,7 @@ CREATE FOREIGN TABLE cstore_truncate_test (a int, b int) SERVER cstore_server; CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_server; CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); CREATE TABLE cstore_truncate_test_regular (a int, b int); +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; @@ -142,6 +143,13 @@ SELECT * from cstore_truncate_test; ---+--- (0 rows) +-- make sure TRUNATE deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + ?column? 
+---------- + 0 +(1 row) + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql index 3fdce1d82..ae86098ee 100644 --- a/sql/am_truncate.sql +++ b/sql/am_truncate.sql @@ -13,6 +13,8 @@ CREATE TABLE cstore_truncate_test_second (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_compressed (a int, b int) USING cstore_tableam; CREATE TABLE cstore_truncate_test_regular (a int, b int); +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset + INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; set cstore.compression = 'pglz'; @@ -60,6 +62,9 @@ SELECT * from cstore_truncate_test_regular; TRUNCATE TABLE cstore_truncate_test; SELECT * from cstore_truncate_test; +-- make sure TRUNATE deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/sql/fdw_truncate.sql b/sql/fdw_truncate.sql index a1849045e..b82e7fc7b 100644 --- a/sql/fdw_truncate.sql +++ b/sql/fdw_truncate.sql @@ -12,6 +12,8 @@ CREATE FOREIGN TABLE cstore_truncate_test_second (a int, b int) SERVER cstore_se CREATE FOREIGN TABLE cstore_truncate_test_compressed (a int, b int) SERVER cstore_server OPTIONS (compression 'pglz'); CREATE TABLE cstore_truncate_test_regular (a int, b int); +SELECT count(*) AS cstore_data_files_before_truncate FROM cstore.cstore_data_files \gset + INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; @@ -57,6 +59,9 @@ SELECT * from cstore_truncate_test_regular; TRUNCATE TABLE cstore_truncate_test; SELECT * from cstore_truncate_test; +-- make sure TRUNATE 
deletes metadata for old relfilenode +SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN From 62fc59202ce052159e7faeb29b53e8c70fbd61b5 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:09:19 -0700 Subject: [PATCH 069/124] Implement nontransactional truncate --- cstore.h | 2 +- cstore_metadata_tables.c | 13 ++++++++++--- cstore_reader.c | 4 ++-- cstore_tableam.c | 32 +++++++++++++++++++++++++++++--- cstore_writer.c | 2 +- expected/am_truncate.out | 24 ++++++++++++++++++++++++ expected/fdw_truncate.out | 24 ++++++++++++++++++++++++ sql/am_truncate.sql | 14 ++++++++++++++ sql/fdw_truncate.sql | 14 ++++++++++++++ 9 files changed, 119 insertions(+), 10 deletions(-) diff --git a/cstore.h b/cstore.h index 8efb0e6af..489a4839b 100644 --- a/cstore.h +++ b/cstore.h @@ -283,7 +283,7 @@ extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressio extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); -extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode); +extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 3ebee02e7..7c214eed5 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -343,7 +343,7 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) * from cstore_data_files and cstore_stripes. 
*/ DataFileMetadata * -ReadDataFileMetadata(Oid relfilenode) +ReadDataFileMetadata(Oid relfilenode, bool missingOk) { Oid cstoreStripesOid = InvalidOid; Relation cstoreStripes = NULL; @@ -358,8 +358,15 @@ ReadDataFileMetadata(Oid relfilenode) found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); if (!found) { - ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.", - relfilenode))); + if (!missingOk) + { + ereport(ERROR, (errmsg("Relfilenode %d doesn't belong to a cstore table.", + relfilenode))); + } + else + { + return NULL; + } } ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, diff --git a/cstore_reader.c b/cstore_reader.c index 66807ad08..e51695353 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -88,7 +88,7 @@ CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, MemoryContext stripeReadContext = NULL; Oid relNode = relation->rd_node.relNode; - datafileMetadata = ReadDataFileMetadata(relNode); + datafileMetadata = ReadDataFileMetadata(relNode, false); /* * We allocate all stripe specific data in the stripeReadContext, and reset @@ -309,7 +309,7 @@ CStoreTableRowCount(Relation relation) ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode); + datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode, false); foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) { diff --git a/cstore_tableam.c b/cstore_tableam.c index 2744a9a76..0369ca15a 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -443,7 +443,20 @@ cstore_relation_set_new_filenode(Relation rel, MultiXactId *minmulti) { SMgrRelation srel; - CStoreOptions *options = CStoreTableAMGetOptions(); + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); + uint64 blockRowCount = 0; + + if (metadata != NULL) + { + /* existing table (e.g. 
TRUNCATE), use existing blockRowCount */ + blockRowCount = metadata->blockRowCount; + } + else + { + /* new table, use options */ + CStoreOptions *options = CStoreTableAMGetOptions(); + blockRowCount = options->blockRowCount; + } /* delete old relfilenode metadata */ DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); @@ -452,7 +465,7 @@ cstore_relation_set_new_filenode(Relation rel, *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - InitCStoreDataFileMetadata(newrnode->relNode, options->blockRowCount); + InitCStoreDataFileMetadata(newrnode->relNode, blockRowCount); smgrclose(srel); } @@ -460,7 +473,20 @@ cstore_relation_set_new_filenode(Relation rel, static void cstore_relation_nontransactional_truncate(Relation rel) { - elog(ERROR, "cstore_relation_nontransactional_truncate not implemented"); + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + + /* + * No need to set new relfilenode, since the table was created in this + * transaction and no other transaction can see this relation yet. We + * can just truncate the relation. + * + * This is similar to what is done in heapam_relation_nontransactional_truncate. 
+ */ + RelationTruncate(rel, 0); + + /* Delete old relfilenode metadata and recreate it */ + DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); + InitCStoreDataFileMetadata(rel->rd_node.relNode, metadata->blockRowCount); } diff --git a/cstore_writer.c b/cstore_writer.c index 1146bd0a3..8069f4aba 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -75,7 +75,7 @@ CStoreBeginWrite(Relation relation, uint64 currentStripeId = 0; Oid relNode = relation->rd_node.relNode; - datafileMetadata = ReadDataFileMetadata(relNode); + datafileMetadata = ReadDataFileMetadata(relNode, false); /* * If stripeMetadataList is not empty, jump to the position right after diff --git a/expected/am_truncate.out b/expected/am_truncate.out index 951a77f04..245c72062 100644 --- a/expected/am_truncate.out +++ b/expected/am_truncate.out @@ -153,6 +153,30 @@ SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_fil 0 (1 row) +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE TABLE cstore_same_transaction_truncate(a int) USING cstore_tableam; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; + ?column? 
+---------- + 1 +(1 row) + +SELECT * FROM cstore_same_transaction_truncate; + a +---- + 20 + 21 + 22 + 23 +(4 rows) + +DROP TABLE cstore_same_transaction_truncate; -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/expected/fdw_truncate.out b/expected/fdw_truncate.out index f357c6358..6192c704c 100644 --- a/expected/fdw_truncate.out +++ b/expected/fdw_truncate.out @@ -150,6 +150,30 @@ SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_fil 0 (1 row) +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE FOREIGN TABLE cstore_same_transaction_truncate(a int) SERVER cstore_server; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; + ?column? 
+---------- + 1 +(1 row) + +SELECT * FROM cstore_same_transaction_truncate; + a +---- + 20 + 21 + 22 + 23 +(4 rows) + +DROP FOREIGN TABLE cstore_same_transaction_truncate; -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/sql/am_truncate.sql b/sql/am_truncate.sql index ae86098ee..5d27a69fb 100644 --- a/sql/am_truncate.sql +++ b/sql/am_truncate.sql @@ -65,6 +65,20 @@ SELECT * from cstore_truncate_test; -- make sure TRUNATE deletes metadata for old relfilenode SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE TABLE cstore_same_transaction_truncate(a int) USING cstore_tableam; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; + +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; +SELECT * FROM cstore_same_transaction_truncate; + +DROP TABLE cstore_same_transaction_truncate; + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN diff --git a/sql/fdw_truncate.sql b/sql/fdw_truncate.sql index b82e7fc7b..ed2aaa04a 100644 --- a/sql/fdw_truncate.sql +++ b/sql/fdw_truncate.sql @@ -62,6 +62,20 @@ SELECT * from cstore_truncate_test; -- make sure TRUNATE deletes metadata for old relfilenode SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; +-- test if truncation in the same transaction that created the table works properly +BEGIN; +CREATE FOREIGN TABLE cstore_same_transaction_truncate(a int) SERVER cstore_server; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM 
generate_series(1, 100); +TRUNCATE cstore_same_transaction_truncate; +INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 23); +COMMIT; + +-- should output "1" for the newly created relation +SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; +SELECT * FROM cstore_same_transaction_truncate; + +DROP FOREIGN TABLE cstore_same_transaction_truncate; + -- test if a cached truncate from a pl/pgsql function works CREATE FUNCTION cstore_truncate_test_regular_func() RETURNS void AS $$ BEGIN From 9b9b9e2cf052457ef96444c5c222647e5e4672fd Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Thu, 8 Oct 2020 19:07:18 +0200 Subject: [PATCH 070/124] remove double declaration of stripeMetadata (#20) Compilers seem to behave differently with variable shadowing as both I and the marlin deployment have segfaults when querying a cstore table today, however, CI seem to not care :D This removes a double declaration that was not caught in #10 --- cstore_reader.c | 1 - 1 file changed, 1 deletion(-) diff --git a/cstore_reader.c b/cstore_reader.c index e51695353..499c990d4 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -138,7 +138,6 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu while (readState->stripeBuffers == NULL) { StripeBuffers *stripeBuffers = NULL; - StripeMetadata *stripeMetadata = NULL; List *stripeMetadataList = readState->datafileMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); From 92e160344321e2ac26ea782ff3ccfc37eaf0703f Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 8 Oct 2020 11:03:07 -0700 Subject: [PATCH 071/124] Remove shadowed variables --- Makefile | 2 +- cstore_reader.c | 7 +++---- cstore_writer.c | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index ac7e15037..461c33280 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ else $(error version $(VER) is not supported) endif 
-PG_CPPFLAGS = -std=c11 +PG_CPPFLAGS = -std=c11 -Wshadow OBJS = cstore.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o diff --git a/cstore_reader.c b/cstore_reader.c index 499c990d4..69a115ed9 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -179,7 +179,6 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu uint32 lastBlockIndex = 0; uint32 blockRowCount = 0; uint32 stripeRowCount = 0; - StripeMetadata *stripeMetadata = readState->currentStripeMetadata; stripeRowCount = stripeMetadata->rowCount; lastBlockIndex = stripeRowCount / stripeMetadata->blockRowCount; @@ -995,10 +994,10 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor for (defValIndex = 0; defValIndex < tupleConstraints->num_defval; defValIndex++) { - AttrDefault defaultValue = tupleConstraints->defval[defValIndex]; - if (defaultValue.adnum == attributeForm->attnum) + AttrDefault attrDefault = tupleConstraints->defval[defValIndex]; + if (attrDefault.adnum == attributeForm->attnum) { - defaultValueNode = stringToNode(defaultValue.adbin); + defaultValueNode = stringToNode(attrDefault.adbin); break; } } diff --git a/cstore_writer.c b/cstore_writer.c index 8069f4aba..9d57ec275 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -386,7 +386,7 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) while (addr.blockno >= nblocks) { - Buffer buffer = ReadBuffer(rel, P_NEW); + buffer = ReadBuffer(rel, P_NEW); ReleaseBuffer(buffer); nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); } @@ -534,7 +534,6 @@ FlushStripe(TableWriteState *writeState) for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; - uint32 blockIndex = 0; for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { From d1c7d9f09d6a5eb8405c3807f4a6b02b86019227 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: 
Thu, 8 Oct 2020 11:29:27 -0700 Subject: [PATCH 072/124] address feedback --- Makefile | 2 +- cstore_reader.c | 5 +---- cstore_writer.c | 4 ++-- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 461c33280..6ef8431c8 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ else $(error version $(VER) is not supported) endif -PG_CPPFLAGS = -std=c11 -Wshadow +PG_CFLAGS = -std=c11 -Wshadow OBJS = cstore.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o diff --git a/cstore_reader.c b/cstore_reader.c index 69a115ed9..cf2d0b171 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -988,7 +988,6 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, static Datum ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeForm) { - Datum defaultValue = 0; Node *defaultValueNode = NULL; int defValIndex = 0; @@ -1009,7 +1008,7 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor if (IsA(defaultValueNode, Const)) { Const *constNode = (Const *) defaultValueNode; - defaultValue = constNode->constvalue; + return constNode->constvalue; } else { @@ -1018,8 +1017,6 @@ ColumnDefaultValue(TupleConstr *tupleConstraints, Form_pg_attribute attributeFor errhint("Expression is either mutable or " "does not evaluate to constant value"))); } - - return defaultValue; } diff --git a/cstore_writer.c b/cstore_writer.c index 9d57ec275..2c0ca541e 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -386,8 +386,8 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) while (addr.blockno >= nblocks) { - buffer = ReadBuffer(rel, P_NEW); - ReleaseBuffer(buffer); + Buffer newBuffer = ReadBuffer(rel, P_NEW); + ReleaseBuffer(newBuffer); nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); } From 7cc8c8c155ad07e8a9be7a844dc332a5c2a9ffda Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:27:58 -0700 Subject: [PATCH 073/124] Support 
VACUUM FULL --- Makefile | 2 +- cstore_tableam.c | 122 ++++++++++++++++++++++++++++++++++------- expected/am_vacuum.out | 89 ++++++++++++++++++++++++++++++ sql/am_vacuum.sql | 37 +++++++++++++ 4 files changed, 229 insertions(+), 21 deletions(-) create mode 100644 expected/am_vacuum.out create mode 100644 sql/am_vacuum.sql diff --git a/Makefile b/Makefile index 6ef8431c8..60d8855f8 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,7 @@ ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ - am_drop am_insert am_copyto am_alter am_rollback am_truncate am_clean + am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean endif ifeq ($(enable_coverage),yes) diff --git a/cstore_tableam.c b/cstore_tableam.c index 0369ca15a..337dbe06f 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -19,6 +19,7 @@ #include "catalog/storage.h" #include "catalog/storage_xlog.h" #include "commands/progress.h" +#include "commands/vacuum.h" #include "executor/executor.h" #include "nodes/makefuncs.h" #include "optimizer/plancat.h" @@ -131,6 +132,36 @@ cstore_free_write_state() } +static List * +RelationColumnList(Relation rel) +{ + List *columnList = NIL; + TupleDesc tupdesc = RelationGetDescr(rel); + + for (int i = 0; i < tupdesc->natts; i++) + { + Index varno = 0; + AttrNumber varattno = i + 1; + Oid vartype = tupdesc->attrs[i].atttypid; + int32 vartypmod = 0; + Oid varcollid = 0; + Index varlevelsup = 0; + Var *var; + + if (tupdesc->attrs[i].attisdropped) + { + continue; + } + + var = makeVar(varno, varattno, vartype, vartypmod, + varcollid, varlevelsup); + columnList = lappend(columnList, var); + } + + return columnList; +} + + static const TupleTableSlotOps * cstore_slot_callbacks(Relation relation) { @@ -157,25 +188,7 @@ cstore_beginscan(Relation relation, Snapshot snapshot, scan->cs_base.rs_flags = flags; scan->cs_base.rs_parallel = parallel_scan; - for (int i 
= 0; i < tupdesc->natts; i++) - { - Index varno = 0; - AttrNumber varattno = i + 1; - Oid vartype = tupdesc->attrs[i].atttypid; - int32 vartypmod = 0; - Oid varcollid = 0; - Index varlevelsup = 0; - Var *var; - - if (tupdesc->attrs[i].attisdropped) - { - continue; - } - - var = makeVar(varno, varattno, vartype, vartypmod, - varcollid, varlevelsup); - columnList = lappend(columnList, var); - } + columnList = RelationColumnList(relation); readState = CStoreBeginRead(relation, tupdesc, columnList, NULL); @@ -497,6 +510,13 @@ cstore_relation_copy_data(Relation rel, const RelFileNode *newrnode) } +/* + * cstore_relation_copy_for_cluster is called on VACUUM FULL, at which + * we should copy data from OldHeap to NewHeap. + * + * In general TableAM case this can also be called for the CLUSTER command + * which is not applicable for cstore since it doesn't support indexes. + */ static void cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, Relation OldIndex, bool use_sort, @@ -507,7 +527,69 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, double *tups_vacuumed, double *tups_recently_dead) { - elog(ERROR, "cstore_relation_copy_for_cluster not implemented"); + TableWriteState *writeState = NULL; + TableReadState *readState = NULL; + CStoreOptions *cstoreOptions = NULL; + Datum *sourceValues = NULL; + bool *sourceNulls = NULL; + Datum *targetValues = NULL; + bool *targetNulls = NULL; + TupleDesc sourceDesc = RelationGetDescr(OldHeap); + TupleDesc targetDesc = RelationGetDescr(NewHeap); + + if (OldIndex != NULL || use_sort) + { + ereport(ERROR, (errmsg("cstore_am doesn't support indexes"))); + } + + /* + * copy_table_data in cluster.c assumes tuple descriptors are exactly + * the same. Even dropped columns exist and are marked as attisdropped + * in the target relation. 
+ */ + Assert(sourceDesc->natts == targetDesc->natts); + + cstoreOptions = CStoreTableAMGetOptions(); + + writeState = CStoreBeginWrite(NewHeap, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, + targetDesc); + + readState = CStoreBeginRead(OldHeap, sourceDesc, RelationColumnList(OldHeap), NULL); + + sourceValues = palloc0(sourceDesc->natts * sizeof(Datum)); + sourceNulls = palloc0(sourceDesc->natts * sizeof(bool)); + + targetValues = palloc0(targetDesc->natts * sizeof(Datum)); + targetNulls = palloc0(targetDesc->natts * sizeof(bool)); + + *num_tuples = 0; + + while (CStoreReadNextRow(readState, sourceValues, sourceNulls)) + { + memset(targetNulls, true, targetDesc->natts * sizeof(bool)); + + for (int attrIndex = 0; attrIndex < sourceDesc->natts; attrIndex++) + { + FormData_pg_attribute *sourceAttr = TupleDescAttr(sourceDesc, attrIndex); + + if (!sourceAttr->attisdropped) + { + targetNulls[attrIndex] = sourceNulls[attrIndex]; + targetValues[attrIndex] = sourceValues[attrIndex]; + } + } + + CStoreWriteRow(writeState, targetValues, targetNulls); + (*num_tuples)++; + } + + *tups_vacuumed = *num_tuples; + + CStoreEndWrite(writeState); + CStoreEndRead(readState); } diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out new file mode 100644 index 000000000..d689be800 --- /dev/null +++ b/expected/am_vacuum.out @@ -0,0 +1,89 @@ +CREATE TABLE t(a int, b int) USING cstore_tableam; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 0 +(1 row) + +INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; +INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i; +INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i; +SELECT sum(a), sum(b) FROM t; + sum | sum +-----+------ + 465 | 9455 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 3 +(1 
row) + +-- vacuum full should merge stripes together +VACUUM FULL t; +SELECT sum(a), sum(b) FROM t; + sum | sum +-----+------ + 465 | 9455 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 1 +(1 row) + +-- test the case when all data cannot fit into a single stripe +SET cstore.stripe_row_count TO 1000; +INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; +SELECT sum(a), sum(b) FROM t; + sum | sum +---------+--------- + 3126715 | 6261955 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 4 +(1 row) + +VACUUM FULL t; +SELECT sum(a), sum(b) FROM t; + sum | sum +---------+--------- + 3126715 | 6261955 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + count +------- + 3 +(1 row) + +-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs +ALTER TABLE t DROP COLUMN a; +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; + stripe | attr | block | ?column? | ?column? +--------+------+-------+----------+---------- + 0 | 1 | 0 | f | f + 0 | 2 | 0 | f | f + 1 | 1 | 0 | f | f + 1 | 2 | 0 | f | f + 2 | 1 | 0 | f | f + 2 | 2 | 0 | f | f +(6 rows) + +VACUUM FULL t; +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; + stripe | attr | block | ?column? | ?column? 
+--------+------+-------+----------+---------- + 0 | 1 | 0 | t | t + 0 | 2 | 0 | f | f + 1 | 1 | 0 | t | t + 1 | 2 | 0 | f | f + 2 | 1 | 0 | t | t + 2 | 2 | 0 | f | f +(6 rows) + diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql new file mode 100644 index 000000000..070a13b05 --- /dev/null +++ b/sql/am_vacuum.sql @@ -0,0 +1,37 @@ +CREATE TABLE t(a int, b int) USING cstore_tableam; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; +INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i; +INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +-- vacuum full should merge stripes together +VACUUM FULL t; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +-- test the case when all data cannot fit into a single stripe +SET cstore.stripe_row_count TO 1000; +INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +VACUUM FULL t; + +SELECT sum(a), sum(b) FROM t; +SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; + +-- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs +ALTER TABLE t DROP COLUMN a; + +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; + +VACUUM FULL t; + +SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' 
ORDER BY 1, 2, 3; + From eeb25aca856142b6395a515730863ea0f32a80fe Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:28:50 -0700 Subject: [PATCH 074/124] Add a test which checks for resource clean-up --- expected/am_vacuum.out | 16 ++++++++++++++++ sql/am_vacuum.sql | 10 ++++++++++ 2 files changed, 26 insertions(+) diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index d689be800..3abb3c668 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -1,3 +1,4 @@ +SELECT count(*) AS columnar_table_count FROM cstore.cstore_tables \gset CREATE TABLE t(a int, b int) USING cstore_tableam; SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; count @@ -87,3 +88,18 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs 2 | 2 | 0 | f | f (6 rows) +-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands +SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; + ?column? +---------- + 1 +(1 row) + +DROP TABLE t; +-- Make sure we cleaned the metadata for t too +SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; + ?column? 
+---------- + 0 +(1 row) + diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 070a13b05..6a5e0687e 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -1,4 +1,7 @@ +SELECT count(*) AS columnar_table_count FROM cstore.cstore_tables \gset + CREATE TABLE t(a int, b int) USING cstore_tableam; + SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; @@ -35,3 +38,10 @@ VACUUM FULL t; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; +-- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands +SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; + +DROP TABLE t; + +-- Make sure we cleaned the metadata for t too +SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; From aa3032cfdd90c1edab6393c234174828a8238007 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:30:02 -0700 Subject: [PATCH 075/124] Address feedback --- cstore_tableam.c | 38 ++++++++++---------------------------- expected/am_vacuum.out | 6 +++--- sql/am_vacuum.sql | 6 +++--- 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 337dbe06f..39a0695e2 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -143,8 +143,8 @@ RelationColumnList(Relation rel) Index varno = 0; AttrNumber varattno = i + 1; Oid vartype = tupdesc->attrs[i].atttypid; - int32 vartypmod = 0; - Oid varcollid = 0; + int32 vartypmod = tupdesc->attrs[i].atttypmod; + Oid varcollid = tupdesc->attrs[i].attcollation; Index varlevelsup = 0; Var *var; @@ -530,16 +530,14 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, TableWriteState *writeState = NULL; TableReadState *readState = NULL; CStoreOptions *cstoreOptions = NULL; - Datum *sourceValues = 
NULL; - bool *sourceNulls = NULL; - Datum *targetValues = NULL; - bool *targetNulls = NULL; + Datum *values = NULL; + bool *nulls = NULL; TupleDesc sourceDesc = RelationGetDescr(OldHeap); TupleDesc targetDesc = RelationGetDescr(NewHeap); if (OldIndex != NULL || use_sort) { - ereport(ERROR, (errmsg("cstore_am doesn't support indexes"))); + ereport(ERROR, (errmsg(CSTORE_TABLEAM_NAME " doesn't support indexes"))); } /* @@ -559,34 +557,18 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, readState = CStoreBeginRead(OldHeap, sourceDesc, RelationColumnList(OldHeap), NULL); - sourceValues = palloc0(sourceDesc->natts * sizeof(Datum)); - sourceNulls = palloc0(sourceDesc->natts * sizeof(bool)); - - targetValues = palloc0(targetDesc->natts * sizeof(Datum)); - targetNulls = palloc0(targetDesc->natts * sizeof(bool)); + values = palloc0(sourceDesc->natts * sizeof(Datum)); + nulls = palloc0(sourceDesc->natts * sizeof(bool)); *num_tuples = 0; - while (CStoreReadNextRow(readState, sourceValues, sourceNulls)) + while (CStoreReadNextRow(readState, values, nulls)) { - memset(targetNulls, true, targetDesc->natts * sizeof(bool)); - - for (int attrIndex = 0; attrIndex < sourceDesc->natts; attrIndex++) - { - FormData_pg_attribute *sourceAttr = TupleDescAttr(sourceDesc, attrIndex); - - if (!sourceAttr->attisdropped) - { - targetNulls[attrIndex] = sourceNulls[attrIndex]; - targetValues[attrIndex] = sourceValues[attrIndex]; - } - } - - CStoreWriteRow(writeState, targetValues, targetNulls); + CStoreWriteRow(writeState, values, nulls); (*num_tuples)++; } - *tups_vacuumed = *num_tuples; + *tups_vacuumed = 0; CStoreEndWrite(writeState); CStoreEndRead(readState); diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 3abb3c668..dbeddca2b 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -1,4 +1,4 @@ -SELECT count(*) AS columnar_table_count FROM cstore.cstore_tables \gset +SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files 
\gset CREATE TABLE t(a int, b int) USING cstore_tableam; SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; count @@ -89,7 +89,7 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs (6 rows) -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands -SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; ?column? ---------- 1 @@ -97,7 +97,7 @@ SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; DROP TABLE t; -- Make sure we cleaned the metadata for t too -SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; ?column? ---------- 0 diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 6a5e0687e..8cb70167d 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -1,4 +1,4 @@ -SELECT count(*) AS columnar_table_count FROM cstore.cstore_tables \gset +SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset CREATE TABLE t(a int, b int) USING cstore_tableam; @@ -39,9 +39,9 @@ VACUUM FULL t; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands -SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; DROP TABLE t; -- Make sure we cleaned the metadata for t too -SELECT count(*) - :columnar_table_count FROM cstore.cstore_tables; +SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; From 2ede755107fc8389ef3b38a0e62f67ce4ae2fc93 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:34:52 -0700 Subject: [PATCH 076/124] Initial version of 
VACUUM --- cstore_tableam.c | 137 ++++++++++++++++++++++++++++++++++++++++- expected/am_vacuum.out | 52 ++++++++++++++++ sql/am_vacuum.sql | 20 ++++++ 3 files changed, 208 insertions(+), 1 deletion(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 39a0695e2..59df86fb2 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -32,6 +32,7 @@ #include "storage/procarray.h" #include "storage/smgr.h" #include "utils/builtins.h" +#include "utils/pg_rusage.h" #include "utils/rel.h" #include "utils/syscache.h" @@ -40,6 +41,15 @@ #define CSTORE_TABLEAM_NAME "cstore_tableam" +/* + * Timing parameters for truncate locking heuristics. + * + * These are the same values from src/backend/access/heap/vacuumlazy.c + */ +#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */ +#define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ +#define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ + typedef struct CStoreScanDescData { TableScanDescData cs_base; @@ -59,6 +69,9 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, void *arg); static bool IsCStoreTableAmTable(Oid relationId); + +static void TruncateCStore(Relation rel, int elevel); + static CStoreOptions * CStoreTableAMGetOptions(void) { @@ -575,6 +588,128 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, } +/* + * cstore_vacuum_rel implements VACUUM without FULL option. + */ +static void +cstore_vacuum_rel(Relation rel, VacuumParams *params, + BufferAccessStrategy bstrategy) +{ + int elevel = (params->options & VACOPT_VERBOSE) ? INFO : DEBUG2; + + /* this should have been resolved by vacuum.c until now */ + Assert(params->truncate != VACOPT_TERNARY_DEFAULT); + + /* + * We don't have updates, deletes, or concurrent updates, so all we + * care for now is truncating the unused space at the end of storage. 
+ */ + if (params->truncate == VACOPT_TERNARY_ENABLED) + { + TruncateCStore(rel, elevel); + } +} + + +/* + * TruncateCStore truncates the unused space at the end of main fork for + * a cstore table. This unused space can be created by aborted transactions. + * + * This implementation is based on heap_vacuum_rel in vacuumlazy.c with some + * changes so it suits columnar store relations. + */ +static void +TruncateCStore(Relation rel, int elevel) +{ + PGRUsage ru0; + int lock_retry = 0; + BlockNumber old_rel_pages = 0; + BlockNumber new_rel_pages = 0; + DataFileMetadata *metadata = NULL; + ListCell *stripeMetadataCell = NULL; + + pg_rusage_init(&ru0); + + /* Report that we are now truncating */ + pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, + PROGRESS_VACUUM_PHASE_TRUNCATE); + + /* + * We need an ExclusiveLock to do the truncation. + * Loop until we acquire a lock or retry threshold is reached. + */ + while (true) + { + if (ConditionalLockRelation(rel, AccessExclusiveLock)) + { + break; + } + + /* + * Check for interrupts while trying to (re-)acquire the exclusive + * lock. + */ + CHECK_FOR_INTERRUPTS(); + + if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT / + VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) + { + /* + * We failed to establish the lock in the specified number of + * retries. This means we give up truncating. 
+ */ + ereport(elevel, + (errmsg("\"%s\": stopping truncate due to conflicting lock request", + RelationGetRelationName(rel)))); + return; + } + + pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L); + } + + RelationOpenSmgr(rel); + old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + RelationCloseSmgr(rel); + + /* loop over stripes and find max used block */ + metadata = ReadDataFileMetadata(rel->rd_node.relNode); + foreach(stripeMetadataCell, metadata->stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; + SmgrAddr addr = logical_to_smgr(lastByte); + new_rel_pages = Max(new_rel_pages, addr.blockno + 1); + } + + if (new_rel_pages == old_rel_pages) + { + UnlockRelation(rel, AccessExclusiveLock); + return; + } + + /* + * Truncate the storage. Note that RelationTruncate() takes care of + * Write Ahead Logging. + */ + RelationTruncate(rel, new_rel_pages); + + /* + * We can release the exclusive lock as soon as we have truncated. + * Other backends can't safely access the relation until they have + * processed the smgr invalidation that smgrtruncate sent out ... but + * that should happen as part of standard invalidation processing once + * they acquire lock on the relation. 
+ */ + UnlockRelation(rel, AccessExclusiveLock); + + ereport(elevel, + (errmsg("\"%s\": truncated %u to %u pages", + RelationGetRelationName(rel), + old_rel_pages, new_rel_pages), + errdetail_internal("%s", pg_rusage_show(&ru0)))); +} + + static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) @@ -853,7 +988,7 @@ static const TableAmRoutine cstore_am_methods = { .relation_nontransactional_truncate = cstore_relation_nontransactional_truncate, .relation_copy_data = cstore_relation_copy_data, .relation_copy_for_cluster = cstore_relation_copy_for_cluster, - .relation_vacuum = heap_vacuum_rel, + .relation_vacuum = cstore_vacuum_rel, .scan_analyze_next_block = cstore_scan_analyze_next_block, .scan_analyze_next_tuple = cstore_scan_analyze_next_tuple, .index_build_range_scan = cstore_index_build_range_scan, diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index dbeddca2b..7a1ff2777 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -95,6 +95,58 @@ SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; 1 (1 row) +-- do this in a transaction so concurrent autovacuum doesn't interfere with results +BEGIN; +SAVEPOINT s1; +SELECT count(*) FROM t; + count +------- + 2530 +(1 row) + +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 16 kB +(1 row) + +INSERT INTO t SELECT i FROM generate_series(1, 10000) i; +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 56 kB +(1 row) + +SELECT count(*) FROM t; + count +------- + 12530 +(1 row) + +ROLLBACK TO SAVEPOINT s1; +-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), +-- so relation size should be same as before. 
+SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 56 kB +(1 row) + +COMMIT; +-- vacuum should truncate the relation to the usable space +VACUUM t; +SELECT pg_size_pretty(pg_relation_size('t')); + pg_size_pretty +---------------- + 16 kB +(1 row) + +SELECT count(*) FROM t; + count +------- + 2530 +(1 row) + DROP TABLE t; -- Make sure we cleaned the metadata for t too SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 8cb70167d..10d1c7f6c 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -41,6 +41,26 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; +-- do this in a transaction so concurrent autovacuum doesn't interfere with results +BEGIN; +SAVEPOINT s1; +SELECT count(*) FROM t; +SELECT pg_size_pretty(pg_relation_size('t')); +INSERT INTO t SELECT i FROM generate_series(1, 10000) i; +SELECT pg_size_pretty(pg_relation_size('t')); +SELECT count(*) FROM t; +ROLLBACK TO SAVEPOINT s1; + +-- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), +-- so relation size should be same as before. 
+SELECT pg_size_pretty(pg_relation_size('t')); +COMMIT; + +-- vacuum should truncate the relation to the usable space +VACUUM t; +SELECT pg_size_pretty(pg_relation_size('t')); +SELECT count(*) FROM t; + DROP TABLE t; -- Make sure we cleaned the metadata for t too From 74dd1facf36250e661e8b79f05bc562549ad9da3 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:43:03 -0700 Subject: [PATCH 077/124] add isolation tests --- Makefile | 18 ++++++++++ expected/am_vacuum_vs_insert.out | 58 ++++++++++++++++++++++++++++++++ expected/create.out | 6 ++++ specs/am_vacuum_vs_insert.spec | 47 ++++++++++++++++++++++++++ specs/create.spec | 8 +++++ 5 files changed, 137 insertions(+) create mode 100644 expected/am_vacuum_vs_insert.out create mode 100644 expected/create.out create mode 100644 specs/am_vacuum_vs_insert.spec create mode 100644 specs/create.spec diff --git a/Makefile b/Makefile index 60d8855f8..58340450f 100644 --- a/Makefile +++ b/Makefile @@ -35,6 +35,7 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql REGRESS = extension_create +ISOLATION = create EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ sql/copyto.sql expected/block_filtering.out expected/create.out \ @@ -54,6 +55,7 @@ ifeq ($(USE_TABLEAM),yes) OBJS += cstore_tableam.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean + ISOLATION += am_vacuum_vs_insert endif ifeq ($(enable_coverage),yes) @@ -76,6 +78,22 @@ PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) +# command for getting postgres source directory is taken from citus/configure.in +POSTGRES_SRCDIR=$(shell grep ^abs_top_srcdir $(shell dirname $(shell $(PG_CONFIG) --pgxs))/../Makefile.global|cut -d ' ' -f3-) 
+PGXS_ISOLATION_TESTER=$(top_builddir)/src/test/isolation/pg_isolation_regress + +# If postgres installation doesn't include pg_isolation_regress, try using the +# one in postgres source directory. +ifeq (,$(wildcard $(PGXS_ISOLATION_TESTER))) + pg_isolation_regress_installcheck = \ + $(POSTGRES_SRCDIR)/src/test/isolation/pg_isolation_regress \ + --inputdir=$(srcdir) $(EXTRA_REGRESS_OPTS) +else + pg_isolation_regress_installcheck = \ + $(PGXS_ISOLATION_TESTER) \ + --inputdir=$(srcdir) $(EXTRA_REGRESS_OPTS) +endif + installcheck: reindent: diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out new file mode 100644 index 000000000..8ef78bfa4 --- /dev/null +++ b/expected/am_vacuum_vs_insert.out @@ -0,0 +1,58 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-insert s1-begin s1-insert s2-vacuum s1-commit s2-select +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s1-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s2-vacuum: + VACUUM test_vacuum_vs_insert; + +step s1-commit: + COMMIT; + +step s2-select: + SELECT * FROM test_vacuum_vs_insert; + +a b + +1 2 +2 4 +3 6 +1 2 +2 4 +3 6 + +starting permutation: s1-insert s1-begin s1-insert s2-vacuum-full s1-commit s2-select +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s1-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s2-vacuum-full: + VACUUM FULL test_vacuum_vs_insert; + +step s1-commit: + COMMIT; + +step s2-vacuum-full: <... 
completed> +step s2-select: + SELECT * FROM test_vacuum_vs_insert; + +a b + +1 2 +2 4 +3 6 +1 2 +2 4 +3 6 diff --git a/expected/create.out b/expected/create.out new file mode 100644 index 000000000..39b477c81 --- /dev/null +++ b/expected/create.out @@ -0,0 +1,6 @@ +Parsed test spec with 1 sessions + +starting permutation: s1a +step s1a: + CREATE EXTENSION cstore_fdw; + diff --git a/specs/am_vacuum_vs_insert.spec b/specs/am_vacuum_vs_insert.spec new file mode 100644 index 000000000..57105e1dd --- /dev/null +++ b/specs/am_vacuum_vs_insert.spec @@ -0,0 +1,47 @@ +setup +{ + CREATE TABLE test_vacuum_vs_insert (a int, b int) USING cstore_tableam; +} + +teardown +{ + DROP TABLE IF EXISTS test_vacuum_vs_insert CASCADE; +} + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-insert" +{ + INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; +} + +step "s1-commit" +{ + COMMIT; +} + +session "s2" + +step "s2-vacuum" +{ + VACUUM test_vacuum_vs_insert; +} + +step "s2-vacuum-full" +{ + VACUUM FULL test_vacuum_vs_insert; +} + +step "s2-select" +{ + SELECT * FROM test_vacuum_vs_insert; +} + +permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum" "s1-commit" "s2-select" +permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum-full" "s1-commit" "s2-select" + diff --git a/specs/create.spec b/specs/create.spec new file mode 100644 index 000000000..f8e874678 --- /dev/null +++ b/specs/create.spec @@ -0,0 +1,8 @@ +session "s1" +step "s1a" +{ + CREATE EXTENSION cstore_fdw; +} + +permutation "s1a" + From 37e3845e6afdb91bfd77940770da74ecd07ca698 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:47:09 -0700 Subject: [PATCH 078/124] Address Nils feedback --- cstore_tableam.c | 96 ++++++++++++++++++++++---------- expected/am_vacuum_vs_insert.out | 9 ++- specs/am_vacuum_vs_insert.spec | 4 +- 3 files changed, 76 insertions(+), 33 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index 59df86fb2..f6b51b770 100644 --- 
a/cstore_tableam.c +++ b/cstore_tableam.c @@ -46,7 +46,6 @@ * * These are the same values from src/backend/access/heap/vacuumlazy.c */ -#define VACUUM_TRUNCATE_LOCK_CHECK_INTERVAL 20 /* ms */ #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ #define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ @@ -68,6 +67,8 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, objectId, int subId, void *arg); static bool IsCStoreTableAmTable(Oid relationId); +static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, + int timeout, int retryInterval); static void TruncateCStore(Relation rel, int elevel); @@ -622,7 +623,6 @@ static void TruncateCStore(Relation rel, int elevel) { PGRUsage ru0; - int lock_retry = 0; BlockNumber old_rel_pages = 0; BlockNumber new_rel_pages = 0; DataFileMetadata *metadata = NULL; @@ -634,45 +634,46 @@ TruncateCStore(Relation rel, int elevel) pgstat_progress_update_param(PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_PHASE_TRUNCATE); + /* - * We need an ExclusiveLock to do the truncation. - * Loop until we acquire a lock or retry threshold is reached. + * We need access exclusive lock on the relation in order to do + * truncation. If we can't get it, give up rather than waiting --- we + * don't want to block other backends, and we don't want to deadlock + * (which is quite possible considering we already hold a lower-grade + * lock). + * + * The decisions for AccessExclusiveLock and conditional lock with + * a timeout is based on lazy_truncate_heap in vacuumlazy.c. */ - while (true) + if (!ConditionalLockRelationWithTimeout(rel, AccessExclusiveLock, + VACUUM_TRUNCATE_LOCK_TIMEOUT, + VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) { - if (ConditionalLockRelation(rel, AccessExclusiveLock)) - { - break; - } - /* - * Check for interrupts while trying to (re-)acquire the exclusive - * lock. + * We failed to establish the lock in the specified number of + * retries. This means we give up truncating. 
*/ - CHECK_FOR_INTERRUPTS(); - - if (++lock_retry > (VACUUM_TRUNCATE_LOCK_TIMEOUT / - VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL)) - { - /* - * We failed to establish the lock in the specified number of - * retries. This means we give up truncating. - */ - ereport(elevel, - (errmsg("\"%s\": stopping truncate due to conflicting lock request", - RelationGetRelationName(rel)))); - return; - } - - pg_usleep(VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL * 1000L); + ereport(elevel, + (errmsg("\"%s\": stopping truncate due to conflicting lock request", + RelationGetRelationName(rel)))); + return; } RelationOpenSmgr(rel); old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); - /* loop over stripes and find max used block */ + /* + * Get metadata as viewed in latest snapshot. Reading metadata in transaction + * snapshot is not enough, since stripes could have been created between + * current transaction start and lock acquisition time. Ignoring those + * stripes can destory data. + */ + PushActiveSnapshot(GetLatestSnapshot()); metadata = ReadDataFileMetadata(rel->rd_node.relNode); + PopActiveSnapshot(); + + /* loop over stripes and find max used block */ foreach(stripeMetadataCell, metadata->stripeMetadataList) { StripeMetadata *stripe = lfirst(stripeMetadataCell); @@ -710,6 +711,43 @@ TruncateCStore(Relation rel, int elevel) } +/* + * ConditionalLockRelationWithTimeout tries to acquire a relation lock until + * it either succeeds or timesout. It doesn't enter wait queue and instead it + * sleeps between lock tries. + * + * This is based on the lock loop in lazy_truncate_heap(). 
+ */ +static bool +ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, + int retryInterval) +{ + int lock_retry = 0; + + while (true) + { + if (ConditionalLockRelation(rel, lockMode)) + { + break; + } + + /* + * Check for interrupts while trying to (re-)acquire the lock + */ + CHECK_FOR_INTERRUPTS(); + + if (++lock_retry > (timeout / retryInterval)) + { + return false; + } + + pg_usleep(retryInterval * 1000L); + } + + return true; +} + + static bool cstore_scan_analyze_next_block(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy) diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index 8ef78bfa4..ae23d9a26 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -10,8 +10,9 @@ step s1-begin: step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; +s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request step s2-vacuum: - VACUUM test_vacuum_vs_insert; + VACUUM VERBOSE test_vacuum_vs_insert; step s1-commit: COMMIT; @@ -39,11 +40,15 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; step s2-vacuum-full: - VACUUM FULL test_vacuum_vs_insert; + VACUUM FULL VERBOSE test_vacuum_vs_insert; step s1-commit: COMMIT; +s2: INFO: vacuuming "public.test_vacuum_vs_insert" +s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 1 pages +DETAIL: 0 dead row versions cannot be removed yet. +CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s. step s2-vacuum-full: <... 
completed> step s2-select: SELECT * FROM test_vacuum_vs_insert; diff --git a/specs/am_vacuum_vs_insert.spec b/specs/am_vacuum_vs_insert.spec index 57105e1dd..ac2d83667 100644 --- a/specs/am_vacuum_vs_insert.spec +++ b/specs/am_vacuum_vs_insert.spec @@ -29,12 +29,12 @@ session "s2" step "s2-vacuum" { - VACUUM test_vacuum_vs_insert; + VACUUM VERBOSE test_vacuum_vs_insert; } step "s2-vacuum-full" { - VACUUM FULL test_vacuum_vs_insert; + VACUUM FULL VERBOSE test_vacuum_vs_insert; } step "s2-select" From 55885c81dd5cdb9c60cd0e23a27d681a4df97034 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 10:48:34 -0700 Subject: [PATCH 079/124] log stats on verbose --- cstore_tableam.c | 106 ++++++++++++++++++++++++++++++- expected/am_vacuum.out | 58 ++++++++++++++++- expected/am_vacuum_vs_insert.out | 5 ++ specs/am_vacuum_vs_insert.spec | 1 - specs/create.spec | 1 - sql/am_vacuum.sql | 38 ++++++++++- 6 files changed, 203 insertions(+), 6 deletions(-) diff --git a/cstore_tableam.c b/cstore_tableam.c index f6b51b770..fa3cd8739 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -69,8 +69,8 @@ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, static bool IsCStoreTableAmTable(Oid relationId); static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, int retryInterval); - - +static void LogRelationStats(Relation rel, int elevel); +static char * CompressionTypeStr(CompressionType type); static void TruncateCStore(Relation rel, int elevel); static CStoreOptions * @@ -609,6 +609,108 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, { TruncateCStore(rel, elevel); } + + LogRelationStats(rel, elevel); +} + + +static void +LogRelationStats(Relation rel, int elevel) +{ + DataFileMetadata *datafileMetadata = NULL; + ListCell *stripeMetadataCell = NULL; + Oid relfilenode = rel->rd_node.relNode; + StringInfo infoBuf = makeStringInfo(); + + int compressionStats[COMPRESSION_COUNT] = { 0 }; + uint64 
totalStripeLength = 0; + uint64 tupleCount = 0; + uint64 blockCount = 0; + uint64 relPages = 0; + int stripeCount = 0; + TupleDesc tupdesc = RelationGetDescr(rel); + uint64 droppedBlocksWithData = 0; + + datafileMetadata = ReadDataFileMetadata(relfilenode); + stripeCount = list_length(datafileMetadata->stripeMetadataList); + + foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + StripeSkipList *skiplist = ReadStripeSkipList(relfilenode, stripe->id, + RelationGetDescr(rel), + stripe->blockCount); + for (uint32 column = 0; column < skiplist->columnCount; column++) + { + bool attrDropped = tupdesc->attrs[column].attisdropped; + for (uint32 block = 0; block < skiplist->blockCount; block++) + { + ColumnBlockSkipNode *skipnode = + &skiplist->blockSkipNodeArray[column][block]; + + /* ignore zero length blocks for dropped attributes */ + if (skipnode->valueLength > 0) + { + compressionStats[skipnode->valueCompressionType]++; + blockCount++; + + if (attrDropped) + { + droppedBlocksWithData++; + } + } + } + } + + tupleCount += stripe->rowCount; + totalStripeLength += stripe->dataLength; + } + + RelationOpenSmgr(rel); + relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + RelationCloseSmgr(rel); + + appendStringInfo(infoBuf, "total file size: %ld, total data size: %ld\n", + relPages * BLCKSZ, totalStripeLength); + appendStringInfo(infoBuf, + "total row count: %ld, stripe count: %d, " + "average rows per stripe: %ld\n", + tupleCount, stripeCount, tupleCount / stripeCount); + appendStringInfo(infoBuf, + "block count: %ld" + ", containing data for dropped columns: %ld", + blockCount, droppedBlocksWithData); + for (int compressionType = 0; compressionType < COMPRESSION_COUNT; compressionType++) + { + appendStringInfo(infoBuf, + ", %s compressed: %d", + CompressionTypeStr(compressionType), + compressionStats[compressionType]); + } + appendStringInfoString(infoBuf, "\n"); + + ereport(elevel, 
(errmsg("statistics for \"%s\":\n%s", RelationGetRelationName(rel), + infoBuf->data))); +} + + +/* + * CompressionTypeStr returns string representation of a compression type. + */ +static char * +CompressionTypeStr(CompressionType type) +{ + switch (type) + { + case COMPRESSION_NONE: + return "none"; + + case COMPRESSION_PG_LZ: + return "pglz"; + + default: + return "unknown"; + } } diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 7a1ff2777..9552f6ade 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -134,7 +134,14 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space -VACUUM t; +VACUUM VERBOSE t; +INFO: "t": truncated 7 to 2 pages +DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s +INFO: statistics for "t": +total file size: 16384, total data size: 10754 +total row count: 2530, stripe count: 3, average rows per stripe: 843 +block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 + SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- @@ -147,6 +154,55 @@ SELECT count(*) FROM t; 2530 (1 row) +-- add some stripes with different compression types and create some gaps, +-- then vacuum to print stats +BEGIN; +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 2000; +SET cstore.compression TO "pglz"; +SAVEPOINT s1; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s1; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +SET cstore.compression TO "none"; +SAVEPOINT s2; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s2; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +COMMIT; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 24576, total data size: 18808 +total row count: 5530, stripe count: 5, average rows per stripe: 1106 +block count: 7, containing data for dropped columns: 0, none 
compressed: 5, pglz compressed: 2 + +SELECT count(*) FROM t; + count +------- + 5530 +(1 row) + +-- check that we report blocks with data for dropped columns +ALTER TABLE t ADD COLUMN c int; +INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; +ALTER TABLE t DROP COLUMN c; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 32768, total data size: 31372 +total row count: 7030, stripe count: 6, average rows per stripe: 1171 +block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2 + +-- vacuum full should remove blocks for dropped columns +-- note that, a block will be stored in non-compressed for if compression +-- doesn't reduce its size. +SET cstore.compression TO "pglz"; +VACUUM FULL t; +VACUUM VERBOSE t; +INFO: statistics for "t": +total file size: 16384, total data size: 15728 +total row count: 7030, stripe count: 4, average rows per stripe: 1757 +block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6 + DROP TABLE t; -- Make sure we cleaned the metadata for t too SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index ae23d9a26..767604251 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -11,6 +11,11 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request +s2: INFO: statistics for "test_vacuum_vs_insert": +total file size: 8192, total data size: 26 +total row count: 3, stripe count: 1, average rows per stripe: 3 +block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 + step s2-vacuum: VACUUM VERBOSE test_vacuum_vs_insert; diff --git a/specs/am_vacuum_vs_insert.spec b/specs/am_vacuum_vs_insert.spec index ac2d83667..59c7274d5 100644 --- 
a/specs/am_vacuum_vs_insert.spec +++ b/specs/am_vacuum_vs_insert.spec @@ -44,4 +44,3 @@ step "s2-select" permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum" "s1-commit" "s2-select" permutation "s1-insert" "s1-begin" "s1-insert" "s2-vacuum-full" "s1-commit" "s2-select" - diff --git a/specs/create.spec b/specs/create.spec index f8e874678..09fc32131 100644 --- a/specs/create.spec +++ b/specs/create.spec @@ -5,4 +5,3 @@ step "s1a" } permutation "s1a" - diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index 10d1c7f6c..f7f9d77bd 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -57,10 +57,46 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space -VACUUM t; +VACUUM VERBOSE t; SELECT pg_size_pretty(pg_relation_size('t')); SELECT count(*) FROM t; +-- add some stripes with different compression types and create some gaps, +-- then vacuum to print stats + +BEGIN; +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 2000; +SET cstore.compression TO "pglz"; +SAVEPOINT s1; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s1; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +SET cstore.compression TO "none"; +SAVEPOINT s2; +INSERT INTO t SELECT i FROM generate_series(1, 1500) i; +ROLLBACK TO SAVEPOINT s2; +INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; +COMMIT; + +VACUUM VERBOSE t; + +SELECT count(*) FROM t; + +-- check that we report blocks with data for dropped columns +ALTER TABLE t ADD COLUMN c int; +INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; +ALTER TABLE t DROP COLUMN c; + +VACUUM VERBOSE t; + +-- vacuum full should remove blocks for dropped columns +-- note that, a block will be stored in non-compressed for if compression +-- doesn't reduce its size. 
+SET cstore.compression TO "pglz"; +VACUUM FULL t; +VACUUM VERBOSE t; + DROP TABLE t; -- Make sure we cleaned the metadata for t too From 76a71aa61a283e2e973d235d211d9db328fab425 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 5 Oct 2020 14:18:29 -0700 Subject: [PATCH 080/124] Use SnapshotDirty for reading metadata in truncation --- cstore.h | 1 + cstore_metadata_tables.c | 3 ++- cstore_tableam.c | 12 ++---------- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/cstore.h b/cstore.h index 489a4839b..c84588627 100644 --- a/cstore.h +++ b/cstore.h @@ -20,6 +20,7 @@ #include "storage/bufpage.h" #include "storage/lockdefs.h" #include "utils/relcache.h" +#include "utils/snapmgr.h" /* Defines for valid option names */ #define OPTION_NAME_COMPRESSION_TYPE "compression" diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 7c214eed5..565a37b07 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -377,7 +377,8 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk) index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); tupleDescriptor = RelationGetDescr(cstoreStripes); - scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, scanKey); + scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, + scanKey); while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { diff --git a/cstore_tableam.c b/cstore_tableam.c index fa3cd8739..4e9d47260 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -631,7 +631,7 @@ LogRelationStats(Relation rel, int elevel) TupleDesc tupdesc = RelationGetDescr(rel); uint64 droppedBlocksWithData = 0; - datafileMetadata = ReadDataFileMetadata(relfilenode); + datafileMetadata = ReadDataFileMetadata(relfilenode, false); stripeCount = list_length(datafileMetadata->stripeMetadataList); foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) @@ -765,15 +765,7 @@ TruncateCStore(Relation rel, int elevel) old_rel_pages = 
smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); - /* - * Get metadata as viewed in latest snapshot. Reading metadata in transaction - * snapshot is not enough, since stripes could have been created between - * current transaction start and lock acquisition time. Ignoring those - * stripes can destory data. - */ - PushActiveSnapshot(GetLatestSnapshot()); - metadata = ReadDataFileMetadata(rel->rd_node.relNode); - PopActiveSnapshot(); + metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); /* loop over stripes and find max used block */ foreach(stripeMetadataCell, metadata->stripeMetadataList) From e481e73d18722121c67a625b31e7732f01f545c6 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Thu, 8 Oct 2020 16:02:45 -0700 Subject: [PATCH 081/124] Encapsulate snapshot used for reading stripes in cstore_metadata_tables --- cstore.h | 1 + cstore_metadata_tables.c | 68 +++++++++++++++++++++++++------- cstore_tableam.c | 25 ++++++------ expected/am_vacuum.out | 6 +-- expected/am_vacuum_vs_insert.out | 2 +- 5 files changed, 70 insertions(+), 32 deletions(-) diff --git a/cstore.h b/cstore.h index c84588627..8a64730c8 100644 --- a/cstore.h +++ b/cstore.h @@ -285,6 +285,7 @@ extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); +extern uint64 GetHighestUsedAddress(Oid relfilenode); extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 565a37b07..d5ad28388 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -43,6 +43,7 @@ typedef struct EState *estate; } ModifyState; +static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot); static Oid 
CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); static Oid CStoreDataFilesRelationId(void); @@ -345,17 +346,8 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk) { - Oid cstoreStripesOid = InvalidOid; - Relation cstoreStripes = NULL; - Relation index = NULL; - TupleDesc tupleDescriptor = NULL; - ScanKeyData scanKey[1]; - SysScanDesc scanDescriptor = NULL; - HeapTuple heapTuple; - bool found = false; - DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata)); - found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); + bool found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); if (!found) { if (!missingOk) @@ -369,6 +361,56 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk) } } + datafileMetadata->stripeMetadataList = + ReadDataFileStripeList(relfilenode, GetTransactionSnapshot()); + + return datafileMetadata; +} + + +/* + * GetHighestUsedAddress returns the highest used address for the given + * relfilenode across all active and inactive transactions. + */ +uint64 +GetHighestUsedAddress(Oid relfilenode) +{ + uint64 highestUsedAddress = 0; + ListCell *stripeMetadataCell = NULL; + List *stripeMetadataList = NIL; + + SnapshotData SnapshotDirty; + InitDirtySnapshot(SnapshotDirty); + + stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty); + + foreach(stripeMetadataCell, stripeMetadataList) + { + StripeMetadata *stripe = lfirst(stripeMetadataCell); + uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; + highestUsedAddress = Max(highestUsedAddress, lastByte); + } + + return highestUsedAddress; +} + + +/* + * ReadDataFileStripeList reads the stripe list for a given relfilenode + * in the given snapshot. 
+ */ +static List * +ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) +{ + List *stripeMetadataList = NIL; + Oid cstoreStripesOid = InvalidOid; + Relation cstoreStripes = NULL; + Relation index = NULL; + TupleDesc tupleDescriptor = NULL; + ScanKeyData scanKey[1]; + SysScanDesc scanDescriptor = NULL; + HeapTuple heapTuple; + ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); @@ -403,16 +445,14 @@ ReadDataFileMetadata(Oid relfilenode, bool missingOk) stripeMetadata->rowCount = DatumGetInt64( datumArray[Anum_cstore_stripes_row_count - 1]); - datafileMetadata->stripeMetadataList = lappend( - datafileMetadata->stripeMetadataList, - stripeMetadata); + stripeMetadataList = lappend(stripeMetadataList, stripeMetadata); } systable_endscan_ordered(scanDescriptor); index_close(index, NoLock); heap_close(cstoreStripes, NoLock); - return datafileMetadata; + return stripeMetadataList; } diff --git a/cstore_tableam.c b/cstore_tableam.c index 4e9d47260..0840436ec 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -601,6 +601,8 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, /* this should have been resolved by vacuum.c until now */ Assert(params->truncate != VACOPT_TERNARY_DEFAULT); + LogRelationStats(rel, elevel); + /* * We don't have updates, deletes, or concurrent updates, so all we * care for now is truncating the unused space at the end of storage. 
@@ -609,8 +611,6 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, { TruncateCStore(rel, elevel); } - - LogRelationStats(rel, elevel); } @@ -727,8 +727,7 @@ TruncateCStore(Relation rel, int elevel) PGRUsage ru0; BlockNumber old_rel_pages = 0; BlockNumber new_rel_pages = 0; - DataFileMetadata *metadata = NULL; - ListCell *stripeMetadataCell = NULL; + SmgrAddr highestPhysicalAddress; pg_rusage_init(&ru0); @@ -765,17 +764,15 @@ TruncateCStore(Relation rel, int elevel) old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); - metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); - - /* loop over stripes and find max used block */ - foreach(stripeMetadataCell, metadata->stripeMetadataList) - { - StripeMetadata *stripe = lfirst(stripeMetadataCell); - uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; - SmgrAddr addr = logical_to_smgr(lastByte); - new_rel_pages = Max(new_rel_pages, addr.blockno + 1); - } + /* + * Due to the AccessExclusive lock there's no danger that + * new stripes be added beyond highestPhysicalAddress while + * we're truncating. 
+ */ + highestPhysicalAddress = + logical_to_smgr(GetHighestUsedAddress(rel->rd_node.relNode)); + new_rel_pages = highestPhysicalAddress.blockno + 1; if (new_rel_pages == old_rel_pages) { UnlockRelation(rel, AccessExclusiveLock); diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 9552f6ade..3db30a761 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -135,13 +135,13 @@ SELECT pg_size_pretty(pg_relation_size('t')); COMMIT; -- vacuum should truncate the relation to the usable space VACUUM VERBOSE t; -INFO: "t": truncated 7 to 2 pages -DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s INFO: statistics for "t": -total file size: 16384, total data size: 10754 +total file size: 57344, total data size: 10754 total row count: 2530, stripe count: 3, average rows per stripe: 843 block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 +INFO: "t": truncated 7 to 2 pages +DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index 767604251..a3eb0fb89 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -10,12 +10,12 @@ step s1-begin: step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; -s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request s2: INFO: statistics for "test_vacuum_vs_insert": total file size: 8192, total data size: 26 total row count: 3, stripe count: 1, average rows per stripe: 3 block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 +s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request step s2-vacuum: VACUUM VERBOSE test_vacuum_vs_insert; From 102b7670d40cba7731ce2f4763965d757db28a3e Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Fri, 9 
Oct 2020 12:35:42 -0700 Subject: [PATCH 082/124] Fix tautological compare issue (#19) --- .gitignore | 1 + cstore_metadata_tables.c | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index aa7be0e36..6b3554f3b 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ *.ko *.obj *.elf +*.bc # Libraries *.lib diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 7c214eed5..171860762 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -205,7 +205,7 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 blockCount) { StripeSkipList *skipList = NULL; - uint32 columnIndex = 0; + int32 columnIndex = 0; Oid cstoreSkipNodesOid = InvalidOid; Relation cstoreSkipNodes = NULL; Relation index = NULL; @@ -237,8 +237,8 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { - uint32 attr = 0; - uint32 blockIndex = 0; + int32 attr = 0; + int32 blockIndex = 0; ColumnBlockSkipNode *skipNode = NULL; Datum datumArray[Natts_cstore_skipnodes]; @@ -253,13 +253,13 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, if (attr <= 0 || attr > columnCount) { ereport(ERROR, (errmsg("invalid stripe skipnode entry"), - errdetail("Attribute number out of range: %u", attr))); + errdetail("Attribute number out of range: %d", attr))); } if (blockIndex < 0 || blockIndex >= blockCount) { ereport(ERROR, (errmsg("invalid stripe skipnode entry"), - errdetail("Block number out of range: %u", blockIndex))); + errdetail("Block number out of range: %d", blockIndex))); } columnIndex = attr - 1; From 5fc7f61936367dce31179b5b6dc2b83eb61e61ba Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Tue, 13 Oct 2020 13:36:02 +0200 Subject: [PATCH 083/124] Projection pushdown (#11) DESCRIPTION: add pushdown support for projections and quals in table access method scan This 
implementation uses custom scans to push projections into the scans on a columnar table. The custom scan replaces all access paths to a table to force the projection of the columns. --- Makefile | 6 +- cstore.h | 1 + cstore_customscan.c | 426 +++++++++++++++++++++++++++++++++++++++++++ cstore_customscan.h | 19 ++ cstore_reader.c | 14 ++ cstore_tableam.c | 46 ++++- cstore_tableam.h | 8 + expected/am_join.out | 37 ++++ sql/am_join.sql | 28 +++ 9 files changed, 579 insertions(+), 6 deletions(-) create mode 100644 cstore_customscan.c create mode 100644 cstore_customscan.h create mode 100644 expected/am_join.out create mode 100644 sql/am_join.sql diff --git a/Makefile b/Makefile index 58340450f..7e8bee13a 100644 --- a/Makefile +++ b/Makefile @@ -49,12 +49,12 @@ ifeq ($(USE_FDW),yes) fdw_copyto fdw_alter fdw_rollback fdw_truncate fdw_clean endif -# disabled tests: am_block_filtering ifeq ($(USE_TABLEAM),yes) PG_CFLAGS += -DUSE_TABLEAM - OBJS += cstore_tableam.o + OBJS += cstore_tableam.o cstore_customscan.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ - am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean + am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean \ + am_block_filtering am_join ISOLATION += am_vacuum_vs_insert endif diff --git a/cstore.h b/cstore.h index 8a64730c8..f5e0590a8 100644 --- a/cstore.h +++ b/cstore.h @@ -267,6 +267,7 @@ extern TableReadState * CStoreBeginRead(Relation relation, extern bool CStoreReadFinished(TableReadState *state); extern bool CStoreReadNextRow(TableReadState *state, Datum *columnValues, bool *columnNulls); +extern void CStoreRescan(TableReadState *readState); extern void CStoreEndRead(TableReadState *state); /* Function declarations for common functions */ diff --git a/cstore_customscan.c b/cstore_customscan.c new file mode 100644 index 000000000..0dcdff111 --- /dev/null +++ b/cstore_customscan.c @@ -0,0 +1,426 @@ 
+/*------------------------------------------------------------------------- + * + * cstore_customscan.c + * + * This file contains the implementation of a postgres custom scan that + * we use to push down the projections into the table access methods. + * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/skey.h" +#include "nodes/extensible.h" +#include "nodes/pg_list.h" +#include "nodes/plannodes.h" +#include "optimizer/optimizer.h" +#include "optimizer/pathnode.h" +#include "optimizer/paths.h" +#include "optimizer/restrictinfo.h" +#include "utils/relcache.h" + +#include "cstore.h" +#include "cstore_customscan.h" +#include "cstore_tableam.h" + +typedef struct CStoreScanPath +{ + CustomPath custom_path; + + /* place for local state during planning */ +} CStoreScanPath; + +typedef struct CStoreScanScan +{ + CustomScan custom_scan; + + /* place for local state during execution */ +} CStoreScanScan; + +typedef struct CStoreScanState +{ + CustomScanState custom_scanstate; + + List *qual; +} CStoreScanState; + + +static void CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, + RangeTblEntry *rte); +static Path * CreateCStoreScanPath(RelOptInfo *rel, RangeTblEntry *rte); +static Cost CStoreScanCost(RangeTblEntry *rte); +static Plan * CStoreScanPath_PlanCustomPath(PlannerInfo *root, + RelOptInfo *rel, + struct CustomPath *best_path, + List *tlist, + List *clauses, + List *custom_plans); + +static Node * CStoreScan_CreateCustomScanState(CustomScan *cscan); + +static void CStoreScan_BeginCustomScan(CustomScanState *node, EState *estate, int eflags); +static TupleTableSlot * CStoreScan_ExecCustomScan(CustomScanState *node); +static void CStoreScan_EndCustomScan(CustomScanState *node); +static void CStoreScan_ReScanCustomScan(CustomScanState *node); + +/* saved hook value in case of unload */ +static set_rel_pathlist_hook_type PreviousSetRelPathlistHook = NULL; + 
+static bool EnableCStoreCustomScan = true; + + +const struct CustomPathMethods CStoreScanPathMethods = { + .CustomName = "CStoreScan", + .PlanCustomPath = CStoreScanPath_PlanCustomPath, +}; + +const struct CustomScanMethods CStoreScanScanMethods = { + .CustomName = "CStoreScan", + .CreateCustomScanState = CStoreScan_CreateCustomScanState, +}; + +const struct CustomExecMethods CStoreExecuteMethods = { + .CustomName = "CStoreScan", + + .BeginCustomScan = CStoreScan_BeginCustomScan, + .ExecCustomScan = CStoreScan_ExecCustomScan, + .EndCustomScan = CStoreScan_EndCustomScan, + .ReScanCustomScan = CStoreScan_ReScanCustomScan, + + .ExplainCustomScan = NULL, +}; + + +/* + * cstore_customscan_init installs the hook required to intercept the postgres planner and + * provide extra paths for cstore tables + */ +void +cstore_customscan_init() +{ + PreviousSetRelPathlistHook = set_rel_pathlist_hook; + set_rel_pathlist_hook = CStoreSetRelPathlistHook; + + /* register customscan specific GUC's */ + DefineCustomBoolVariable( + "cstore.enable_custom_scan", + gettext_noop("Enables the use of a custom scan to push projections and quals " + "into the storage layer"), + NULL, + &EnableCStoreCustomScan, + true, + PGC_USERSET, + GUC_NO_SHOW_ALL, + NULL, NULL, NULL); +} + + +static void +clear_paths(RelOptInfo *rel) +{ + rel->pathlist = NULL; + rel->partial_pathlist = NULL; + rel->cheapest_startup_path = NULL; + rel->cheapest_total_path = NULL; + rel->cheapest_unique_path = NULL; +} + + +static void +CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, + RangeTblEntry *rte) +{ + /* call into previous hook if assigned */ + if (PreviousSetRelPathlistHook) + { + PreviousSetRelPathlistHook(root, rel, rti, rte); + } + + if (!EnableCStoreCustomScan) + { + /* custon scans are disabled, use normal table access method api instead */ + return; + } + + if (!OidIsValid(rte->relid)) + { + /* some calls to the pathlist hook don't have a valid relation set. 
Do nothing */ + return; + } + + /* + * Here we want to inspect if this relation pathlist hook is accessing a cstore table. + * If that is the case we want to insert an extra path that pushes down the projection + * into the scan of the table to minimize the data read. + */ + Relation relation = RelationIdGetRelation(rte->relid); + if (relation->rd_tableam == GetCstoreTableAmRoutine()) + { + ereport(DEBUG1, (errmsg("pathlist hook for cstore table am"))); + + /* we propose a new path that will be the only path for scanning this relation */ + Path *customPath = CreateCStoreScanPath(rel, rte); + clear_paths(rel); + add_path(rel, customPath); + } + RelationClose(relation); +} + + +static Path * +CreateCStoreScanPath(RelOptInfo *rel, RangeTblEntry *rte) +{ + CStoreScanPath *cspath = (CStoreScanPath *) newNode(sizeof(CStoreScanPath), + T_CustomPath); + + /* + * popuate custom path information + */ + CustomPath *cpath = &cspath->custom_path; + cpath->methods = &CStoreScanPathMethods; + + /* + * populate generic path information + */ + Path *path = &cpath->path; + path->pathtype = T_CustomScan; + path->parent = rel; + path->pathtarget = rel->reltarget; + + /* + * Add cost estimates for a cstore table scan, row count is the rows estimated by + * postgres' planner. + */ + path->rows = rel->rows; + path->startup_cost = 0; + path->total_cost = path->startup_cost + CStoreScanCost(rte); + + return (Path *) cspath; +} + + +/* + * CStoreScanCost calculates the cost of scanning the cstore table. The cost is estimated + * by using all stripe metadata to estimate based on the columns to read how many pages + * need to be read. 
+ */ +static Cost +CStoreScanCost(RangeTblEntry *rte) +{ + Relation rel = RelationIdGetRelation(rte->relid); + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + RelationClose(rel); + rel = NULL; + + uint32 maxColumnCount = 0; + uint64 totalStripeSize = 0; + ListCell *stripeMetadataCell = NULL; + foreach(stripeMetadataCell, metadata->stripeMetadataList) + { + StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); + totalStripeSize += stripeMetadata->dataLength; + maxColumnCount = Max(maxColumnCount, stripeMetadata->columnCount); + } + + Bitmapset *attr_needed = rte->selectedCols; + double numberOfColumnsRead = bms_num_members(attr_needed); + double selectionRatio = numberOfColumnsRead / (double) maxColumnCount; + Cost scanCost = (double) totalStripeSize / BLCKSZ * selectionRatio; + + return scanCost; +} + + +static Plan * +CStoreScanPath_PlanCustomPath(PlannerInfo *root, + RelOptInfo *rel, + struct CustomPath *best_path, + List *tlist, + List *clauses, + List *custom_plans) +{ + CStoreScanScan *plan = (CStoreScanScan *) newNode(sizeof(CStoreScanScan), + T_CustomScan); + + CustomScan *cscan = &plan->custom_scan; + cscan->methods = &CStoreScanScanMethods; + + /* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */ + clauses = extract_actual_clauses(clauses, false); + + cscan->scan.plan.targetlist = list_copy(tlist); + cscan->scan.plan.qual = clauses; + cscan->scan.scanrelid = best_path->path.parent->relid; + + return (Plan *) plan; +} + + +static Node * +CStoreScan_CreateCustomScanState(CustomScan *cscan) +{ + CStoreScanState *cstorescanstate = (CStoreScanState *) newNode( + sizeof(CStoreScanState), T_CustomScanState); + + CustomScanState *cscanstate = &cstorescanstate->custom_scanstate; + cscanstate->methods = &CStoreExecuteMethods; + + cstorescanstate->qual = cscan->scan.plan.qual; + + return (Node *) cscanstate; +} + + +static void +CStoreScan_BeginCustomScan(CustomScanState *cscanstate, 
EState *estate, int eflags) +{ + /* scan slot is already initialized */ +} + + +static Bitmapset * +CStoreAttrNeeded(ScanState *ss) +{ + TupleTableSlot *slot = ss->ss_ScanTupleSlot; + int natts = slot->tts_tupleDescriptor->natts; + Bitmapset *attr_needed = NULL; + Plan *plan = ss->ps.plan; + int flags = PVC_RECURSE_AGGREGATES | + PVC_RECURSE_WINDOWFUNCS | PVC_RECURSE_PLACEHOLDERS; + List *vars = list_concat(pull_var_clause((Node *) plan->targetlist, flags), + pull_var_clause((Node *) plan->qual, flags)); + ListCell *lc; + + foreach(lc, vars) + { + Var *var = lfirst(lc); + + if (var->varattno == 0) + { + elog(DEBUG1, "Need attribute: all"); + + /* all attributes are required, we don't need to add more so break*/ + attr_needed = bms_add_range(attr_needed, 0, natts - 1); + break; + } + + elog(DEBUG1, "Need attribute: %d", var->varattno); + attr_needed = bms_add_member(attr_needed, var->varattno - 1); + } + + return attr_needed; +} + + +static TupleTableSlot * +CStoreScanNext(CStoreScanState *cstorescanstate) +{ + CustomScanState *node = (CustomScanState *) cstorescanstate; + TableScanDesc scandesc; + EState *estate; + ScanDirection direction; + TupleTableSlot *slot; + + /* + * get information from the estate and scan state + */ + scandesc = node->ss.ss_currentScanDesc; + estate = node->ss.ps.state; + direction = estate->es_direction; + slot = node->ss.ss_ScanTupleSlot; + + if (scandesc == NULL) + { + /* the cstore access method does not use the flags, they are specific to heap */ + uint32 flags = 0; + Bitmapset *attr_needed = CStoreAttrNeeded(&node->ss); + + /* + * We reach here if the scan is not parallel, or if we're serially + * executing a scan that was planned to be parallel. 
+ */ + scandesc = cstore_beginscan_extended(node->ss.ss_currentRelation, + estate->es_snapshot, + 0, NULL, NULL, flags, attr_needed, + cstorescanstate->qual); + bms_free(attr_needed); + + node->ss.ss_currentScanDesc = scandesc; + } + + /* + * get the next tuple from the table + */ + if (table_scan_getnextslot(scandesc, direction, slot)) + { + return slot; + } + return NULL; +} + + +/* + * SeqRecheck -- access method routine to recheck a tuple in EvalPlanQual + */ +static bool +CStoreScanRecheck(CStoreScanState *node, TupleTableSlot *slot) +{ + return true; +} + + +static TupleTableSlot * +CStoreScan_ExecCustomScan(CustomScanState *node) +{ + return ExecScan(&node->ss, + (ExecScanAccessMtd) CStoreScanNext, + (ExecScanRecheckMtd) CStoreScanRecheck); +} + + +static void +CStoreScan_EndCustomScan(CustomScanState *node) +{ + TableScanDesc scanDesc; + + /* + * get information from node + */ + scanDesc = node->ss.ss_currentScanDesc; + + /* + * Free the exprcontext + */ + ExecFreeExprContext(&node->ss.ps); + + /* + * clean out the tuple table + */ + if (node->ss.ps.ps_ResultTupleSlot) + { + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + } + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* + * close heap scan + */ + if (scanDesc != NULL) + { + table_endscan(scanDesc); + } +} + + +static void +CStoreScan_ReScanCustomScan(CustomScanState *node) +{ + TableScanDesc scanDesc = node->ss.ss_currentScanDesc; + if (scanDesc != NULL) + { + table_rescan(node->ss.ss_currentScanDesc, NULL); + } +} diff --git a/cstore_customscan.h b/cstore_customscan.h new file mode 100644 index 000000000..9e388e13f --- /dev/null +++ b/cstore_customscan.h @@ -0,0 +1,19 @@ +/*------------------------------------------------------------------------- + * + * cstore_customscan.h + * + * Forward declarations of functions to hookup the custom scan feature of + * cstore. 
+ * + * $Id$ + * + *------------------------------------------------------------------------- + */ + +#ifndef CSTORE_FDW_CSTORE_CUSTOMSCAN_H +#define CSTORE_FDW_CSTORE_CUSTOMSCAN_H + +void cstore_customscan_init(void); + + +#endif /*CSTORE_FDW_CSTORE_CUSTOMSCAN_H */ diff --git a/cstore_reader.c b/cstore_reader.c index cf2d0b171..c86021f7e 100644 --- a/cstore_reader.c +++ b/cstore_reader.c @@ -29,6 +29,7 @@ #endif #include "optimizer/restrictinfo.h" #include "storage/fd.h" +#include "utils/guc.h" #include "utils/memutils.h" #include "utils/lsyscache.h" #include "utils/rel.h" @@ -222,6 +223,19 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu } +/* + * CStoreRescan clears the position where we were scanning so that the next read starts at + * the beginning again + */ +void +CStoreRescan(TableReadState *readState) +{ + readState->stripeBuffers = NULL; + readState->readStripeCount = 0; + readState->stripeReadRowCount = 0; +} + + /* Finishes a cstore read operation. 
*/ void CStoreEndRead(TableReadState *readState) diff --git a/cstore_tableam.c b/cstore_tableam.c index 0840436ec..eae806e59 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -37,6 +37,7 @@ #include "utils/syscache.h" #include "cstore.h" +#include "cstore_customscan.h" #include "cstore_tableam.h" #define CSTORE_TABLEAM_NAME "cstore_tableam" @@ -154,7 +155,7 @@ RelationColumnList(Relation rel) for (int i = 0; i < tupdesc->natts; i++) { - Index varno = 0; + Index varno = 1; AttrNumber varattno = i + 1; Oid vartype = tupdesc->attrs[i].atttypid; int32 vartypmod = tupdesc->attrs[i].atttypmod; @@ -188,11 +189,36 @@ cstore_beginscan(Relation relation, Snapshot snapshot, int nkeys, ScanKey key, ParallelTableScanDesc parallel_scan, uint32 flags) +{ + TableScanDesc scandesc; + int natts = relation->rd_att->natts; + Bitmapset *attr_needed = NULL; + + attr_needed = bms_add_range(attr_needed, 0, natts - 1); + + /* the cstore access method does not use the flags, they are specific to heap */ + flags = 0; + + scandesc = cstore_beginscan_extended(relation, snapshot, nkeys, key, parallel_scan, + flags, attr_needed, NULL); + + pfree(attr_needed); + + return scandesc; +} + + +TableScanDesc +cstore_beginscan_extended(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + uint32 flags, Bitmapset *attr_needed, List *scanQual) { TupleDesc tupdesc = relation->rd_att; TableReadState *readState = NULL; CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *columnList = NIL; + List *neededColumnList = NIL; MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); scan->cs_base.rs_rd = relation; @@ -204,7 +230,18 @@ cstore_beginscan(Relation relation, Snapshot snapshot, columnList = RelationColumnList(relation); - readState = CStoreBeginRead(relation, tupdesc, columnList, NULL); + /* only collect columns that we need for the scan */ + ListCell *columnCell = NULL; + foreach(columnCell, columnList) + { + Var *var 
= castNode(Var, lfirst(columnCell)); + if (bms_is_member(var->varattno - 1, attr_needed)) + { + neededColumnList = lappend(neededColumnList, var); + } + } + + readState = CStoreBeginRead(relation, tupdesc, neededColumnList, scanQual); scan->cs_readState = readState; @@ -226,7 +263,8 @@ static void cstore_rescan(TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode) { - elog(ERROR, "cstore_rescan not implemented"); + CStoreScanDesc scan = (CStoreScanDesc) sscan; + CStoreRescan(scan->cs_readState); } @@ -996,6 +1034,8 @@ cstore_tableam_init() ExecutorEnd_hook = CStoreExecutorEnd; prevObjectAccessHook = object_access_hook; object_access_hook = CStoreTableAMObjectAccessHook; + + cstore_customscan_init(); } diff --git a/cstore_tableam.h b/cstore_tableam.h index bdf7f96c0..557506b9f 100644 --- a/cstore_tableam.h +++ b/cstore_tableam.h @@ -1,7 +1,15 @@ #include "postgres.h" #include "fmgr.h" #include "access/tableam.h" +#include "access/skey.h" +#include "nodes/bitmapset.h" const TableAmRoutine * GetCstoreTableAmRoutine(void); extern void cstore_tableam_init(void); extern void cstore_tableam_finish(void); + +extern TableScanDesc cstore_beginscan_extended(Relation relation, Snapshot snapshot, + int nkeys, ScanKey key, + ParallelTableScanDesc parallel_scan, + uint32 flags, Bitmapset *attr_needed, + List *scanQual); diff --git a/expected/am_join.out b/expected/am_join.out new file mode 100644 index 000000000..fbb628187 --- /dev/null +++ b/expected/am_join.out @@ -0,0 +1,37 @@ +CREATE SCHEMA am_cstore_join; +SET search_path TO am_cstore_join; +CREATE TABLE users (id int, name text) USING cstore_tableam; +INSERT INTO users SELECT a, 'name' || a FROM generate_series(0,30-1) AS a; +CREATE TABLE things (id int, user_id int, name text) USING cstore_tableam; +INSERT INTO things SELECT a, a % 30, 'thing' || a FROM generate_series(1,300) AS a; +-- force the nested loop to rescan the table +SET enable_material TO off; +SET 
enable_hashjoin TO off; +SET enable_mergejoin TO off; +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 290; + count +------- + 10 +(1 row) + +-- verify the join uses a nested loop to trigger the rescan behaviour +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 299990; + QUERY PLAN +-------------------------------------------------- + Aggregate + -> Nested Loop + Join Filter: (users.id = things.user_id) + -> Custom Scan (CStoreScan) on things + Filter: (id > 299990) + -> Custom Scan (CStoreScan) on users +(6 rows) + +SET client_min_messages TO warning; +DROP SCHEMA am_cstore_join CASCADE; diff --git a/sql/am_join.sql b/sql/am_join.sql new file mode 100644 index 000000000..4d78dfe5b --- /dev/null +++ b/sql/am_join.sql @@ -0,0 +1,28 @@ +CREATE SCHEMA am_cstore_join; +SET search_path TO am_cstore_join; + +CREATE TABLE users (id int, name text) USING cstore_tableam; +INSERT INTO users SELECT a, 'name' || a FROM generate_series(0,30-1) AS a; + +CREATE TABLE things (id int, user_id int, name text) USING cstore_tableam; +INSERT INTO things SELECT a, a % 30, 'thing' || a FROM generate_series(1,300) AS a; + +-- force the nested loop to rescan the table +SET enable_material TO off; +SET enable_hashjoin TO off; +SET enable_mergejoin TO off; + +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 290; + +-- verify the join uses a nested loop to trigger the rescan behaviour +EXPLAIN (COSTS OFF) +SELECT count(*) +FROM users +JOIN things ON (users.id = things.user_id) +WHERE things.id > 299990; + +SET client_min_messages TO warning; +DROP SCHEMA am_cstore_join CASCADE; From 4355ca494541903fe8b1f2abaed29c4f8357f959 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 14 Oct 2020 13:56:58 -0700 Subject: [PATCH 084/124] trigger fix and tests --- Makefile | 2 +- cstore_customscan.c | 2 +- cstore_tableam.c | 59 
+++++++++++++++++++++++++++++++++++++ expected/am_trigger.out | 65 +++++++++++++++++++++++++++++++++++++++++ sql/am_trigger.sql | 61 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 187 insertions(+), 2 deletions(-) create mode 100644 expected/am_trigger.out create mode 100644 sql/am_trigger.sql diff --git a/Makefile b/Makefile index 7e8bee13a..0d581f145 100644 --- a/Makefile +++ b/Makefile @@ -54,7 +54,7 @@ ifeq ($(USE_TABLEAM),yes) OBJS += cstore_tableam.o cstore_customscan.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean \ - am_block_filtering am_join + am_block_filtering am_join am_trigger ISOLATION += am_vacuum_vs_insert endif diff --git a/cstore_customscan.c b/cstore_customscan.c index 0dcdff111..d7e6eb667 100644 --- a/cstore_customscan.c +++ b/cstore_customscan.c @@ -145,7 +145,7 @@ CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, return; } - if (!OidIsValid(rte->relid)) + if (!OidIsValid(rte->relid) || rte->rtekind != RTE_RELATION) { /* some calls to the pathlist hook don't have a valid relation set. 
Do nothing */ return; diff --git a/cstore_tableam.c b/cstore_tableam.c index eae806e59..09a65d75b 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -16,6 +16,7 @@ #include "catalog/index.h" #include "catalog/objectaccess.h" #include "catalog/pg_am.h" +#include "catalog/pg_trigger.h" #include "catalog/storage.h" #include "catalog/storage_xlog.h" #include "commands/progress.h" @@ -31,6 +32,7 @@ #include "storage/predicate.h" #include "storage/procarray.h" #include "storage/smgr.h" +#include "tcop/utility.h" #include "utils/builtins.h" #include "utils/pg_rusage.h" #include "utils/rel.h" @@ -62,11 +64,19 @@ static TableWriteState *CStoreWriteState = NULL; static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; static MemoryContext CStoreContext = NULL; static object_access_hook_type prevObjectAccessHook = NULL; +static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; /* forward declaration for static functions */ static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, int subId, void *arg); +static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, + const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, + char *completionTag); static bool IsCStoreTableAmTable(Oid relationId); static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, int retryInterval); @@ -1027,11 +1037,60 @@ CStoreExecutorEnd(QueryDesc *queryDesc) } +static void +CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, + const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, + char *completionTag) +{ + Node *parseTree = plannedStatement->utilityStmt; + + if (nodeTag(parseTree) == T_CreateTrigStmt) + { + CreateTrigStmt *createTrigStmt = (CreateTrigStmt *) parseTree; + Relation rel; + bool 
isCStore; + + rel = relation_openrv(createTrigStmt->relation, AccessShareLock); + isCStore = rel->rd_tableam == GetCstoreTableAmRoutine(); + relation_close(rel, AccessShareLock); + + if (isCStore && + createTrigStmt->row && + createTrigStmt->timing == TRIGGER_TYPE_AFTER) + { + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg( + "AFTER ROW triggers are not supported for columnstore access method"), + errhint("Consider an AFTER STATEMENT trigger instead."))); + } + } + + if (PreviousProcessUtilityHook != NULL) + { + PreviousProcessUtilityHook(plannedStatement, queryString, context, + paramListInfo, queryEnvironment, + destReceiver, completionTag); + } + else + { + standard_ProcessUtility(plannedStatement, queryString, context, + paramListInfo, queryEnvironment, + destReceiver, completionTag); + } +} + + void cstore_tableam_init() { PreviousExecutorEndHook = ExecutorEnd_hook; ExecutorEnd_hook = CStoreExecutorEnd; + PreviousProcessUtilityHook = ProcessUtility_hook; + ProcessUtility_hook = CStoreTableAMProcessUtility; prevObjectAccessHook = object_access_hook; object_access_hook = CStoreTableAMObjectAccessHook; diff --git a/expected/am_trigger.out b/expected/am_trigger.out new file mode 100644 index 000000000..53b2c9d9e --- /dev/null +++ b/expected/am_trigger.out @@ -0,0 +1,65 @@ +create or replace function trs_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE STATEMENT %', TG_OP; + RETURN NULL; +END; +$$; +create or replace function trs_after() returns trigger language plpgsql as $$ +DECLARE + r RECORD; +BEGIN + RAISE NOTICE 'AFTER STATEMENT %', TG_OP; + IF (TG_OP = 'DELETE') THEN + FOR R IN select * from old_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + ELSE + FOR R IN select * from new_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + END IF; + RETURN NULL; +END; +$$; +create or replace function trr_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE ROW %: (%)', TG_OP, NEW.i; + 
RETURN NEW; +END; +$$; +create or replace function trr_after() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'AFTER ROW %: (%)', TG_OP, NEW.i; + RETURN NEW; +END; +$$; +create table test_tr(i int) using cstore_tableam; +create trigger tr_before_stmt before insert on test_tr + for each statement execute procedure trs_before(); +create trigger tr_after_stmt after insert on test_tr + referencing new table as new_table + for each statement execute procedure trs_after(); +create trigger tr_before_row before insert on test_tr + for each row execute procedure trr_before(); +-- after triggers require TIDs, which are not supported yet +create trigger tr_after_row after insert on test_tr + for each row execute procedure trr_after(); +ERROR: AFTER ROW triggers are not supported for columnstore access method +HINT: Consider an AFTER STATEMENT trigger instead. +insert into test_tr values(1); +NOTICE: BEFORE STATEMENT INSERT +NOTICE: BEFORE ROW INSERT: (1) +NOTICE: AFTER STATEMENT INSERT +NOTICE: (1) +insert into test_tr values(2),(3),(4); +NOTICE: BEFORE STATEMENT INSERT +NOTICE: BEFORE ROW INSERT: (2) +NOTICE: BEFORE ROW INSERT: (3) +NOTICE: BEFORE ROW INSERT: (4) +NOTICE: AFTER STATEMENT INSERT +NOTICE: (2) +NOTICE: (3) +NOTICE: (4) +drop table test_tr; diff --git a/sql/am_trigger.sql b/sql/am_trigger.sql new file mode 100644 index 000000000..b8a918cf4 --- /dev/null +++ b/sql/am_trigger.sql @@ -0,0 +1,61 @@ + +create or replace function trs_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE STATEMENT %', TG_OP; + RETURN NULL; +END; +$$; + +create or replace function trs_after() returns trigger language plpgsql as $$ +DECLARE + r RECORD; +BEGIN + RAISE NOTICE 'AFTER STATEMENT %', TG_OP; + IF (TG_OP = 'DELETE') THEN + FOR R IN select * from old_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + ELSE + FOR R IN select * from new_table + LOOP + RAISE NOTICE ' (%)', r.i; + END LOOP; + END IF; + RETURN NULL; +END; +$$; + +create or 
replace function trr_before() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'BEFORE ROW %: (%)', TG_OP, NEW.i; + RETURN NEW; +END; +$$; + +create or replace function trr_after() returns trigger language plpgsql as $$ +BEGIN + RAISE NOTICE 'AFTER ROW %: (%)', TG_OP, NEW.i; + RETURN NEW; +END; +$$; + +create table test_tr(i int) using cstore_tableam; + +create trigger tr_before_stmt before insert on test_tr + for each statement execute procedure trs_before(); +create trigger tr_after_stmt after insert on test_tr + referencing new table as new_table + for each statement execute procedure trs_after(); + +create trigger tr_before_row before insert on test_tr + for each row execute procedure trr_before(); + +-- after triggers require TIDs, which are not supported yet +create trigger tr_after_row after insert on test_tr + for each row execute procedure trr_after(); + +insert into test_tr values(1); +insert into test_tr values(2),(3),(4); + +drop table test_tr; From c92ea1de9603ccbb1ffb4837f2995de09342694e Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Tue, 13 Oct 2020 22:07:42 -0700 Subject: [PATCH 085/124] Implement concurrent writes --- Makefile | 2 +- cstore.h | 33 ++++++- cstore_fdw.c | 12 +-- cstore_metadata_tables.c | 103 +++++++++++++++++++++- cstore_tableam.c | 2 - cstore_writer.c | 128 ++++++--------------------- expected/am_vacuum.out | 26 +++--- expected/am_vacuum_vs_insert.out | 4 +- expected/am_write_concurrency.out | 142 ++++++++++++++++++++++++++++++ specs/am_write_concurrency.spec | 67 ++++++++++++++ 10 files changed, 388 insertions(+), 131 deletions(-) create mode 100644 expected/am_write_concurrency.out create mode 100644 specs/am_write_concurrency.spec diff --git a/Makefile b/Makefile index 0d581f145..6804bae42 100644 --- a/Makefile +++ b/Makefile @@ -55,7 +55,7 @@ ifeq ($(USE_TABLEAM),yes) REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_rollback am_truncate 
am_vacuum am_clean \ am_block_filtering am_join am_trigger - ISOLATION += am_vacuum_vs_insert + ISOLATION += am_write_concurrency am_vacuum_vs_insert endif ifeq ($(enable_coverage),yes) diff --git a/cstore.h b/cstore.h index f5e0590a8..7ff657e33 100644 --- a/cstore.h +++ b/cstore.h @@ -218,15 +218,12 @@ typedef struct TableReadState /* TableWriteState represents state of a cstore file write operation. */ typedef struct TableWriteState { - DataFileMetadata *datafileMetadata; CompressionType compressionType; TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; - uint64 currentFileOffset; Relation relation; MemoryContext stripeWriteContext; - uint64 currentStripeId; StripeBuffers *stripeBuffers; StripeSkipList *stripeSkipList; uint32 stripeMaxRowCount; @@ -284,9 +281,11 @@ extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressio /* cstore_metadata_tables.c */ extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); -extern void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); extern uint64 GetHighestUsedAddress(Oid relfilenode); +extern StripeMetadata ReserveStripe(Relation rel, uint64 size, + uint64 rowCount, uint64 columnCount, + uint64 blockCount, uint64 blockRowCount); extern void SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipList, TupleDesc tupleDescriptor); @@ -317,4 +316,30 @@ logical_to_smgr(uint64 logicalOffset) } +/* + * Map a physical page adnd offset address to a logical address. + */ +static inline uint64 +smgr_to_logical(SmgrAddr addr) +{ + uint64 bytes_per_page = BLCKSZ - SizeOfPageHeaderData; + return bytes_per_page * addr.blockno + addr.offset - SizeOfPageHeaderData; +} + + +/* + * Get the first usable address of next block. 
+ */ +static inline SmgrAddr +next_block_start(SmgrAddr addr) +{ + SmgrAddr result = { + .blockno = addr.blockno + 1, + .offset = SizeOfPageHeaderData + }; + + return result; +} + + #endif /* CSTORE_H */ diff --git a/cstore_fdw.c b/cstore_fdw.c index 2790efaca..221c97843 100644 --- a/cstore_fdw.c +++ b/cstore_fdw.c @@ -494,10 +494,10 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) Assert(copyStatement->relation != NULL); /* - * Open and lock the relation. We acquire ShareUpdateExclusiveLock to allow - * concurrent reads, but block concurrent writes. + * Open and lock the relation. We acquire RowExclusiveLock to allow + * concurrent reads and writes. */ - relation = cstore_fdw_openrv(copyStatement->relation, ShareUpdateExclusiveLock); + relation = cstore_fdw_openrv(copyStatement->relation, RowExclusiveLock); relationId = RelationGetRelid(relation); /* allocate column values and nulls arrays */ @@ -572,7 +572,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) /* end read/write sessions and close the relation */ EndCopyFrom(copyState); CStoreEndWrite(writeState); - heap_close(relation, ShareUpdateExclusiveLock); + heap_close(relation, RowExclusiveLock); return processedRowCount; } @@ -2015,7 +2015,7 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela Relation relation = NULL; foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc); - relation = cstore_fdw_open(foreignTableOid, ShareUpdateExclusiveLock); + relation = cstore_fdw_open(foreignTableOid, RowExclusiveLock); cstoreOptions = CStoreGetOptions(foreignTableOid); tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); @@ -2086,7 +2086,7 @@ CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo) Relation relation = writeState->relation; CStoreEndWrite(writeState); - heap_close(relation, ShareUpdateExclusiveLock); + heap_close(relation, RowExclusiveLock); } } diff --git 
a/cstore_metadata_tables.c b/cstore_metadata_tables.c index ced5900d6..1bfc4be49 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -31,6 +31,8 @@ #include "lib/stringinfo.h" #include "port.h" #include "storage/fd.h" +#include "storage/lmgr.h" +#include "storage/smgr.h" #include "utils/builtins.h" #include "utils/fmgroids.h" #include "utils/memutils.h" @@ -43,6 +45,10 @@ typedef struct EState *estate; } ModifyState; +static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe); +static void GetHighestUsedAddressAndId(Oid relfilenode, + uint64 *highestUsedAddress, + uint64 *highestUsedId); static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot); static Oid CStoreStripesRelationId(void); static Oid CStoreStripesIndexRelationId(void); @@ -311,7 +317,7 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, /* * InsertStripeMetadataRow adds a row to cstore_stripes. */ -void +static void InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) { bool nulls[Natts_cstore_stripes] = { 0 }; @@ -330,7 +336,9 @@ InsertStripeMetadataRow(Oid relfilenode, StripeMetadata *stripe) Relation cstoreStripes = heap_open(cstoreStripesOid, RowExclusiveLock); ModifyState *modifyState = StartModifyRelation(cstoreStripes); + InsertTupleAndEnforceConstraints(modifyState, values, nulls); + FinishModifyRelation(modifyState); CommandCounterIncrement(); @@ -376,6 +384,23 @@ uint64 GetHighestUsedAddress(Oid relfilenode) { uint64 highestUsedAddress = 0; + uint64 highestUsedId = 0; + + GetHighestUsedAddressAndId(relfilenode, &highestUsedAddress, &highestUsedId); + + return highestUsedAddress; +} + + +/* + * GetHighestUsedAddressAndId returns the highest used address and id for + * the given relfilenode across all active and inactive transactions. 
+ */ +static void +GetHighestUsedAddressAndId(Oid relfilenode, + uint64 *highestUsedAddress, + uint64 *highestUsedId) +{ ListCell *stripeMetadataCell = NULL; List *stripeMetadataList = NIL; @@ -384,14 +409,83 @@ GetHighestUsedAddress(Oid relfilenode) stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty); + *highestUsedId = 0; + *highestUsedAddress = 0; + foreach(stripeMetadataCell, stripeMetadataList) { StripeMetadata *stripe = lfirst(stripeMetadataCell); uint64 lastByte = stripe->fileOffset + stripe->dataLength - 1; - highestUsedAddress = Max(highestUsedAddress, lastByte); + *highestUsedAddress = Max(*highestUsedAddress, lastByte); + *highestUsedId = Max(*highestUsedId, stripe->id); + } +} + + +/* + * ReserveStripe reserves and stripe of given size for the given relation, + * and inserts it into cstore_stripes. It is guaranteed that concurrent + * writes won't overwrite the returned stripe. + */ +StripeMetadata +ReserveStripe(Relation rel, uint64 sizeBytes, + uint64 rowCount, uint64 columnCount, + uint64 blockCount, uint64 blockRowCount) +{ + StripeMetadata stripe = { 0 }; + Oid relfilenode = InvalidOid; + uint64 currLogicalHigh = 0; + SmgrAddr currSmgrHigh; + uint64 nblocks = 0; + uint64 resLogicalStart = 0; + SmgrAddr resSmgrStart; + uint64 resLogicalEnd = 0; + SmgrAddr resSmgrEnd; + uint64 highestId = 0; + + /* + * We take ShareUpdateExclusiveLock here, so two space + * reservations conflict, space reservation <-> vacuum + * conflict, but space reservation doesn't conflict with + * reads & writes. 
+ */ + LockRelation(rel, ShareUpdateExclusiveLock); + + relfilenode = rel->rd_node.relNode; + GetHighestUsedAddressAndId(relfilenode, &currLogicalHigh, &highestId); + currSmgrHigh = logical_to_smgr(currLogicalHigh); + + resSmgrStart = next_block_start(currSmgrHigh); + resLogicalStart = smgr_to_logical(resSmgrStart); + + resLogicalEnd = resLogicalStart + sizeBytes - 1; + resSmgrEnd = logical_to_smgr(resLogicalEnd); + + RelationOpenSmgr(rel); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + + while (resSmgrEnd.blockno >= nblocks) + { + Buffer newBuffer = ReadBuffer(rel, P_NEW); + ReleaseBuffer(newBuffer); + nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); } - return highestUsedAddress; + RelationCloseSmgr(rel); + + stripe.fileOffset = resLogicalStart; + stripe.dataLength = sizeBytes; + stripe.blockCount = blockCount; + stripe.blockRowCount = blockRowCount; + stripe.columnCount = columnCount; + stripe.rowCount = rowCount; + stripe.id = highestId + 1; + + InsertStripeMetadataRow(relfilenode, &stripe); + + UnlockRelation(rel, ShareUpdateExclusiveLock); + + return stripe; } @@ -419,7 +513,7 @@ ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); tupleDescriptor = RelationGetDescr(cstoreStripes); - scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, NULL, 1, + scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, snapshot, 1, scanKey); while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) @@ -593,6 +687,7 @@ InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls) #if PG_VERSION_NUM >= 120000 TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor, &TTSOpsHeapTuple); + ExecStoreHeapTuple(tuple, slot, false); #else TupleTableSlot *slot = ExecInitExtraTupleSlot(state->estate, tupleDescriptor); diff --git a/cstore_tableam.c b/cstore_tableam.c index 09a65d75b..ae7799410 100644 --- a/cstore_tableam.c +++ 
b/cstore_tableam.c @@ -120,8 +120,6 @@ ResetCStoreMemoryContext() static void cstore_init_write_state(Relation relation) { - /*TODO: upgrade lock to serialize writes */ - if (CStoreWriteState != NULL) { /* TODO: consider whether it's possible for a new write to start */ diff --git a/cstore_writer.c b/cstore_writer.c index 2c0ca541e..3be14994b 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -33,7 +33,7 @@ static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, static StripeSkipList * CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, uint32 columnCount); -static StripeMetadata FlushStripe(TableWriteState *writeState); +static void FlushStripe(TableWriteState *writeState); static StringInfo SerializeBoolArray(bool *boolArray, uint32 boolArrayLength); static void SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue, int datumTypeLength, @@ -45,8 +45,6 @@ static void UpdateBlockSkipNodeMinMax(ColumnBlockSkipNode *blockSkipNode, int columnTypeLength, Oid columnCollation, FmgrInfo *comparisonFunction); static Datum DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength); -static void AppendStripeMetadata(DataFileMetadata *datafileMetadata, - StripeMetadata stripeMetadata); static StringInfo CopyStringInfo(StringInfo sourceString); @@ -64,34 +62,12 @@ CStoreBeginWrite(Relation relation, TupleDesc tupleDescriptor) { TableWriteState *writeState = NULL; - DataFileMetadata *datafileMetadata = NULL; FmgrInfo **comparisonFunctionArray = NULL; MemoryContext stripeWriteContext = NULL; - uint64 currentFileOffset = 0; uint32 columnCount = 0; uint32 columnIndex = 0; bool *columnMaskArray = NULL; BlockData *blockData = NULL; - uint64 currentStripeId = 0; - Oid relNode = relation->rd_node.relNode; - - datafileMetadata = ReadDataFileMetadata(relNode, false); - - /* - * If stripeMetadataList is not empty, jump to the position right after - * the last position. 
- */ - if (datafileMetadata->stripeMetadataList != NIL) - { - StripeMetadata *lastStripe = NULL; - uint64 lastStripeSize = 0; - - lastStripe = llast(datafileMetadata->stripeMetadataList); - lastStripeSize += lastStripe->dataLength; - - currentFileOffset = lastStripe->fileOffset + lastStripeSize; - currentStripeId = lastStripe->id + 1; - } /* get comparison function pointers for each of the columns */ columnCount = tupleDescriptor->natts; @@ -129,19 +105,16 @@ CStoreBeginWrite(Relation relation, writeState = palloc0(sizeof(TableWriteState)); writeState->relation = relation; - writeState->datafileMetadata = datafileMetadata; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; writeState->blockRowCount = blockRowCount; writeState->tupleDescriptor = tupleDescriptor; - writeState->currentFileOffset = currentFileOffset; writeState->comparisonFunctionArray = comparisonFunctionArray; writeState->stripeBuffers = NULL; writeState->stripeSkipList = NULL; writeState->stripeWriteContext = stripeWriteContext; writeState->blockData = blockData; writeState->compressionBuffer = NULL; - writeState->currentStripeId = currentStripeId; return writeState; } @@ -164,7 +137,6 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; uint32 columnCount = writeState->tupleDescriptor->natts; - DataFileMetadata *datafileMetadata = writeState->datafileMetadata; const uint32 blockRowCount = writeState->blockRowCount; BlockData *blockData = writeState->blockData; MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); @@ -238,28 +210,14 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul stripeBuffers->rowCount++; if (stripeBuffers->rowCount >= writeState->stripeMaxRowCount) { - StripeMetadata stripeMetadata = FlushStripe(writeState); - 
MemoryContextReset(writeState->stripeWriteContext); - - writeState->currentStripeId++; + FlushStripe(writeState); /* set stripe data and skip list to NULL so they are recreated next time */ writeState->stripeBuffers = NULL; writeState->stripeSkipList = NULL; + } - /* - * Append stripeMetadata in old context so next MemoryContextReset - * doesn't free it. - */ - MemoryContextSwitchTo(oldContext); - InsertStripeMetadataRow(writeState->relation->rd_node.relNode, - &stripeMetadata); - AppendStripeMetadata(datafileMetadata, stripeMetadata); - } - else - { - MemoryContextSwitchTo(oldContext); - } + MemoryContextSwitchTo(oldContext); } @@ -278,17 +236,13 @@ CStoreEndWrite(TableWriteState *writeState) { MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); - StripeMetadata stripeMetadata = FlushStripe(writeState); + FlushStripe(writeState); MemoryContextReset(writeState->stripeWriteContext); MemoryContextSwitchTo(oldContext); - InsertStripeMetadataRow(writeState->relation->rd_node.relNode, - &stripeMetadata); - AppendStripeMetadata(writeState->datafileMetadata, stripeMetadata); } MemoryContextDelete(writeState->stripeWriteContext); - list_free_deep(writeState->datafileMetadata->stripeMetadataList); pfree(writeState->comparisonFunctionArray); FreeBlockData(writeState->blockData); pfree(writeState); @@ -366,11 +320,9 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, static void -WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) +WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) { - uint64 logicalOffset = writeState->currentFileOffset; uint64 remaining = dataLength; - Relation rel = writeState->relation; Buffer buffer; while (remaining > 0) @@ -383,14 +335,7 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) RelationOpenSmgr(rel); nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); - - while (addr.blockno >= nblocks) - { - Buffer newBuffer = 
ReadBuffer(rel, P_NEW); - ReleaseBuffer(newBuffer); - nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); - } - + Assert(addr.blockno < nblocks); RelationCloseSmgr(rel); buffer = ReadBuffer(rel, addr.blockno); @@ -459,7 +404,7 @@ WriteToSmgr(TableWriteState *writeState, char *data, uint32 dataLength) * the function creates the skip list and footer buffers. Finally, the function * flushes the skip list, data, and footer buffers to the file. */ -static StripeMetadata +static void FlushStripe(TableWriteState *writeState) { StripeMetadata stripeMetadata = { 0 }; @@ -474,8 +419,9 @@ FlushStripe(TableWriteState *writeState) uint32 blockRowCount = writeState->blockRowCount; uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; - uint64 initialFileOffset = writeState->currentFileOffset; + uint64 currentFileOffset = 0; uint64 stripeSize = 0; + uint64 stripeRowCount = 0; /* * check if the last block needs serialization , the last block was not serialized @@ -520,6 +466,18 @@ FlushStripe(TableWriteState *writeState) } } + for (blockIndex = 0; blockIndex < blockCount; blockIndex++) + { + stripeRowCount += + stripeSkipList->blockSkipNodeArray[0][blockIndex].rowCount; + } + + stripeMetadata = ReserveStripe(writeState->relation, stripeSize, + stripeRowCount, columnCount, blockCount, + blockRowCount); + + currentFileOffset = stripeMetadata.fileOffset; + /* * Each stripe has only one section: * Data section, in which we store data for each column continuously. 
@@ -541,8 +499,9 @@ FlushStripe(TableWriteState *writeState) columnBuffers->blockBuffersArray[blockIndex]; StringInfo existsBuffer = blockBuffers->existsBuffer; - WriteToSmgr(writeState, existsBuffer->data, existsBuffer->len); - writeState->currentFileOffset += existsBuffer->len; + WriteToSmgr(writeState->relation, currentFileOffset, + existsBuffer->data, existsBuffer->len); + currentFileOffset += existsBuffer->len; } for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) @@ -551,30 +510,16 @@ FlushStripe(TableWriteState *writeState) columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = blockBuffers->valueBuffer; - WriteToSmgr(writeState, valueBuffer->data, valueBuffer->len); - writeState->currentFileOffset += valueBuffer->len; + WriteToSmgr(writeState->relation, currentFileOffset, + valueBuffer->data, valueBuffer->len); + currentFileOffset += valueBuffer->len; } } /* create skip list and footer buffers */ SaveStripeSkipList(writeState->relation->rd_node.relNode, - writeState->currentStripeId, + stripeMetadata.id, stripeSkipList, tupleDescriptor); - - for (blockIndex = 0; blockIndex < blockCount; blockIndex++) - { - stripeMetadata.rowCount += - stripeSkipList->blockSkipNodeArray[0][blockIndex].rowCount; - } - - stripeMetadata.fileOffset = initialFileOffset; - stripeMetadata.dataLength = stripeSize; - stripeMetadata.id = writeState->currentStripeId; - stripeMetadata.blockCount = blockCount; - stripeMetadata.blockRowCount = writeState->blockRowCount; - stripeMetadata.columnCount = columnCount; - - return stripeMetadata; } @@ -797,21 +742,6 @@ DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength) } -/* - * AppendStripeMetadata adds a copy of given stripeMetadata to the given - * table footer's stripeMetadataList. 
- */ -static void -AppendStripeMetadata(DataFileMetadata *datafileMetadata, StripeMetadata stripeMetadata) -{ - StripeMetadata *stripeMetadataCopy = palloc0(sizeof(StripeMetadata)); - memcpy(stripeMetadataCopy, &stripeMetadata, sizeof(StripeMetadata)); - - datafileMetadata->stripeMetadataList = lappend(datafileMetadata->stripeMetadataList, - stripeMetadataCopy); -} - - /* * CopyStringInfo creates a deep copy of given source string allocating only needed * amount of memory. diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index 3db30a761..d1270a3d2 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -68,24 +68,24 @@ ALTER TABLE t DROP COLUMN a; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; stripe | attr | block | ?column? | ?column? --------+------+-------+----------+---------- - 0 | 1 | 0 | f | f - 0 | 2 | 0 | f | f 1 | 1 | 0 | f | f 1 | 2 | 0 | f | f 2 | 1 | 0 | f | f 2 | 2 | 0 | f | f + 3 | 1 | 0 | f | f + 3 | 2 | 0 | f | f (6 rows) VACUUM FULL t; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; stripe | attr | block | ?column? | ?column? 
--------+------+-------+----------+---------- - 0 | 1 | 0 | t | t - 0 | 2 | 0 | f | f 1 | 1 | 0 | t | t 1 | 2 | 0 | f | f 2 | 1 | 0 | t | t 2 | 2 | 0 | f | f + 3 | 1 | 0 | t | t + 3 | 2 | 0 | f | f (6 rows) -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands @@ -107,14 +107,14 @@ SELECT count(*) FROM t; SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- - 16 kB + 32 kB (1 row) INSERT INTO t SELECT i FROM generate_series(1, 10000) i; SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- - 56 kB + 112 kB (1 row) SELECT count(*) FROM t; @@ -129,23 +129,23 @@ ROLLBACK TO SAVEPOINT s1; SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- - 56 kB + 112 kB (1 row) COMMIT; -- vacuum should truncate the relation to the usable space VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 57344, total data size: 10754 +total file size: 114688, total data size: 10754 total row count: 2530, stripe count: 3, average rows per stripe: 843 block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz compressed: 0 -INFO: "t": truncated 7 to 2 pages +INFO: "t": truncated 14 to 4 pages DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s SELECT pg_size_pretty(pg_relation_size('t')); pg_size_pretty ---------------- - 16 kB + 32 kB (1 row) SELECT count(*) FROM t; @@ -172,7 +172,7 @@ INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; COMMIT; VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 24576, total data size: 18808 +total file size: 49152, total data size: 18808 total row count: 5530, stripe count: 5, average rows per stripe: 1106 block count: 7, containing data for dropped columns: 0, none compressed: 5, pglz compressed: 2 @@ -188,7 +188,7 @@ INSERT INTO t SELECT 1, i / 5 FROM generate_series(1, 1500) i; ALTER TABLE t DROP COLUMN c; VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 32768, total data size: 31372 
+total file size: 65536, total data size: 31372 total row count: 7030, stripe count: 6, average rows per stripe: 1171 block count: 11, containing data for dropped columns: 2, none compressed: 9, pglz compressed: 2 @@ -199,7 +199,7 @@ SET cstore.compression TO "pglz"; VACUUM FULL t; VACUUM VERBOSE t; INFO: statistics for "t": -total file size: 16384, total data size: 15728 +total file size: 49152, total data size: 15728 total row count: 7030, stripe count: 4, average rows per stripe: 1757 block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 6 diff --git a/expected/am_vacuum_vs_insert.out b/expected/am_vacuum_vs_insert.out index a3eb0fb89..d463bd076 100644 --- a/expected/am_vacuum_vs_insert.out +++ b/expected/am_vacuum_vs_insert.out @@ -11,7 +11,7 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; s2: INFO: statistics for "test_vacuum_vs_insert": -total file size: 8192, total data size: 26 +total file size: 24576, total data size: 26 total row count: 3, stripe count: 1, average rows per stripe: 3 block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 @@ -51,7 +51,7 @@ step s1-commit: COMMIT; s2: INFO: vacuuming "public.test_vacuum_vs_insert" -s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 1 pages +s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versions in 3 pages DETAIL: 0 dead row versions cannot be removed yet. CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s. step s2-vacuum-full: <... 
completed> diff --git a/expected/am_write_concurrency.out b/expected/am_write_concurrency.out new file mode 100644 index 000000000..41c6ee7e6 --- /dev/null +++ b/expected/am_write_concurrency.out @@ -0,0 +1,142 @@ +Parsed test spec with 2 sessions + +starting permutation: s1-begin s2-begin s1-insert s2-insert s1-select s2-select s1-commit s2-commit s1-select +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s1-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(1, 3) i; + +step s2-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +1 2 +2 4 +3 6 +step s2-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +step s1-commit: + COMMIT; + +step s2-commit: + COMMIT; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +1 2 +2 4 +3 6 +4 8 +5 10 +6 12 + +starting permutation: s1-begin s2-begin s1-copy s2-insert s1-select s2-select s1-commit s2-commit s1-select +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s1-copy: + COPY test_insert_concurrency(a) FROM PROGRAM 'seq 11 13'; + +step s2-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +11 +12 +13 +step s2-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +step s1-commit: + COMMIT; + +step s2-commit: + COMMIT; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +11 +12 +13 + +starting permutation: s1-begin s2-begin s2-insert s1-copy s1-select s2-select s1-commit s2-commit s1-select +step s1-begin: + BEGIN; + +step s2-begin: + BEGIN; + +step s2-insert: + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; + +step s1-copy: + COPY test_insert_concurrency(a) FROM 
PROGRAM 'seq 11 13'; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +11 +12 +13 +step s2-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +step s1-commit: + COMMIT; + +step s2-commit: + COMMIT; + +step s1-select: + SELECT * FROM test_insert_concurrency ORDER BY a; + +a b + +4 8 +5 10 +6 12 +11 +12 +13 diff --git a/specs/am_write_concurrency.spec b/specs/am_write_concurrency.spec new file mode 100644 index 000000000..7b5d90a4d --- /dev/null +++ b/specs/am_write_concurrency.spec @@ -0,0 +1,67 @@ +setup +{ + CREATE TABLE test_insert_concurrency (a int, b int) USING cstore_tableam; +} + +teardown +{ + DROP TABLE IF EXISTS test_insert_concurrency CASCADE; +} + +session "s1" + +step "s1-begin" +{ + BEGIN; +} + +step "s1-insert" +{ + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(1, 3) i; +} + +step "s1-copy" +{ + COPY test_insert_concurrency(a) FROM PROGRAM 'seq 11 13'; +} + +step "s1-select" +{ + SELECT * FROM test_insert_concurrency ORDER BY a; +} + +step "s1-commit" +{ + COMMIT; +} + +session "s2" + +step "s2-begin" +{ + BEGIN; +} + +step "s2-insert" +{ + INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; +} + +step "s2-select" +{ + SELECT * FROM test_insert_concurrency ORDER BY a; +} + +step "s2-commit" +{ + COMMIT; +} + +# writes shouldn't block writes or reads +permutation "s1-begin" "s2-begin" "s1-insert" "s2-insert" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" + +# copy vs insert +permutation "s1-begin" "s2-begin" "s1-copy" "s2-insert" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" + +# insert vs copy +permutation "s1-begin" "s2-begin" "s2-insert" "s1-copy" "s1-select" "s2-select" "s1-commit" "s2-commit" "s1-select" From a3caa5ff0f41dafd83ed2677bdb063e4c035efca Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 30 Oct 2020 19:27:42 -0700 Subject: [PATCH 086/124] fix "make clean" --- Makefile | 10 ++++++---- 1 
file changed, 6 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 6804bae42..5b65a08bb 100644 --- a/Makefile +++ b/Makefile @@ -36,10 +36,12 @@ DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cs REGRESS = extension_create ISOLATION = create -EXTRA_CLEAN = cstore.pb-c.h cstore.pb-c.c data/*.cstore data/*.cstore.footer \ - sql/block_filtering.sql sql/create.sql sql/data_types.sql sql/load.sql \ - sql/copyto.sql expected/block_filtering.out expected/create.out \ - expected/data_types.out expected/load.out expected/copyto.out +EXTRA_CLEAN = sql/fdw_block_filtering.sql sql/fdw_create.sql sql/fdw_data_types.sql sql/fdw_load.sql \ + sql/fdw_copyto.sql expected/fdw_block_filtering.out expected/fdw_create.out \ + expected/fdw_data_types.out expected/fdw_load.out expected/fdw_copyto.out \ + sql/am_block_filtering.sql sql/am_create.sql sql/am_data_types.sql sql/am_load.sql \ + sql/am_copyto.sql expected/am_block_filtering.out expected/am_create.out \ + expected/am_data_types.out expected/am_load.out expected/am_copyto.out ifeq ($(USE_FDW),yes) PG_CFLAGS += -DUSE_FDW From acd49b68aa20342b828adb08f19f4c8d34a5fcd5 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Sun, 1 Nov 2020 16:57:31 -0800 Subject: [PATCH 087/124] Support for v13 --- cstore_fdw--1.7--1.8.sql | 2 +- cstore_fdw.c | 45 +++++++++++++++++++++++++++------------- cstore_tableam.c | 45 +++++++++++++++++++++++++++------------- cstore_version_compat.h | 28 +++++++++++++++---------- 4 files changed, 80 insertions(+), 40 deletions(-) diff --git a/cstore_fdw--1.7--1.8.sql b/cstore_fdw--1.7--1.8.sql index 8fe9416d1..cf6d510d5 100644 --- a/cstore_fdw--1.7--1.8.sql +++ b/cstore_fdw--1.7--1.8.sql @@ -3,7 +3,7 @@ DO $proc$ BEGIN -IF version() ~ '12' THEN +IF version() ~ '12' or version() ~ '13' THEN EXECUTE $$ CREATE FUNCTION cstore_tableam_handler(internal) RETURNS table_am_handler diff --git a/cstore_fdw.c b/cstore_fdw.c index 221c97843..328125535 100644 --- a/cstore_fdw.c +++ 
b/cstore_fdw.c @@ -20,7 +20,11 @@ #include "access/heapam.h" #include "access/reloptions.h" +#if PG_VERSION_NUM >= 130000 +#include "access/heaptoast.h" +#else #include "access/tuptoaster.h" +#endif #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/indexing.h" @@ -110,7 +114,14 @@ static const CStoreValidOption ValidOptionArray[] = static object_access_hook_type prevObjectAccessHook = NULL; /* local functions forward declarations */ -#if PG_VERSION_NUM >= 100000 +#if PG_VERSION_NUM >= 130000 +static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, + QueryCompletion *queryCompletion); +#elif PG_VERSION_NUM >= 100000 static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, ProcessUtilityContext context, ParamListInfo paramListInfo, @@ -216,7 +227,8 @@ static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; void cstore_fdw_init() { - PreviousProcessUtilityHook = ProcessUtility_hook; + PreviousProcessUtilityHook = (ProcessUtility_hook != NULL) ? + ProcessUtility_hook : standard_ProcessUtility; ProcessUtility_hook = CStoreProcessUtility; prevObjectAccessHook = object_access_hook; object_access_hook = CStoreFdwObjectAccessHook; @@ -284,13 +296,20 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) * the previous utility hook or the standard utility command via macro * CALL_PREVIOUS_UTILITY. 
*/ -#if PG_VERSION_NUM >= 100000 +#if PG_VERSION_NUM >= 130000 static void CStoreProcessUtility(PlannedStmt *plannedStatement, const char *queryString, ProcessUtilityContext context, ParamListInfo paramListInfo, QueryEnvironment *queryEnvironment, - DestReceiver *destReceiver, char *completionTag) + DestReceiver *destReceiver, QueryCompletion *queryCompletion) +#elif PG_VERSION_NUM >= 100000 +static void +CStoreProcessUtility(PlannedStmt * plannedStatement, const char * queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment * queryEnvironment, + DestReceiver * destReceiver, char * completionTag) #else static void CStoreProcessUtility(Node * parseTree, const char * queryString, @@ -299,6 +318,9 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, DestReceiver * destReceiver, char * completionTag) #endif { +#if PG_VERSION_NUM >= 130000 + char *completionTag = NULL; +#endif #if PG_VERSION_NUM >= 100000 Node *parseTree = plannedStatement->utilityStmt; #endif @@ -313,8 +335,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, } else { - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + CALL_PREVIOUS_UTILITY(); } } else if (nodeTag(parseTree) == T_TruncateStmt) @@ -330,8 +351,7 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, { truncateStatement->relations = otherTablesList; - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + CALL_PREVIOUS_UTILITY(); /* restore the former relation list. 
Our * replacement could be freed but still needed @@ -352,21 +372,18 @@ CStoreProcessUtility(Node * parseTree, const char * queryString, { AlterTableStmt *alterTable = (AlterTableStmt *) parseTree; CStoreProcessAlterTableCommand(alterTable); - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + CALL_PREVIOUS_UTILITY(); } else if (nodeTag(parseTree) == T_DropdbStmt) { /* let postgres handle error checking and dropping of the database */ - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + CALL_PREVIOUS_UTILITY(); } /* handle other utility statements */ else { - CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, - destReceiver, completionTag); + CALL_PREVIOUS_UTILITY(); } } diff --git a/cstore_tableam.c b/cstore_tableam.c index ae7799410..c22ab7baf 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -10,7 +10,11 @@ #include "access/rewriteheap.h" #include "access/tableam.h" #include "access/tsmapi.h" +#if PG_VERSION_NUM >= 130000 +#include "access/heaptoast.h" +#else #include "access/tuptoaster.h" +#endif #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/index.h" @@ -41,6 +45,7 @@ #include "cstore.h" #include "cstore_customscan.h" #include "cstore_tableam.h" +#include "cstore_version_compat.h" #define CSTORE_TABLEAM_NAME "cstore_tableam" @@ -70,6 +75,15 @@ static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; static void CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId, int subId, void *arg); +#if PG_VERSION_NUM >= 130000 +static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, + const char *queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment *queryEnvironment, + DestReceiver *destReceiver, + QueryCompletion *qc); +#else static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, const char *queryString, 
ProcessUtilityContext context, @@ -77,6 +91,8 @@ static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, QueryEnvironment *queryEnvironment, DestReceiver *destReceiver, char *completionTag); +#endif + static bool IsCStoreTableAmTable(Oid relationId); static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, int retryInterval); @@ -1035,6 +1051,7 @@ CStoreExecutorEnd(QueryDesc *queryDesc) } +#if PG_VERSION_NUM >= 130000 static void CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, const char *queryString, @@ -1042,7 +1059,17 @@ CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, ParamListInfo paramListInfo, QueryEnvironment *queryEnvironment, DestReceiver *destReceiver, - char *completionTag) + QueryCompletion *queryCompletion) +#else +static void +CStoreTableAMProcessUtility(PlannedStmt * plannedStatement, + const char * queryString, + ProcessUtilityContext context, + ParamListInfo paramListInfo, + QueryEnvironment * queryEnvironment, + DestReceiver * destReceiver, + char * completionTag) +#endif { Node *parseTree = plannedStatement->utilityStmt; @@ -1067,18 +1094,7 @@ CStoreTableAMProcessUtility(PlannedStmt *plannedStatement, } } - if (PreviousProcessUtilityHook != NULL) - { - PreviousProcessUtilityHook(plannedStatement, queryString, context, - paramListInfo, queryEnvironment, - destReceiver, completionTag); - } - else - { - standard_ProcessUtility(plannedStatement, queryString, context, - paramListInfo, queryEnvironment, - destReceiver, completionTag); - } + CALL_PREVIOUS_UTILITY(); } @@ -1087,7 +1103,8 @@ cstore_tableam_init() { PreviousExecutorEndHook = ExecutorEnd_hook; ExecutorEnd_hook = CStoreExecutorEnd; - PreviousProcessUtilityHook = ProcessUtility_hook; + PreviousProcessUtilityHook = (ProcessUtility_hook != NULL) ? 
+ ProcessUtility_hook : standard_ProcessUtility; ProcessUtility_hook = CStoreTableAMProcessUtility; prevObjectAccessHook = object_access_hook; object_access_hook = CStoreTableAMObjectAccessHook; diff --git a/cstore_version_compat.h b/cstore_version_compat.h index 3d1a60f93..69eb9c9f3 100644 --- a/cstore_version_compat.h +++ b/cstore_version_compat.h @@ -32,18 +32,18 @@ ExplainPropertyInteger(qlabel, NULL, value, es) #endif -#define PREVIOUS_UTILITY (PreviousProcessUtilityHook != NULL \ - ? PreviousProcessUtilityHook : standard_ProcessUtility) -#if PG_VERSION_NUM >= 100000 -#define CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, \ - destReceiver, completionTag) \ - PREVIOUS_UTILITY(plannedStatement, queryString, context, paramListInfo, \ - queryEnvironment, destReceiver, completionTag) +#if PG_VERSION_NUM >= 130000 +#define CALL_PREVIOUS_UTILITY() \ + PreviousProcessUtilityHook(plannedStatement, queryString, context, paramListInfo, \ + queryEnvironment, destReceiver, queryCompletion) +#elif PG_VERSION_NUM >= 100000 +#define CALL_PREVIOUS_UTILITY() \ + PreviousProcessUtilityHook(plannedStatement, queryString, context, paramListInfo, \ + queryEnvironment, destReceiver, completionTag) #else -#define CALL_PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, \ - destReceiver, completionTag) \ - PREVIOUS_UTILITY(parseTree, queryString, context, paramListInfo, destReceiver, \ - completionTag) +#define CALL_PREVIOUS_UTILITY() \ + PreviousProcessUtilityHook(parseTree, queryString, context, paramListInfo, \ + destReceiver, completionTag) #endif #if PG_VERSION_NUM < 120000 @@ -56,4 +56,10 @@ #endif +#if PG_VERSION_NUM >= 130000 +#define heap_open table_open +#define heap_openrv table_openrv +#define heap_close table_close +#endif + #endif /* CSTORE_COMPAT_H */ From 288025d9eae42d60767d39ee84ffa5440aa044ea Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Mon, 2 Nov 2020 13:04:18 +0100 Subject: [PATCH 088/124] add pg13 on CI --- 
.circleci/config.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9f2532c1d..645211182 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -45,6 +45,20 @@ jobs: paths: - install-12.tar + build-13: + docker: + - image: 'citus/extbuilder:13.0' + steps: + - checkout + - run: + name: 'Configure, Build, and Install' + command: | + PG_MAJOR=13 .circleci/build.sh + - persist_to_workspace: + root: . + paths: + - install-13.tar + test-11_checkinstall: docker: - image: 'citus/exttester:11.9' @@ -85,6 +99,26 @@ jobs: - codecov/upload: flags: 'test_12,installcheck' + test-13_checkinstall: + docker: + - image: 'citus/exttester:13.0' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - run: + name: 'Prepare Container & Install Extension' + command: | + chown -R circleci:circleci /home/circleci + tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / + - run: + name: 'Run Test' + command: | + gosu circleci .circleci/run_test.sh installcheck + - codecov/upload: + flags: 'test_13,installcheck' + workflows: version: 2 build_and_test: @@ -94,8 +128,11 @@ workflows: - build-11 - build-12 + - build-13 - test-11_checkinstall: requires: [build-11] - test-12_checkinstall: requires: [build-12] + - test-13_checkinstall: + requires: [build-13] From 653dbc615a493d8da7f004b83533d450327fa596 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Sat, 31 Oct 2020 13:34:26 -0700 Subject: [PATCH 089/124] Use -Werror --- Makefile | 4 ++-- cstore_customscan.c | 33 ++++++++++++++++++++------------- cstore_tableam.c | 2 +- cstore_writer.c | 1 + 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 5b65a08bb..04b9c12b2 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ else $(error version $(VER) is not supported) endif -PG_CFLAGS = -std=c11 -Wshadow +PG_CFLAGS = -std=c11 -Wshadow -Werror OBJS = 
cstore.o cstore_writer.o cstore_reader.o \ cstore_compression.o mod.o cstore_metadata_tables.o @@ -40,7 +40,7 @@ EXTRA_CLEAN = sql/fdw_block_filtering.sql sql/fdw_create.sql sql/fdw_data_types. sql/fdw_copyto.sql expected/fdw_block_filtering.out expected/fdw_create.out \ expected/fdw_data_types.out expected/fdw_load.out expected/fdw_copyto.out \ sql/am_block_filtering.sql sql/am_create.sql sql/am_data_types.sql sql/am_load.sql \ - sql/am_copyto.sql expected/am_block_filtering.out expected/am_create.out \ + sql/am_copyto.sql expected/am_block_filtering.out \ expected/am_data_types.out expected/am_load.out expected/am_copyto.out ifeq ($(USE_FDW),yes) diff --git a/cstore_customscan.c b/cstore_customscan.c index d7e6eb667..7c163e5c9 100644 --- a/cstore_customscan.c +++ b/cstore_customscan.c @@ -133,6 +133,8 @@ static void CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { + Relation relation; + /* call into previous hook if assigned */ if (PreviousSetRelPathlistHook) { @@ -156,13 +158,14 @@ CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, * If that is the case we want to insert an extra path that pushes down the projection * into the scan of the table to minimize the data read. 
*/ - Relation relation = RelationIdGetRelation(rte->relid); + relation = RelationIdGetRelation(rte->relid); if (relation->rd_tableam == GetCstoreTableAmRoutine()) { + Path *customPath = CreateCStoreScanPath(rel, rte); + ereport(DEBUG1, (errmsg("pathlist hook for cstore table am"))); /* we propose a new path that will be the only path for scanning this relation */ - Path *customPath = CreateCStoreScanPath(rel, rte); clear_paths(rel); add_path(rel, customPath); } @@ -175,17 +178,19 @@ CreateCStoreScanPath(RelOptInfo *rel, RangeTblEntry *rte) { CStoreScanPath *cspath = (CStoreScanPath *) newNode(sizeof(CStoreScanPath), T_CustomPath); + CustomPath *cpath; + Path *path; /* * popuate custom path information */ - CustomPath *cpath = &cspath->custom_path; + cpath = &cspath->custom_path; cpath->methods = &CStoreScanPathMethods; /* * populate generic path information */ - Path *path = &cpath->path; + path = &cpath->path; path->pathtype = T_CustomScan; path->parent = rel; path->pathtarget = rel->reltarget; @@ -212,12 +217,13 @@ CStoreScanCost(RangeTblEntry *rte) { Relation rel = RelationIdGetRelation(rte->relid); DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); - RelationClose(rel); - rel = NULL; - uint32 maxColumnCount = 0; uint64 totalStripeSize = 0; ListCell *stripeMetadataCell = NULL; + + RelationClose(rel); + rel = NULL; + foreach(stripeMetadataCell, metadata->stripeMetadataList) { StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell); @@ -225,12 +231,13 @@ CStoreScanCost(RangeTblEntry *rte) maxColumnCount = Max(maxColumnCount, stripeMetadata->columnCount); } - Bitmapset *attr_needed = rte->selectedCols; - double numberOfColumnsRead = bms_num_members(attr_needed); - double selectionRatio = numberOfColumnsRead / (double) maxColumnCount; - Cost scanCost = (double) totalStripeSize / BLCKSZ * selectionRatio; - - return scanCost; + { + Bitmapset *attr_needed = rte->selectedCols; + double numberOfColumnsRead = 
bms_num_members(attr_needed); + double selectionRatio = numberOfColumnsRead / (double) maxColumnCount; + Cost scanCost = (double) totalStripeSize / BLCKSZ * selectionRatio; + return scanCost; + } } diff --git a/cstore_tableam.c b/cstore_tableam.c index c22ab7baf..b1624f59f 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -244,6 +244,7 @@ cstore_beginscan_extended(Relation relation, Snapshot snapshot, List *columnList = NIL; List *neededColumnList = NIL; MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); + ListCell *columnCell = NULL; scan->cs_base.rs_rd = relation; scan->cs_base.rs_snapshot = snapshot; @@ -255,7 +256,6 @@ cstore_beginscan_extended(Relation relation, Snapshot snapshot, columnList = RelationColumnList(relation); /* only collect columns that we need for the scan */ - ListCell *columnCell = NULL; foreach(columnCell, columnList) { Var *var = castNode(Var, lfirst(columnCell)); diff --git a/cstore_writer.c b/cstore_writer.c index 3be14994b..9ca8c806e 100644 --- a/cstore_writer.c +++ b/cstore_writer.c @@ -336,6 +336,7 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) RelationOpenSmgr(rel); nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); Assert(addr.blockno < nblocks); + (void) nblocks; /* keep compiler quiet */ RelationCloseSmgr(rel); buffer = ReadBuffer(rel, addr.blockno); From d03e9ca8611f9f805bb3a491233dd6256cf560ec Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Tue, 3 Nov 2020 13:39:46 +0100 Subject: [PATCH 090/124] Feature: cstore table options (#25) DESCRIPTION: Add UDF's to maintain cstore table options This PR adds two UDF's and a view to interact and maintain the cstore table options. - ``alter_cstore_table_set(relid REGCLASS, [ options ... ])`` - ``alter_cstore_table_reset(relid REGCLASS, [ options ... ])`` - ``cstore.cstore_options`` The `set` function takes options and their specific types. When specified it will change the option associated with the table to the provided value. 
When omitted no action is taken. The `reset` function takes options as booleans. When set to `true` the value of the option associated with the table will be reset to the current default as specified by the associated GUC's. The options view containes a record for every cstore table with its associated settings as columns. --- Makefile | 2 +- cstore.h | 8 +- cstore_compression.c | 24 ++++ cstore_fdw--1.7--1.8.sql | 18 +++ cstore_fdw--1.7.sql | 12 ++ cstore_fdw.c | 6 +- cstore_metadata_tables.c | 133 ++++++++++++++++++++-- cstore_tableam.c | 209 ++++++++++++++++++++++++++++++----- expected/am_tableoptions.out | 179 ++++++++++++++++++++++++++++++ expected/am_vacuum.out | 33 +++++- sql/am_tableoptions.sql | 102 +++++++++++++++++ sql/am_vacuum.sql | 13 ++- 12 files changed, 687 insertions(+), 52 deletions(-) create mode 100644 expected/am_tableoptions.out create mode 100644 sql/am_tableoptions.sql diff --git a/Makefile b/Makefile index 04b9c12b2..6be7bbd45 100644 --- a/Makefile +++ b/Makefile @@ -56,7 +56,7 @@ ifeq ($(USE_TABLEAM),yes) OBJS += cstore_tableam.o cstore_customscan.o REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ am_drop am_insert am_copyto am_alter am_rollback am_truncate am_vacuum am_clean \ - am_block_filtering am_join am_trigger + am_block_filtering am_join am_trigger am_tableoptions ISOLATION += am_write_concurrency am_vacuum_vs_insert endif diff --git a/cstore.h b/cstore.h index 7ff657e33..35598cd41 100644 --- a/cstore.h +++ b/cstore.h @@ -93,6 +93,8 @@ typedef struct DataFileMetadata { List *stripeMetadataList; uint64 blockRowCount; + uint64 stripeRowCount; + CompressionType compression; } DataFileMetadata; @@ -277,10 +279,14 @@ extern uint64 CStoreTableRowCount(Relation relation); extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, CompressionType compressionType); extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); +extern char * 
CompressionTypeStr(CompressionType type); /* cstore_metadata_tables.c */ extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); -extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount); +extern void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int + stripeRowCount, CompressionType compression); +extern void UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int + stripeRowCount, CompressionType compression); extern DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk); extern uint64 GetHighestUsedAddress(Oid relfilenode); extern StripeMetadata ReserveStripe(Relation rel, uint64 size, diff --git a/cstore_compression.c b/cstore_compression.c index f6122614a..f36d8dd04 100644 --- a/cstore_compression.c +++ b/cstore_compression.c @@ -170,3 +170,27 @@ DecompressBuffer(StringInfo buffer, CompressionType compressionType) return decompressedBuffer; } + + +/* + * CompressionTypeStr returns string representation of a compression type. 
+ */ +char * +CompressionTypeStr(CompressionType type) +{ + switch (type) + { + case COMPRESSION_NONE: + { + return "none"; + } + + case COMPRESSION_PG_LZ: + { + return "pglz"; + } + + default: + return "unknown"; + } +} diff --git a/cstore_fdw--1.7--1.8.sql b/cstore_fdw--1.7--1.8.sql index cf6d510d5..81cbadfb4 100644 --- a/cstore_fdw--1.7--1.8.sql +++ b/cstore_fdw--1.7--1.8.sql @@ -12,6 +12,24 @@ IF version() ~ '12' or version() ~ '13' THEN CREATE ACCESS METHOD cstore_tableam TYPE TABLE HANDLER cstore_tableam_handler; + + CREATE FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int DEFAULT NULL, + stripe_row_count int DEFAULT NULL, + compression name DEFAULT null) + RETURNS void + LANGUAGE C + AS 'MODULE_PATHNAME', 'alter_cstore_table_set'; + + CREATE FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool DEFAULT false, + stripe_row_count bool DEFAULT false, + compression bool DEFAULT false) + RETURNS void + LANGUAGE C + AS 'MODULE_PATHNAME', 'alter_cstore_table_reset'; $$; END IF; END$proc$; diff --git a/cstore_fdw--1.7.sql b/cstore_fdw--1.7.sql index c19bb1449..1f874ce60 100644 --- a/cstore_fdw--1.7.sql +++ b/cstore_fdw--1.7.sql @@ -34,6 +34,8 @@ LANGUAGE C STRICT; CREATE TABLE cstore_data_files ( relfilenode oid NOT NULL, block_row_count int NOT NULL, + stripe_row_count int NOT NULL, + compression name NOT NULL, version_major bigint NOT NULL, version_minor bigint NOT NULL, PRIMARY KEY (relfilenode) @@ -74,3 +76,13 @@ CREATE TABLE cstore_skipnodes ( ) WITH (user_catalog_table = true); COMMENT ON TABLE cstore_skipnodes IS 'CStore per block metadata'; + +CREATE VIEW cstore_options AS +SELECT c.oid::regclass regclass, + d.block_row_count, + d.stripe_row_count, + d.compression +FROM pg_class c +JOIN cstore.cstore_data_files d USING(relfilenode); + +COMMENT ON VIEW cstore_options IS 'CStore per table settings'; diff --git a/cstore_fdw.c b/cstore_fdw.c index 328125535..c2497fd27 100644 --- 
a/cstore_fdw.c +++ b/cstore_fdw.c @@ -280,7 +280,8 @@ cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) AccessShareLock, false); Relation relation = cstore_fdw_open(relationId, AccessExclusiveLock); CStoreOptions *options = CStoreGetOptions(relationId); - InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount); + InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount, + options->stripeRowCount, options->compressionType); heap_close(relation, AccessExclusiveLock); } } @@ -797,7 +798,8 @@ TruncateCStoreTables(List *cstoreRelationList) Assert(IsCStoreFdwTable(relationId)); FdwNewRelFileNode(relation); - InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount); + InitCStoreDataFileMetadata(relation->rd_node.relNode, options->blockRowCount, + options->stripeRowCount, options->compressionType); } } diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 1bfc4be49..793f3dd7f 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -57,7 +57,7 @@ static Oid CStoreDataFilesIndexRelationId(void); static Oid CStoreSkipNodesRelationId(void); static Oid CStoreSkipNodesIndexRelationId(void); static Oid CStoreNamespaceId(void); -static bool ReadCStoreDataFiles(Oid relfilenode, uint64 *blockRowCount); +static bool ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata); static ModifyState * StartModifyRelation(Relation rel); static void InsertTupleAndEnforceConstraints(ModifyState *state, Datum *values, bool *nulls); @@ -68,11 +68,31 @@ static bytea * DatumToBytea(Datum value, Form_pg_attribute attrForm); static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); /* constants for cstore_table */ -#define Natts_cstore_data_files 4 +#define Natts_cstore_data_files 6 #define Anum_cstore_data_files_relfilenode 1 #define Anum_cstore_data_files_block_row_count 2 -#define Anum_cstore_data_files_version_major 3 -#define Anum_cstore_data_files_version_minor 4 +#define 
Anum_cstore_data_files_stripe_row_count 3 +#define Anum_cstore_data_files_compression 4 +#define Anum_cstore_data_files_version_major 5 +#define Anum_cstore_data_files_version_minor 6 + +/* ---------------- + * cstore.cstore_data_files definition. + * ---------------- + */ +typedef struct FormData_cstore_data_files +{ + Oid relfilenode; + int32 block_row_count; + int32 stripe_row_count; + NameData compression; + int64 version_major; + int64 version_minor; + +#ifdef CATALOG_VARLEN /* variable-length fields start here */ +#endif +} FormData_cstore_data_files; +typedef FormData_cstore_data_files *Form_cstore_data_files; /* constants for cstore_stripe */ #define Natts_cstore_stripes 8 @@ -106,16 +126,22 @@ static Datum ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm); * in cstore_data_files. */ void -InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount) +InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount, + CompressionType compression) { Oid cstoreDataFilesOid = InvalidOid; Relation cstoreDataFiles = NULL; ModifyState *modifyState = NULL; + NameData compressionName = { 0 }; + + namestrcpy(&compressionName, CompressionTypeStr(compression)); bool nulls[Natts_cstore_data_files] = { 0 }; Datum values[Natts_cstore_data_files] = { ObjectIdGetDatum(relfilenode), Int32GetDatum(blockRowCount), + Int32GetDatum(stripeRowCount), + NameGetDatum(&compressionName), Int32GetDatum(CSTORE_VERSION_MAJOR), Int32GetDatum(CSTORE_VERSION_MINOR) }; @@ -135,6 +161,84 @@ InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount) } +void +UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount, + CompressionType compression) +{ + const int scanKeyCount = 1; + ScanKeyData scanKey[1]; + bool indexOK = true; + SysScanDesc scanDescriptor = NULL; + Form_cstore_data_files metadata = NULL; + HeapTuple heapTuple = NULL; + Datum values[Natts_cstore_data_files] = { 0 }; + bool isnull[Natts_cstore_data_files] = { 0 }; + bool 
replace[Natts_cstore_data_files] = { 0 }; + + Relation cstoreDataFiles = heap_open(CStoreDataFilesRelationId(), RowExclusiveLock); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreDataFiles); + + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, + F_INT8EQ, ObjectIdGetDatum(relfilenode)); + + scanDescriptor = systable_beginscan(cstoreDataFiles, + CStoreDataFilesIndexRelationId(), + indexOK, + NULL, scanKeyCount, scanKey); + + heapTuple = systable_getnext(scanDescriptor); + if (heapTuple == NULL) + { + ereport(ERROR, (errmsg("relfilenode %d doesn't belong to a cstore table", + relfilenode))); + } + + metadata = (Form_cstore_data_files) GETSTRUCT(heapTuple); + + bool changed = false; + if (metadata->block_row_count != blockRowCount) + { + values[Anum_cstore_data_files_block_row_count - 1] = Int32GetDatum(blockRowCount); + isnull[Anum_cstore_data_files_block_row_count - 1] = false; + replace[Anum_cstore_data_files_block_row_count - 1] = true; + changed = true; + } + + if (metadata->stripe_row_count != stripeRowCount) + { + values[Anum_cstore_data_files_stripe_row_count - 1] = Int32GetDatum( + stripeRowCount); + isnull[Anum_cstore_data_files_stripe_row_count - 1] = false; + replace[Anum_cstore_data_files_stripe_row_count - 1] = true; + changed = true; + } + + if (ParseCompressionType(NameStr(metadata->compression)) != compression) + { + Name compressionName = palloc0(sizeof(NameData)); + namestrcpy(compressionName, CompressionTypeStr(compression)); + values[Anum_cstore_data_files_compression - 1] = NameGetDatum(compressionName); + isnull[Anum_cstore_data_files_compression - 1] = false; + replace[Anum_cstore_data_files_compression - 1] = true; + changed = true; + } + + if (changed) + { + heapTuple = heap_modify_tuple(heapTuple, tupleDescriptor, values, isnull, + replace); + + CatalogTupleUpdate(cstoreDataFiles, &heapTuple->t_self, heapTuple); + + CommandCounterIncrement(); + } + + systable_endscan(scanDescriptor); + + 
heap_close(cstoreDataFiles, NoLock); +} + + /* * SaveStripeSkipList saves StripeSkipList for a given stripe as rows * of cstore_skipnodes. @@ -355,7 +459,7 @@ DataFileMetadata * ReadDataFileMetadata(Oid relfilenode, bool missingOk) { DataFileMetadata *datafileMetadata = palloc0(sizeof(DataFileMetadata)); - bool found = ReadCStoreDataFiles(relfilenode, &datafileMetadata->blockRowCount); + bool found = ReadCStoreDataFiles(relfilenode, datafileMetadata); if (!found) { if (!missingOk) @@ -555,7 +659,7 @@ ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) * false if table was not found in cstore_data_files. */ static bool -ReadCStoreDataFiles(Oid relfilenode, uint64 *blockRowCount) +ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata) { bool found = false; Oid cstoreDataFilesOid = InvalidOid; @@ -599,8 +703,19 @@ ReadCStoreDataFiles(Oid relfilenode, uint64 *blockRowCount) Datum datumArray[Natts_cstore_data_files]; bool isNullArray[Natts_cstore_data_files]; heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); - *blockRowCount = DatumGetInt32(datumArray[Anum_cstore_data_files_block_row_count - - 1]); + + if (metadata) + { + Name compressionName = NULL; + + metadata->blockRowCount = DatumGetInt32( + datumArray[Anum_cstore_data_files_block_row_count - 1]); + metadata->stripeRowCount = DatumGetInt32( + datumArray[Anum_cstore_data_files_stripe_row_count - 1]); + compressionName = DatumGetName( + datumArray[Anum_cstore_data_files_compression - 1]); + metadata->compression = ParseCompressionType(NameStr(*compressionName)); + } found = true; } diff --git a/cstore_tableam.c b/cstore_tableam.c index b1624f59f..ce7d7de97 100644 --- a/cstore_tableam.c +++ b/cstore_tableam.c @@ -97,11 +97,15 @@ static bool IsCStoreTableAmTable(Oid relationId); static bool ConditionalLockRelationWithTimeout(Relation rel, LOCKMODE lockMode, int timeout, int retryInterval); static void LogRelationStats(Relation rel, int elevel); -static char * 
CompressionTypeStr(CompressionType type); static void TruncateCStore(Relation rel, int elevel); + +/* + * CStoreTableAMDefaultOptions returns the default options for a cstore table am table. + * These options are based on the GUC's controlling the defaults. + */ static CStoreOptions * -CStoreTableAMGetOptions(void) +CStoreTableAMDefaultOptions() { CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions)); cstoreOptions->compressionType = cstore_compression; @@ -111,6 +115,27 @@ CStoreTableAMGetOptions(void) } +/* + * CStoreTableAMGetOptions returns the options based on a relation. It is advised the + * relation is a cstore table am table, if not it will raise an error + */ +static CStoreOptions * +CStoreTableAMGetOptions(Relation rel) +{ + CStoreOptions *cstoreOptions = NULL; + DataFileMetadata *metadata = NULL; + + Assert(rel != NULL); + + cstoreOptions = palloc0(sizeof(CStoreOptions)); + metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + cstoreOptions->compressionType = metadata->compression; + cstoreOptions->stripeRowCount = metadata->stripeRowCount; + cstoreOptions->blockRowCount = metadata->blockRowCount; + return cstoreOptions; +} + + static MemoryContext GetCStoreMemoryContext() { @@ -145,7 +170,7 @@ cstore_init_write_state(Relation relation) if (CStoreWriteState == NULL) { - CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(); + CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(relation); TupleDesc tupdesc = RelationGetDescr(relation); elog(LOG, "initializing write state for relation %d", relation->rd_id); @@ -534,17 +559,23 @@ cstore_relation_set_new_filenode(Relation rel, SMgrRelation srel; DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); uint64 blockRowCount = 0; + uint64 stripeRowCount = 0; + CompressionType compression = 0; if (metadata != NULL) { /* existing table (e.g. 
TRUNCATE), use existing blockRowCount */ blockRowCount = metadata->blockRowCount; + stripeRowCount = metadata->stripeRowCount; + compression = metadata->compression; } else { /* new table, use options */ - CStoreOptions *options = CStoreTableAMGetOptions(); + CStoreOptions *options = CStoreTableAMDefaultOptions(); blockRowCount = options->blockRowCount; + stripeRowCount = options->stripeRowCount; + compression = options->compressionType; } /* delete old relfilenode metadata */ @@ -554,7 +585,8 @@ cstore_relation_set_new_filenode(Relation rel, *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); srel = RelationCreateStorage(*newrnode, persistence); - InitCStoreDataFileMetadata(newrnode->relNode, blockRowCount); + InitCStoreDataFileMetadata(newrnode->relNode, blockRowCount, stripeRowCount, + compression); smgrclose(srel); } @@ -575,7 +607,8 @@ cstore_relation_nontransactional_truncate(Relation rel) /* Delete old relfilenode metadata and recreate it */ DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); - InitCStoreDataFileMetadata(rel->rd_node.relNode, metadata->blockRowCount); + InitCStoreDataFileMetadata(rel->rd_node.relNode, metadata->blockRowCount, + metadata->stripeRowCount, metadata->compression); } @@ -623,7 +656,19 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, */ Assert(sourceDesc->natts == targetDesc->natts); - cstoreOptions = CStoreTableAMGetOptions(); + /* + * Since we are copying into a new relation we need to copy the settings from the old + * relation first. 
+ */ + + cstoreOptions = CStoreTableAMGetOptions(OldHeap); + + UpdateCStoreDataFileMetadata(NewHeap->rd_node.relNode, + cstoreOptions->blockRowCount, + cstoreOptions->stripeRowCount, + cstoreOptions->compressionType); + + cstoreOptions = CStoreTableAMGetOptions(NewHeap); writeState = CStoreBeginWrite(NewHeap, cstoreOptions->compressionType, @@ -756,26 +801,6 @@ LogRelationStats(Relation rel, int elevel) } -/* - * CompressionTypeStr returns string representation of a compression type. - */ -static char * -CompressionTypeStr(CompressionType type) -{ - switch (type) - { - case COMPRESSION_NONE: - return "none"; - - case COMPRESSION_PG_LZ: - return "pglz"; - - default: - return "unknown"; - } -} - - /* * TruncateCStore truncates the unused space at the end of main fork for * a cstore table. This unused space can be created by aborted transactions. @@ -1262,3 +1287,133 @@ cstore_tableam_handler(PG_FUNCTION_ARGS) { PG_RETURN_POINTER(&cstore_am_methods); } + + +/* + * alter_cstore_table_set is a UDF exposed in postgres to change settings on a columnar + * table. Calling this function on a non-columnar table gives an error. + * + * sql syntax: + * pg_catalog.alter_cstore_table_set( + * table_name regclass, + * block_row_count int DEFAULT NULL, + * stripe_row_count int DEFAULT NULL, + * compression name DEFAULT null) + * + * All arguments except the table name are optional. The UDF is supposed to be called + * like: + * SELECT alter_cstore_table_set('table', compression => 'pglz'); + * + * This will only update the compression of the table, keeping all other settings the + * same. Multiple settings can be changed at the same time by providing multiple + * arguments. Calling the argument with the NULL value will be interperted as not having + * provided the argument. 
+ */ +PG_FUNCTION_INFO_V1(alter_cstore_table_set); +Datum +alter_cstore_table_set(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + int blockRowCount = 0; + int stripeRowCount = 0; + CompressionType compression = COMPRESSION_TYPE_INVALID; + + Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */ + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); + if (!metadata) + { + ereport(ERROR, (errmsg("table %s is not a cstore table", + quote_identifier(RelationGetRelationName(rel))))); + } + + blockRowCount = metadata->blockRowCount; + stripeRowCount = metadata->stripeRowCount; + compression = metadata->compression; + + /* block_row_count => not null */ + if (!PG_ARGISNULL(1)) + { + blockRowCount = PG_GETARG_INT32(1); + ereport(DEBUG1, (errmsg("updating block row count to %d", blockRowCount))); + } + + /* stripe_row_count => not null */ + if (!PG_ARGISNULL(2)) + { + stripeRowCount = PG_GETARG_INT32(2); + ereport(DEBUG1, (errmsg("updating stripe row count to %d", stripeRowCount))); + } + + /* compression => not null */ + if (!PG_ARGISNULL(3)) + { + Name compressionName = PG_GETARG_NAME(3); + compression = ParseCompressionType(NameStr(*compressionName)); + if (compression == COMPRESSION_TYPE_INVALID) + { + ereport(ERROR, (errmsg("unknown compression type for cstore table: %s", + quote_identifier(NameStr(*compressionName))))); + } + ereport(DEBUG1, (errmsg("updating compression to %s", + CompressionTypeStr(compression)))); + } + + UpdateCStoreDataFileMetadata(rel->rd_node.relNode, blockRowCount, stripeRowCount, + compression); + + table_close(rel, NoLock); + + PG_RETURN_VOID(); +} + + +PG_FUNCTION_INFO_V1(alter_cstore_table_reset); +Datum +alter_cstore_table_reset(PG_FUNCTION_ARGS) +{ + Oid relationId = PG_GETARG_OID(0); + int blockRowCount = 0; + int stripeRowCount = 0; + CompressionType compression = COMPRESSION_TYPE_INVALID; + + Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK 
*/ + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); + if (!metadata) + { + ereport(ERROR, (errmsg("table %s is not a cstore table", + quote_identifier(RelationGetRelationName(rel))))); + } + + blockRowCount = metadata->blockRowCount; + stripeRowCount = metadata->stripeRowCount; + compression = metadata->compression; + + /* block_row_count => true */ + if (!PG_ARGISNULL(1) && PG_GETARG_BOOL(1)) + { + blockRowCount = cstore_block_row_count; + ereport(DEBUG1, (errmsg("resetting block row count to %d", blockRowCount))); + } + + /* stripe_row_count => true */ + if (!PG_ARGISNULL(2) && PG_GETARG_BOOL(2)) + { + stripeRowCount = cstore_stripe_row_count; + ereport(DEBUG1, (errmsg("resetting stripe row count to %d", stripeRowCount))); + } + + /* compression => true */ + if (!PG_ARGISNULL(3) && PG_GETARG_BOOL(3)) + { + compression = cstore_compression; + ereport(DEBUG1, (errmsg("resetting compression to %s", + CompressionTypeStr(compression)))); + } + + UpdateCStoreDataFileMetadata(rel->rd_node.relNode, blockRowCount, stripeRowCount, + compression); + + table_close(rel, NoLock); + + PG_RETURN_VOID(); +} diff --git a/expected/am_tableoptions.out b/expected/am_tableoptions.out new file mode 100644 index 000000000..e5e0f9a4f --- /dev/null +++ b/expected/am_tableoptions.out @@ -0,0 +1,179 @@ +CREATE SCHEMA am_tableoptions; +SET search_path TO am_tableoptions; +CREATE TABLE table_options (a int) USING cstore_tableam; +INSERT INTO table_options SELECT generate_series(1,100); +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10000 | 150000 | none +(1 row) + +-- test changing the compression +SELECT alter_cstore_table_set('table_options', compression => 'pglz'); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show 
table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10000 | 150000 | pglz +(1 row) + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', block_row_count => 10); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10 | 150000 | pglz +(1 row) + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', stripe_row_count => 100); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10 | 100 | pglz +(1 row) + +-- VACUUM FULL creates a new table, make sure it copies settings from the table you are vacuuming +VACUUM FULL table_options; +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10 | 100 | pglz +(1 row) + +-- set all settings at the same time +SELECT alter_cstore_table_set('table_options', stripe_row_count => 1000, block_row_count => 100, compression => 'none'); + alter_cstore_table_set +------------------------ + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count 
| stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 100 | 1000 | none +(1 row) + +-- reset settings one by one to the version of the GUC's +SET cstore.block_row_count TO 1000; +SET cstore.stripe_row_count TO 10000; +SET cstore.compression TO 'pglz'; +-- verify setting the GUC's didn't change the settings +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 100 | 1000 | none +(1 row) + +SELECT alter_cstore_table_reset('table_options', block_row_count => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 1000 | none +(1 row) + +SELECT alter_cstore_table_reset('table_options', stripe_row_count => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 10000 | none +(1 row) + +SELECT alter_cstore_table_reset('table_options', compression => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 10000 | pglz +(1 row) + +-- verify resetting all 
settings at once work +SET cstore.block_row_count TO 10000; +SET cstore.stripe_row_count TO 100000; +SET cstore.compression TO 'none'; +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 1000 | 10000 | pglz +(1 row) + +SELECT alter_cstore_table_reset( + 'table_options', + block_row_count => true, + stripe_row_count => true, + compression => true); + alter_cstore_table_reset +-------------------------- + +(1 row) + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + regclass | block_row_count | stripe_row_count | compression +---------------+-----------------+------------------+------------- + table_options | 10000 | 100000 | none +(1 row) + +-- verify edge cases +-- first start with a table that is not a cstore table +CREATE TABLE not_a_cstore_table (a int); +SELECT alter_cstore_table_set('not_a_cstore_table', compression => 'pglz'); +ERROR: table not_a_cstore_table is not a cstore table +SELECT alter_cstore_table_reset('not_a_cstore_table', compression => true); +ERROR: table not_a_cstore_table is not a cstore table +-- verify you can't use a compression that is not known +SELECT alter_cstore_table_set('table_options', compression => 'foobar'); +ERROR: unknown compression type for cstore table: foobar +SET client_min_messages TO warning; +DROP SCHEMA am_tableoptions CASCADE; diff --git a/expected/am_vacuum.out b/expected/am_vacuum.out index d1270a3d2..3975be12b 100644 --- a/expected/am_vacuum.out +++ b/expected/am_vacuum.out @@ -36,7 +36,12 @@ SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.r (1 row) -- test the case when all data cannot fit into a single stripe -SET cstore.stripe_row_count TO 1000; +SELECT alter_cstore_table_set('t', stripe_row_count => 1000); + 
alter_cstore_table_set +------------------------ + +(1 row) + INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; SELECT sum(a), sum(b) FROM t; sum | sum @@ -157,14 +162,25 @@ SELECT count(*) FROM t; -- add some stripes with different compression types and create some gaps, -- then vacuum to print stats BEGIN; -SET cstore.block_row_count TO 1000; -SET cstore.stripe_row_count TO 2000; -SET cstore.compression TO "pglz"; +SELECT alter_cstore_table_set('t', + block_row_count => 1000, + stripe_row_count => 2000, + compression => 'pglz'); + alter_cstore_table_set +------------------------ + +(1 row) + SAVEPOINT s1; INSERT INTO t SELECT i FROM generate_series(1, 1500) i; ROLLBACK TO SAVEPOINT s1; INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; -SET cstore.compression TO "none"; +SELECT alter_cstore_table_set('t', compression => 'none'); + alter_cstore_table_set +------------------------ + +(1 row) + SAVEPOINT s2; INSERT INTO t SELECT i FROM generate_series(1, 1500) i; ROLLBACK TO SAVEPOINT s2; @@ -195,7 +211,12 @@ block count: 11, containing data for dropped columns: 2, none compressed: 9, pgl -- vacuum full should remove blocks for dropped columns -- note that, a block will be stored in non-compressed for if compression -- doesn't reduce its size. 
-SET cstore.compression TO "pglz"; +SELECT alter_cstore_table_set('t', compression => 'pglz'); + alter_cstore_table_set +------------------------ + +(1 row) + VACUUM FULL t; VACUUM VERBOSE t; INFO: statistics for "t": diff --git a/sql/am_tableoptions.sql b/sql/am_tableoptions.sql new file mode 100644 index 000000000..33f26ec76 --- /dev/null +++ b/sql/am_tableoptions.sql @@ -0,0 +1,102 @@ +CREATE SCHEMA am_tableoptions; +SET search_path TO am_tableoptions; + +CREATE TABLE table_options (a int) USING cstore_tableam; +INSERT INTO table_options SELECT generate_series(1,100); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- test changing the compression +SELECT alter_cstore_table_set('table_options', compression => 'pglz'); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', block_row_count => 10); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- test changing the block_row_count +SELECT alter_cstore_table_set('table_options', stripe_row_count => 100); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- VACUUM FULL creates a new table, make sure it copies settings from the table you are vacuuming +VACUUM FULL table_options; + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- set all settings at the same time +SELECT alter_cstore_table_set('table_options', stripe_row_count => 1000, block_row_count => 100, compression => 'none'); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- reset settings one by one to the version of the GUC's +SET cstore.block_row_count TO 1000; 
+SET cstore.stripe_row_count TO 10000; +SET cstore.compression TO 'pglz'; + +-- verify setting the GUC's didn't change the settings +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset('table_options', block_row_count => true); +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset('table_options', stripe_row_count => true); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset('table_options', compression => true); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- verify resetting all settings at once work +SET cstore.block_row_count TO 10000; +SET cstore.stripe_row_count TO 100000; +SET cstore.compression TO 'none'; + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +SELECT alter_cstore_table_reset( + 'table_options', + block_row_count => true, + stripe_row_count => true, + compression => true); + +-- show table_options settings +SELECT * FROM cstore.cstore_options +WHERE regclass = 'table_options'::regclass; + +-- verify edge cases +-- first start with a table that is not a cstore table +CREATE TABLE not_a_cstore_table (a int); +SELECT alter_cstore_table_set('not_a_cstore_table', compression => 'pglz'); +SELECT alter_cstore_table_reset('not_a_cstore_table', compression => true); + +-- verify you can't use a compression that is not known +SELECT alter_cstore_table_set('table_options', compression => 'foobar'); + +SET client_min_messages TO warning; +DROP SCHEMA am_tableoptions CASCADE; diff --git a/sql/am_vacuum.sql b/sql/am_vacuum.sql index f7f9d77bd..6d248a147 100644 --- a/sql/am_vacuum.sql +++ b/sql/am_vacuum.sql @@ -18,7 +18,7 @@ SELECT 
sum(a), sum(b) FROM t; SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; -- test the case when all data cannot fit into a single stripe -SET cstore.stripe_row_count TO 1000; +SELECT alter_cstore_table_set('t', stripe_row_count => 1000); INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; SELECT sum(a), sum(b) FROM t; @@ -65,14 +65,15 @@ SELECT count(*) FROM t; -- then vacuum to print stats BEGIN; -SET cstore.block_row_count TO 1000; -SET cstore.stripe_row_count TO 2000; -SET cstore.compression TO "pglz"; +SELECT alter_cstore_table_set('t', + block_row_count => 1000, + stripe_row_count => 2000, + compression => 'pglz'); SAVEPOINT s1; INSERT INTO t SELECT i FROM generate_series(1, 1500) i; ROLLBACK TO SAVEPOINT s1; INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; -SET cstore.compression TO "none"; +SELECT alter_cstore_table_set('t', compression => 'none'); SAVEPOINT s2; INSERT INTO t SELECT i FROM generate_series(1, 1500) i; ROLLBACK TO SAVEPOINT s2; @@ -93,7 +94,7 @@ VACUUM VERBOSE t; -- vacuum full should remove blocks for dropped columns -- note that, a block will be stored in non-compressed for if compression -- doesn't reduce its size. 
-SET cstore.compression TO "pglz"; +SELECT alter_cstore_table_set('t', compression => 'pglz'); VACUUM FULL t; VACUUM VERBOSE t; From 5db380f33a3f94539a081e6c9d88f40205bb3aa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanefi=20=C3=96nald=C4=B1?= Date: Fri, 30 Oct 2020 13:52:22 +0300 Subject: [PATCH 091/124] Prevent undistribute_table calls for foreign tables --- .../commands/create_distributed_table.c | 6 ++++++ src/test/regress/expected/undistribute_table.out | 16 ++++++++++++++++ src/test/regress/multi_schedule | 5 ++++- src/test/regress/sql/undistribute_table.sql | 11 +++++++++++ 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index c8f3ac587..c7cb26887 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -1585,6 +1585,12 @@ UndistributeTable(Oid relationId) "because a foreign key references to it."))); } + char relationKind = get_rel_relkind(relationId); + if (relationKind == RELKIND_FOREIGN_TABLE) + { + ereport(ERROR, (errmsg("Cannot undistribute table " + "because it is a foreign table."))); + } List *preLoadCommands = GetPreLoadTableCreationCommands(relationId, true); List *postLoadCommands = GetPostLoadTableCreationCommands(relationId); diff --git a/src/test/regress/expected/undistribute_table.out b/src/test/regress/expected/undistribute_table.out index a15631efb..0d664d895 100644 --- a/src/test/regress/expected/undistribute_table.out +++ b/src/test/regress/expected/undistribute_table.out @@ -126,6 +126,22 @@ ERROR: Cannot undistribute table because a foreign key references to it. SELECT undistribute_table('referencing_table'); ERROR: Cannot undistribute table because it has a foreign key. 
DROP TABLE referenced_table, referencing_table; +-- test distributed foreign tables +-- we expect errors +CREATE FOREIGN TABLE foreign_table ( + id bigint not null, + full_name text not null default '' +) SERVER fake_fdw_server OPTIONS (encoding 'utf-8', compression 'true'); +SELECT create_distributed_table('foreign_table', 'id'); +NOTICE: foreign-data wrapper "fake_fdw" does not have an extension defined + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +SELECT undistribute_table('foreign_table'); +ERROR: Cannot undistribute table because it is a foreign table. +DROP FOREIGN TABLE foreign_table; -- test partitioned tables CREATE TABLE partitioned_table (id INT, a INT) PARTITION BY RANGE (id); CREATE TABLE partitioned_table_1_5 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (5); diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 57ac0f47a..43fc468d5 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -65,6 +65,10 @@ test: ensure_no_intermediate_data_leak # ---------- test: multi_partitioning_utils multi_partitioning replicated_partitioned_table +# ---------- +# Tests for foreign data wrapper support +# ---------- +test: multi_create_fdw # ---------- # Tests for recursive subquery planning @@ -199,7 +203,6 @@ test: multi_outer_join # Note that the order of the following tests are important. multi_complex_count_distinct # is independent from the rest of the group, it is added to increase parallelism. 
# --- -test: multi_create_fdw test: multi_complex_count_distinct multi_select_distinct test: multi_modifications test: multi_distribution_metadata diff --git a/src/test/regress/sql/undistribute_table.sql b/src/test/regress/sql/undistribute_table.sql index ca393c5d9..d019d89cc 100644 --- a/src/test/regress/sql/undistribute_table.sql +++ b/src/test/regress/sql/undistribute_table.sql @@ -52,6 +52,17 @@ SELECT undistribute_table('referencing_table'); DROP TABLE referenced_table, referencing_table; +-- test distributed foreign tables +-- we expect errors +CREATE FOREIGN TABLE foreign_table ( + id bigint not null, + full_name text not null default '' +) SERVER fake_fdw_server OPTIONS (encoding 'utf-8', compression 'true'); +SELECT create_distributed_table('foreign_table', 'id'); +SELECT undistribute_table('foreign_table'); + +DROP FOREIGN TABLE foreign_table; + -- test partitioned tables CREATE TABLE partitioned_table (id INT, a INT) PARTITION BY RANGE (id); CREATE TABLE partitioned_table_1_5 PARTITION OF partitioned_table FOR VALUES FROM (1) TO (5); From 85a4b61a0e6b7dc33211d0fd7735a0d817ab9519 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hanefi=20=C3=96nald=C4=B1?= Date: Mon, 2 Nov 2020 13:48:08 +0300 Subject: [PATCH 092/124] Prevent undistribute_table calls for partitions --- .../distributed/commands/create_distributed_table.c | 10 ++++++++++ src/test/regress/expected/undistribute_table.out | 5 +++++ src/test/regress/sql/undistribute_table.sql | 3 +++ 3 files changed, 18 insertions(+) diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index c7cb26887..33a2b7212 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -1592,6 +1592,16 @@ UndistributeTable(Oid relationId) "because it is a foreign table."))); } + if (PartitionTable(relationId)) + { + Oid parentRelationId = PartitionParentOid(relationId); + 
char *parentRelationName = get_rel_name(parentRelationId); + ereport(ERROR, (errmsg("Cannot undistribute table " + "because it is a partition."), + errhint("Undistribute the partitioned table \"%s\" instead.", + parentRelationName))); + } + List *preLoadCommands = GetPreLoadTableCreationCommands(relationId, true); List *postLoadCommands = GetPostLoadTableCreationCommands(relationId); diff --git a/src/test/regress/expected/undistribute_table.out b/src/test/regress/expected/undistribute_table.out index 0d664d895..7fac4ec29 100644 --- a/src/test/regress/expected/undistribute_table.out +++ b/src/test/regress/expected/undistribute_table.out @@ -194,6 +194,11 @@ SELECT * FROM partitioned_table_6_10 ORDER BY 1, 2; 7 | 2 (1 row) +-- undistributing partitions are not supported +SELECT undistribute_table('partitioned_table_1_5'); +ERROR: Cannot undistribute table because it is a partition. +HINT: Undistribute the partitioned table "partitioned_table" instead. +-- we can undistribute partitioned parent tables SELECT undistribute_table('partitioned_table'); NOTICE: Undistributing the partitions of undistribute_table.partitioned_table NOTICE: Creating a new local table for undistribute_table.partitioned_table_1_5 diff --git a/src/test/regress/sql/undistribute_table.sql b/src/test/regress/sql/undistribute_table.sql index d019d89cc..ad133b5e2 100644 --- a/src/test/regress/sql/undistribute_table.sql +++ b/src/test/regress/sql/undistribute_table.sql @@ -77,6 +77,9 @@ SELECT * FROM partitioned_table ORDER BY 1, 2; SELECT * FROM partitioned_table_1_5 ORDER BY 1, 2; SELECT * FROM partitioned_table_6_10 ORDER BY 1, 2; +-- undistributing partitions are not supported +SELECT undistribute_table('partitioned_table_1_5'); +-- we can undistribute partitioned parent tables SELECT undistribute_table('partitioned_table'); SELECT logicalrelid FROM pg_dist_partition WHERE logicalrelid::regclass::text LIKE 'partitioned\_table%' ORDER BY 1; From d6f19e2298251502e2a8356f8e9723bc7c0db079 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Hanefi=20=C3=96nald=C4=B1?= Date: Tue, 3 Nov 2020 14:35:24 +0300 Subject: [PATCH 093/124] Honor error message conventions --- .../commands/create_distributed_table.c | 31 +++++++++---------- .../regress/expected/citus_local_tables.out | 4 +-- .../expected/isolation_undistribute_table.out | 2 +- src/test/regress/expected/pg13_with_ties.out | 2 +- src/test/regress/expected/single_node.out | 2 +- .../regress/expected/undistribute_table.out | 28 ++++++++--------- 6 files changed, 34 insertions(+), 35 deletions(-) diff --git a/src/backend/distributed/commands/create_distributed_table.c b/src/backend/distributed/commands/create_distributed_table.c index 33a2b7212..a57940aaa 100644 --- a/src/backend/distributed/commands/create_distributed_table.c +++ b/src/backend/distributed/commands/create_distributed_table.c @@ -1560,45 +1560,44 @@ UndistributeTable(Oid relationId) Relation relation = try_relation_open(relationId, ExclusiveLock); if (relation == NULL) { - ereport(ERROR, (errmsg("Cannot undistribute table"), - errdetail("No such distributed table exists. 
" - "Might have already been undistributed."))); + ereport(ERROR, (errmsg("cannot undistribute table"), + errdetail("because no such distributed table exists"))); } relation_close(relation, NoLock); if (!IsCitusTable(relationId)) { - ereport(ERROR, (errmsg("Cannot undistribute table."), - errdetail("The table is not distributed."))); + ereport(ERROR, (errmsg("cannot undistribute table "), + errdetail("because the table is not distributed"))); } if (TableReferencing(relationId)) { - ereport(ERROR, (errmsg("Cannot undistribute table " - "because it has a foreign key."))); + ereport(ERROR, (errmsg("cannot undistribute table " + "because it has a foreign key"))); } if (TableReferenced(relationId)) { - ereport(ERROR, (errmsg("Cannot undistribute table " - "because a foreign key references to it."))); + ereport(ERROR, (errmsg("cannot undistribute table " + "because a foreign key references to it"))); } char relationKind = get_rel_relkind(relationId); if (relationKind == RELKIND_FOREIGN_TABLE) { - ereport(ERROR, (errmsg("Cannot undistribute table " - "because it is a foreign table."))); + ereport(ERROR, (errmsg("cannot undistribute table " + "because it is a foreign table"))); } if (PartitionTable(relationId)) { Oid parentRelationId = PartitionParentOid(relationId); char *parentRelationName = get_rel_name(parentRelationId); - ereport(ERROR, (errmsg("Cannot undistribute table " - "because it is a partition."), - errhint("Undistribute the partitioned table \"%s\" instead.", + ereport(ERROR, (errmsg("cannot undistribute table " + "because it is a partition"), + errhint("undistribute the partitioned table \"%s\" instead", parentRelationName))); } @@ -1620,7 +1619,7 @@ UndistributeTable(Oid relationId) if (PartitionedTable(relationId)) { - ereport(NOTICE, (errmsg("Undistributing the partitions of %s", + ereport(NOTICE, (errmsg("undistributing the partitions of %s", quote_qualified_identifier(schemaName, relationName)))); List *partitionList = PartitionList(relationId); Oid 
partitionRelationId = InvalidOid; @@ -1651,7 +1650,7 @@ UndistributeTable(Oid relationId) char *tableCreationCommand = NULL; - ereport(NOTICE, (errmsg("Creating a new local table for %s", + ereport(NOTICE, (errmsg("creating a new local table for %s", quote_qualified_identifier(schemaName, relationName)))); foreach_ptr(tableCreationCommand, preLoadCommands) diff --git a/src/test/regress/expected/citus_local_tables.out b/src/test/regress/expected/citus_local_tables.out index 0d7685ed5..6d7446df5 100644 --- a/src/test/regress/expected/citus_local_tables.out +++ b/src/test/regress/expected/citus_local_tables.out @@ -333,7 +333,7 @@ ERROR: Table 'citus_local_table_1' is a citus local table. Replicating shard of -- undistribute_table is supported BEGIN; SELECT undistribute_table('citus_local_table_1'); -NOTICE: Creating a new local table for citus_local_tables_test_schema.citus_local_table_1 +NOTICE: creating a new local table for citus_local_tables_test_schema.citus_local_table_1 NOTICE: Moving the data of citus_local_tables_test_schema.citus_local_table_1 NOTICE: executing the command locally: SELECT a FROM citus_local_tables_test_schema.citus_local_table_1_1504027 citus_local_table_1 NOTICE: Dropping the old citus_local_tables_test_schema.citus_local_table_1 @@ -559,7 +559,7 @@ FROM pg_dist_partition WHERE logicalrelid = 'citus_local_table_4'::regclass; (1 row) SELECT column_name_to_column('citus_local_table_4', 'a'); - column_name_to_column + column_name_to_column --------------------------------------------------------------------- {VAR :varno 1 :varattno 1 :vartype 23 :vartypmod -1 :varcollid 0 :varlevelsup 0 :varnoold 1 :varoattno 1 :location -1} (1 row) diff --git a/src/test/regress/expected/isolation_undistribute_table.out b/src/test/regress/expected/isolation_undistribute_table.out index f20098ede..7d4d11a4b 100644 --- a/src/test/regress/expected/isolation_undistribute_table.out +++ b/src/test/regress/expected/isolation_undistribute_table.out @@ -17,7 +17,7 @@ 
step s1-commit: COMMIT; step s2-undistribute: <... completed> -error in steps s1-commit s2-undistribute: ERROR: Cannot undistribute table +error in steps s1-commit s2-undistribute: ERROR: cannot undistribute table starting permutation: s1-begin s1-undistribute s2-select s1-commit step s1-begin: diff --git a/src/test/regress/expected/pg13_with_ties.out b/src/test/regress/expected/pg13_with_ties.out index 5219b093d..51f2471ef 100644 --- a/src/test/regress/expected/pg13_with_ties.out +++ b/src/test/regress/expected/pg13_with_ties.out @@ -154,7 +154,7 @@ SELECT * FROM with_ties_table_2 ORDER BY a, b; TRUNCATE with_ties_table_2; -- test INSERT SELECTs into distributed table with a different distribution column SELECT undistribute_table('with_ties_table_2'); -NOTICE: Creating a new local table for public.with_ties_table_2 +NOTICE: creating a new local table for public.with_ties_table_2 NOTICE: Moving the data of public.with_ties_table_2 NOTICE: Dropping the old public.with_ties_table_2 NOTICE: Renaming the new table to public.with_ties_table_2 diff --git a/src/test/regress/expected/single_node.out b/src/test/regress/expected/single_node.out index ea73d9fa0..c595983a1 100644 --- a/src/test/regress/expected/single_node.out +++ b/src/test/regress/expected/single_node.out @@ -775,7 +775,7 @@ RESET citus.task_executor_type; -- make sure undistribute table works fine ALTER TABLE test DROP CONSTRAINT foreign_key; SELECT undistribute_table('test_2'); -NOTICE: Creating a new local table for single_node.test_2 +NOTICE: creating a new local table for single_node.test_2 NOTICE: Moving the data of single_node.test_2 NOTICE: Dropping the old single_node.test_2 NOTICE: Renaming the new table to single_node.test_2 diff --git a/src/test/regress/expected/undistribute_table.out b/src/test/regress/expected/undistribute_table.out index 7fac4ec29..54c323cdb 100644 --- a/src/test/regress/expected/undistribute_table.out +++ b/src/test/regress/expected/undistribute_table.out @@ -34,13 +34,13 @@ 
SELECT * FROM dist_table ORDER BY 1, 2, 3; -- we cannot immediately convert in the same statement, because -- the name->OID conversion happens at parse time. SELECT undistribute_table('dist_table'), create_distributed_table('dist_table', 'a'); -NOTICE: Creating a new local table for undistribute_table.dist_table +NOTICE: creating a new local table for undistribute_table.dist_table NOTICE: Moving the data of undistribute_table.dist_table NOTICE: Dropping the old undistribute_table.dist_table NOTICE: Renaming the new table to undistribute_table.dist_table ERROR: relation with OID XXXX does not exist SELECT undistribute_table('dist_table'); -NOTICE: Creating a new local table for undistribute_table.dist_table +NOTICE: creating a new local table for undistribute_table.dist_table NOTICE: Moving the data of undistribute_table.dist_table NOTICE: Dropping the old undistribute_table.dist_table NOTICE: Renaming the new table to undistribute_table.dist_table @@ -87,7 +87,7 @@ SELECT * FROM pg_indexes WHERE tablename = 'dist_table'; (1 row) SELECT undistribute_table('dist_table'); -NOTICE: Creating a new local table for undistribute_table.dist_table +NOTICE: creating a new local table for undistribute_table.dist_table NOTICE: Moving the data of undistribute_table.dist_table NOTICE: Dropping the old undistribute_table.dist_table NOTICE: Renaming the new table to undistribute_table.dist_table @@ -122,9 +122,9 @@ SELECT create_distributed_table('referencing_table', 'id'); INSERT INTO referencing_table VALUES (4, 6, 'cba'), (1, 1, 'dcba'), (2, 3, 'aaa'); SELECT undistribute_table('referenced_table'); -ERROR: Cannot undistribute table because a foreign key references to it. +ERROR: cannot undistribute table because a foreign key references to it SELECT undistribute_table('referencing_table'); -ERROR: Cannot undistribute table because it has a foreign key. 
+ERROR: cannot undistribute table because it has a foreign key DROP TABLE referenced_table, referencing_table; -- test distributed foreign tables -- we expect errors @@ -140,7 +140,7 @@ NOTICE: foreign-data wrapper "fake_fdw" does not have an extension defined (1 row) SELECT undistribute_table('foreign_table'); -ERROR: Cannot undistribute table because it is a foreign table. +ERROR: cannot undistribute table because it is a foreign table DROP FOREIGN TABLE foreign_table; -- test partitioned tables CREATE TABLE partitioned_table (id INT, a INT) PARTITION BY RANGE (id); @@ -196,20 +196,20 @@ SELECT * FROM partitioned_table_6_10 ORDER BY 1, 2; -- undistributing partitions are not supported SELECT undistribute_table('partitioned_table_1_5'); -ERROR: Cannot undistribute table because it is a partition. -HINT: Undistribute the partitioned table "partitioned_table" instead. +ERROR: cannot undistribute table because it is a partition +HINT: undistribute the partitioned table "partitioned_table" instead -- we can undistribute partitioned parent tables SELECT undistribute_table('partitioned_table'); -NOTICE: Undistributing the partitions of undistribute_table.partitioned_table -NOTICE: Creating a new local table for undistribute_table.partitioned_table_1_5 +NOTICE: undistributing the partitions of undistribute_table.partitioned_table +NOTICE: creating a new local table for undistribute_table.partitioned_table_1_5 NOTICE: Moving the data of undistribute_table.partitioned_table_1_5 NOTICE: Dropping the old undistribute_table.partitioned_table_1_5 NOTICE: Renaming the new table to undistribute_table.partitioned_table_1_5 -NOTICE: Creating a new local table for undistribute_table.partitioned_table_6_10 +NOTICE: creating a new local table for undistribute_table.partitioned_table_6_10 NOTICE: Moving the data of undistribute_table.partitioned_table_6_10 NOTICE: Dropping the old undistribute_table.partitioned_table_6_10 NOTICE: Renaming the new table to 
undistribute_table.partitioned_table_6_10 -NOTICE: Creating a new local table for undistribute_table.partitioned_table +NOTICE: creating a new local table for undistribute_table.partitioned_table NOTICE: Moving the data of undistribute_table.partitioned_table NOTICE: Dropping the old undistribute_table.partitioned_table NOTICE: Renaming the new table to undistribute_table.partitioned_table @@ -281,7 +281,7 @@ SELECT * FROM seq_table ORDER BY a; (3 rows) SELECT undistribute_table('seq_table'); -NOTICE: Creating a new local table for undistribute_table.seq_table +NOTICE: creating a new local table for undistribute_table.seq_table NOTICE: Moving the data of undistribute_table.seq_table NOTICE: Dropping the old undistribute_table.seq_table NOTICE: Renaming the new table to undistribute_table.seq_table @@ -346,7 +346,7 @@ SELECT * FROM another_schema.undis_view3 ORDER BY 1, 2; (3 rows) SELECT undistribute_table('view_table'); -NOTICE: Creating a new local table for undistribute_table.view_table +NOTICE: creating a new local table for undistribute_table.view_table NOTICE: Moving the data of undistribute_table.view_table NOTICE: Dropping the old undistribute_table.view_table NOTICE: drop cascades to 3 other objects From 630e579912d4725d6dc034a3bd4c18fd2355c096 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Sun, 1 Nov 2020 20:25:06 -0800 Subject: [PATCH 094/124] Handle case of partially-present metadata. --- cstore_metadata_tables.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cstore_metadata_tables.c b/cstore_metadata_tables.c index 793f3dd7f..a2eab1940 100644 --- a/cstore_metadata_tables.c +++ b/cstore_metadata_tables.c @@ -740,6 +740,15 @@ DeleteDataFileMetadataRowIfExists(Oid relfilenode) SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple = NULL; + /* + * During a restore for binary upgrade, metadata tables and indexes may or + * may not exist. 
+ */ + if (IsBinaryUpgrade) + { + return; + } + ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); From 25de5b129037deb5b89494a7dbd17495a11c5e71 Mon Sep 17 00:00:00 2001 From: SaitTalhaNisanci Date: Wed, 4 Nov 2020 12:08:15 +0300 Subject: [PATCH 095/124] Fix uninitilized variable (#4293) Valgrind found that, we were doing an if check on uninitialized variable and it seems that this is on context.appendparents. https://github.com/postgres/postgres/blob/ac22929a2613e122708bd0172508ac863c51c1cc/src/backend/utils/adt/ruleutils.c#L1054 --- src/backend/distributed/deparser/ruleutils_13.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/backend/distributed/deparser/ruleutils_13.c b/src/backend/distributed/deparser/ruleutils_13.c index 040878150..3f2df06ed 100644 --- a/src/backend/distributed/deparser/ruleutils_13.c +++ b/src/backend/distributed/deparser/ruleutils_13.c @@ -7763,6 +7763,7 @@ pg_get_triggerdef_worker(Oid trigid, bool pretty) context.wrapColumn = WRAP_COLUMN_DEFAULT; context.indentLevel = PRETTYINDENT_STD; context.special_exprkind = EXPR_KIND_NONE; + context.appendparents = NULL; get_rule_expr(qual, &context, false); From cc8be422cee41722b155f725f82fcd005b1c7cb4 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Thu, 5 Nov 2020 14:21:02 +0300 Subject: [PATCH 096/124] Fix relkind checks in planner for relkinds other than RELKIND_RELATION (#4294) We were qualifying relations with relkind != RELKIND_RELATION as non-relations due to the strict checks around RangeTblEntry->relkind in planner. 
--- .../distributed/planner/distributed_planner.c | 19 +- .../planner/multi_logical_planner.c | 3 +- .../planner/multi_router_planner.c | 5 +- .../regress/expected/mixed_relkind_tests.out | 707 ++++++++++++++++++ .../regress/expected/multi_test_helpers.out | 14 + src/test/regress/multi_schedule | 2 +- src/test/regress/sql/mixed_relkind_tests.sql | 284 +++++++ src/test/regress/sql/multi_test_helpers.sql | 15 + 8 files changed, 1041 insertions(+), 8 deletions(-) create mode 100644 src/test/regress/expected/mixed_relkind_tests.out create mode 100644 src/test/regress/sql/mixed_relkind_tests.sql diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 13457520c..5f0795da4 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -2296,11 +2296,26 @@ GetRTEListProperties(List *rangeTableList) RangeTblEntry *rangeTableEntry = NULL; foreach_ptr(rangeTableEntry, rangeTableList) { - if (!(rangeTableEntry->rtekind == RTE_RELATION && - rangeTableEntry->relkind == RELKIND_RELATION)) + if (rangeTableEntry->rtekind != RTE_RELATION) { continue; } + else if (rangeTableEntry->relkind == RELKIND_VIEW) + { + /* + * Skip over views, distributed tables within (regular) views are + * already in rangeTableList. + */ + continue; + } + else if (rangeTableEntry->relkind == RELKIND_MATVIEW) + { + /* + * Skip over materialized views, here we should not consider + * materialized views as local tables. 
+ */ + continue; + } Oid relationId = rangeTableEntry->relid; CitusTableCacheEntry *cacheEntry = LookupCitusTableCacheEntry(relationId); diff --git a/src/backend/distributed/planner/multi_logical_planner.c b/src/backend/distributed/planner/multi_logical_planner.c index 14a57032e..1f028ff0e 100644 --- a/src/backend/distributed/planner/multi_logical_planner.c +++ b/src/backend/distributed/planner/multi_logical_planner.c @@ -312,8 +312,7 @@ NodeTryGetRteRelid(Node *node) RangeTblEntry *rangeTableEntry = (RangeTblEntry *) node; - if (!(rangeTableEntry->rtekind == RTE_RELATION && - rangeTableEntry->relkind == RELKIND_RELATION)) + if (rangeTableEntry->rtekind != RTE_RELATION) { return InvalidOid; } diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index 9fdd55d97..e314aa4b5 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -518,14 +518,13 @@ static DeferredErrorMessage * ModifyPartialQuerySupported(Query *queryTree, bool multiShardQuery, Oid *distributedTableIdOutput) { - DeferredErrorMessage *deferredError = - DeferErrorIfUnsupportedModifyQueryWithLocalTable(queryTree); + DeferredErrorMessage *deferredError = DeferErrorIfModifyView(queryTree); if (deferredError != NULL) { return deferredError; } - deferredError = DeferErrorIfModifyView(queryTree); + deferredError = DeferErrorIfUnsupportedModifyQueryWithLocalTable(queryTree); if (deferredError != NULL) { return deferredError; diff --git a/src/test/regress/expected/mixed_relkind_tests.out b/src/test/regress/expected/mixed_relkind_tests.out new file mode 100644 index 000000000..27b7f26e4 --- /dev/null +++ b/src/test/regress/expected/mixed_relkind_tests.out @@ -0,0 +1,707 @@ +\SET VERBOSITY terse +invalid command \SET +SET citus.next_shard_id TO 1513000; +SET citus.shard_replication_factor TO 1; +CREATE SCHEMA mixed_relkind_tests; +SET search_path TO 
mixed_relkind_tests; +-- ensure that coordinator is added to pg_dist_node +SET client_min_messages TO ERROR; +SELECT 1 FROM master_add_node('localhost', :master_port, groupId => 0); + ?column? +--------------------------------------------------------------------- + 1 +(1 row) + +RESET client_min_messages; +-- make results consistent +SET citus.enable_cte_inlining TO OFF; +-- create test tables +CREATE TABLE postgres_local_table (a int); +CREATE TABLE partitioned_postgres_local_table(a int) PARTITION BY RANGE(a); +CREATE TABLE partitioned_postgres_local_table_1 PARTITION OF partitioned_postgres_local_table FOR VALUES FROM (0) TO (3); +CREATE TABLE partitioned_postgres_local_table_2 PARTITION OF partitioned_postgres_local_table FOR VALUES FROM (3) TO (1000); +CREATE TABLE reference_table(a int); +SELECT create_reference_table('reference_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW view_on_ref AS SELECT * FROM reference_table; +CREATE TABLE citus_local_table(a int); +SELECT create_citus_local_table('citus_local_table'); + create_citus_local_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW view_on_citus_local AS SELECT * FROM citus_local_table; +CREATE UNLOGGED TABLE unlogged_distributed_table(a int, b int); +SELECT create_distributed_table('unlogged_distributed_table', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE TABLE distributed_table(a int); +SELECT create_distributed_table('distributed_table', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW view_on_dist AS SELECT * FROM distributed_table; +CREATE MATERIALIZED VIEW mat_view_on_dist AS SELECT * FROM distributed_table; +CREATE TABLE partitioned_distributed_table(a int, b int) PARTITION BY RANGE(a); +CREATE TABLE 
partitioned_distributed_table_1 PARTITION OF partitioned_distributed_table FOR VALUES FROM (0) TO (3); +CREATE TABLE partitioned_distributed_table_2 PARTITION OF partitioned_distributed_table FOR VALUES FROM (3) TO (1000); +SELECT create_distributed_table('partitioned_distributed_table', 'a'); + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +CREATE VIEW view_on_part_dist AS SELECT * FROM partitioned_distributed_table; +CREATE MATERIALIZED VIEW mat_view_on_part_dist AS SELECT * FROM partitioned_distributed_table; +CREATE FOREIGN TABLE foreign_distributed_table (a int, b int) SERVER fake_fdw_server; +SELECT create_distributed_table('foreign_distributed_table', 'a'); +NOTICE: foreign-data wrapper "fake_fdw" does not have an extension defined + create_distributed_table +--------------------------------------------------------------------- + +(1 row) + +-- and insert some data +INSERT INTO postgres_local_table SELECT * FROM generate_series(0, 5); +INSERT INTO partitioned_postgres_local_table SELECT * FROM generate_series(0, 5); +INSERT INTO reference_table SELECT * FROM generate_series(0, 5); +INSERT INTO citus_local_table SELECT * FROM generate_series(0, 5); +INSERT INTO unlogged_distributed_table SELECT a,a+1 FROM generate_series(0, 5) AS a; +INSERT INTO distributed_table SELECT * FROM generate_series(0, 5); +INSERT INTO partitioned_distributed_table SELECT a,a+1 FROM generate_series(0, 5) AS a; +-- should work +SELECT * FROM partitioned_distributed_table UNION SELECT 1,1 ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 0 | 1 + 1 | 1 + 1 | 2 + 2 | 3 + 3 | 4 + 4 | 5 + 5 | 6 +(7 rows) + +SELECT * FROM partitioned_distributed_table UNION SELECT 1, * FROM postgres_local_table ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 0 | 1 + 1 | 0 + 1 | 1 + 1 | 2 + 1 | 3 + 1 | 4 + 1 | 5 + 2 | 3 + 3 | 4 + 4 | 5 + 5 | 6 +(11 rows) 
+ +SELECT * FROM partitioned_distributed_table UNION SELECT * FROM unlogged_distributed_table ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 0 | 1 + 1 | 2 + 2 | 3 + 3 | 4 + 4 | 5 + 5 | 6 +(6 rows) + +SELECT *, 1 FROM postgres_local_table UNION SELECT * FROM unlogged_distributed_table ORDER BY 1,2; + a | ?column? +--------------------------------------------------------------------- + 0 | 1 + 1 | 1 + 1 | 2 + 2 | 1 + 2 | 3 + 3 | 1 + 3 | 4 + 4 | 1 + 4 | 5 + 5 | 1 + 5 | 6 +(11 rows) + +SELECT * FROM unlogged_distributed_table UNION SELECT 1,1 ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 0 | 1 + 1 | 1 + 1 | 2 + 2 | 3 + 3 | 4 + 4 | 5 + 5 | 6 +(7 rows) + +SELECT * from foreign_distributed_table UNION SELECT 1,1 ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +SELECT 1 UNION SELECT * FROM citus_local_table ORDER BY 1; + ?column? +--------------------------------------------------------------------- + 0 + 1 + 2 + 3 + 4 + 5 +(6 rows) + +SELECT * FROM view_on_part_dist UNION SELECT 1,1 ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 0 | 1 + 1 | 1 + 1 | 2 + 2 | 3 + 3 | 4 + 4 | 5 + 5 | 6 +(7 rows) + +SELECT * FROM mat_view_on_part_dist UNION SELECT 1,1 ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +SELECT * FROM view_on_citus_local UNION SELECT 1 ORDER BY 1; + a +--------------------------------------------------------------------- + 0 + 1 + 2 + 3 + 4 + 5 +(6 rows) + +SELECT * FROM view_on_dist UNION SELECT 1 ORDER BY 1; + a +--------------------------------------------------------------------- + 0 + 1 + 2 + 3 + 4 + 5 +(6 rows) + +SELECT * FROM mat_view_on_dist UNION SELECT 1 ORDER BY 1; + a +--------------------------------------------------------------------- + 1 +(1 row) + +SET client_min_messages TO 
DEBUG1; +-- can push down the union in subquery +SELECT * FROM (SELECT * FROM partitioned_distributed_table UNION SELECT * FROM partitioned_distributed_table) AS foo; + a | b +--------------------------------------------------------------------- + 1 | 2 + 5 | 6 + 4 | 5 + 3 | 4 + 0 | 1 + 2 | 3 +(6 rows) + +-- cannot push down the subquery, should evaluate subquery by creating a subplan +SELECT COUNT(*) FROM (SELECT b, random() FROM partitioned_distributed_table GROUP BY b) AS foo; +DEBUG: generating subplan XXX_1 for subquery SELECT b, random() AS random FROM mixed_relkind_tests.partitioned_distributed_table GROUP BY b +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.b, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(b integer, random double precision)) foo + count +--------------------------------------------------------------------- + 6 +(1 row) + +SELECT * FROM partitioned_distributed_table WHERE b IN (SELECT a FROM postgres_local_table) ORDER BY 1,2; +DEBUG: generating subplan XXX_1 for subquery SELECT a FROM mixed_relkind_tests.postgres_local_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT a, b FROM mixed_relkind_tests.partitioned_distributed_table WHERE (b OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.a FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer))) ORDER BY a, b + a | b +--------------------------------------------------------------------- + 0 | 1 + 1 | 2 + 2 | 3 + 3 | 4 + 4 | 5 +(5 rows) + +-- can push down the subquery +SELECT * FROM partitioned_distributed_table WHERE a IN (SELECT a FROM distributed_table) ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 0 | 1 + 1 | 2 + 2 | 3 + 3 | 4 + 4 | 5 + 5 | 6 +(6 rows) + +SELECT * FROM partitioned_distributed_table WHERE a IN (SELECT a 
FROM view_on_part_dist) ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 0 | 1 + 1 | 2 + 2 | 3 + 3 | 4 + 4 | 5 + 5 | 6 +(6 rows) + +SELECT * FROM distributed_table WHERE a IN (SELECT a FROM view_on_part_dist) ORDER BY 1; + a +--------------------------------------------------------------------- + 0 + 1 + 2 + 3 + 4 + 5 +(6 rows) + +SELECT * FROM view_on_dist WHERE a IN (SELECT a FROM view_on_part_dist) ORDER BY 1; + a +--------------------------------------------------------------------- + 0 + 1 + 2 + 3 + 4 + 5 +(6 rows) + +SELECT * FROM view_on_citus_local WHERE a IN (SELECT a FROM reference_table) ORDER BY 1; + a +--------------------------------------------------------------------- + 0 + 1 + 2 + 3 + 4 + 5 +(6 rows) + +SELECT COUNT(*) FROM (SELECT a, random() FROM partitioned_distributed_table GROUP BY a) AS foo; + count +--------------------------------------------------------------------- + 6 +(1 row) + +-- should add (a IS NOT NULL) filters similar to regular distributed tables +RESET client_min_messages; +SELECT public.explain_has_is_not_null( +$$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT * FROM partitioned_distributed_table; +$$); + explain_has_is_not_null +--------------------------------------------------------------------- + t +(1 row) + +SET client_min_messages TO DEBUG1; +-- should fail +SELECT * FROM partitioned_postgres_local_table JOIN distributed_table ON (true); +ERROR: direct joins between distributed and local tables are not supported +HINT: Use CTE's or subqueries to select from local tables and use them in joins +SELECT * FROM partitioned_postgres_local_table JOIN partitioned_distributed_table ON (true); +ERROR: direct joins between distributed and local tables are not supported +HINT: Use CTE's or subqueries to select from local tables and use them in joins +SELECT * FROM distributed_table JOIN partitioned_postgres_local_table ON (true); +ERROR: direct joins between 
distributed and local tables are not supported +HINT: Use CTE's or subqueries to select from local tables and use them in joins +SELECT * FROM reference_table LEFT JOIN partitioned_distributed_table ON true; +ERROR: cannot pushdown the subquery +DETAIL: There exist a reference table in the outer part of the outer join +INSERT INTO partitioned_distributed_table SELECT foo.* FROM partitioned_distributed_table AS foo JOIN citus_local_table ON (true); +DEBUG: distributed INSERT ... SELECT cannot select from distributed tables and local tables at the same time +ERROR: direct joins between distributed and local tables are not supported +INSERT INTO partitioned_distributed_table SELECT foo.* FROM distributed_table AS foo JOIN citus_local_table ON (true); +DEBUG: distributed INSERT ... SELECT cannot select from distributed tables and local tables at the same time +ERROR: direct joins between distributed and local tables are not supported +INSERT INTO distributed_table SELECT foo.a FROM partitioned_distributed_table AS foo JOIN citus_local_table ON (true); +DEBUG: distributed INSERT ... 
SELECT cannot select from distributed tables and local tables at the same time +ERROR: direct joins between distributed and local tables are not supported +-- non-colocated subquery should work +SELECT COUNT(*) FROM + (SELECT *, random() FROM partitioned_distributed_table) AS foo, + (SELECT *, random() FROM partitioned_distributed_table) AS bar +WHERE foo.a = bar.b; +DEBUG: generating subplan XXX_1 for subquery SELECT a, b, random() AS random FROM mixed_relkind_tests.partitioned_distributed_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT partitioned_distributed_table.a, partitioned_distributed_table.b, random() AS random FROM mixed_relkind_tests.partitioned_distributed_table) foo, (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, random double precision)) bar WHERE (foo.a OPERATOR(pg_catalog.=) bar.b) + count +--------------------------------------------------------------------- + 5 +(1 row) + +SELECT COUNT(*) FROM + (SELECT *, random() FROM unlogged_distributed_table) AS foo, + (SELECT *, random() FROM foreign_distributed_table) AS bar +WHERE foo.a = bar.b; +DEBUG: generating subplan XXX_1 for subquery SELECT a, b, random() AS random FROM mixed_relkind_tests.foreign_distributed_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT unlogged_distributed_table.a, unlogged_distributed_table.b, random() AS random FROM mixed_relkind_tests.unlogged_distributed_table) foo, (SELECT intermediate_result.a, intermediate_result.b, intermediate_result.random FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer, random double precision)) bar WHERE (foo.a OPERATOR(pg_catalog.=) bar.b) + count +--------------------------------------------------------------------- + 0 +(1 
row) + +-- should fail +UPDATE partitioned_distributed_table SET b = foo.a FROM citus_local_table AS foo; +ERROR: cannot plan modifications with citus local tables and distributed tables +HINT: Use CTE's or subqueries to select from local tables and use them in joins +UPDATE partitioned_distributed_table SET b = foo.a FROM postgres_local_table AS foo; +ERROR: cannot plan modifications with local tables involving citus tables +HINT: Use CTE's or subqueries to select from local tables and use them in joins +UPDATE partitioned_distributed_table SET a = foo.a FROM postgres_local_table AS foo WHERE foo.a = partitioned_distributed_table.a; +ERROR: cannot plan modifications with local tables involving citus tables +HINT: Use CTE's or subqueries to select from local tables and use them in joins +UPDATE partitioned_distributed_table SET a = foo.a FROM citus_local_table AS foo WHERE foo.a = partitioned_distributed_table.a; +ERROR: cannot plan modifications with citus local tables and distributed tables +HINT: Use CTE's or subqueries to select from local tables and use them in joins +UPDATE partitioned_distributed_table SET a = foo.a FROM mat_view_on_part_dist AS foo WHERE foo.a = partitioned_distributed_table.a; +ERROR: materialized views in modify queries are not supported +UPDATE partitioned_distributed_table SET a = foo.a FROM partitioned_distributed_table AS foo WHERE foo.a < partitioned_distributed_table.a; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +UPDATE partitioned_distributed_table SET a = foo.a FROM distributed_table AS foo WHERE foo.a < partitioned_distributed_table.a; +ERROR: complex joins are only supported when all distributed tables are co-located and joined on their distribution columns +-- should work +UPDATE partitioned_distributed_table SET a = foo.a FROM partitioned_distributed_table AS foo WHERE foo.a = partitioned_distributed_table.a; +UPDATE partitioned_distributed_table 
SET a = foo.a FROM view_on_part_dist AS foo WHERE foo.a = partitioned_distributed_table.a; +UPDATE partitioned_distributed_table SET a = foo.a FROM view_on_dist AS foo WHERE foo.a = partitioned_distributed_table.a; +UPDATE partitioned_distributed_table SET a = foo.a FROM view_on_ref AS foo WHERE foo.a = partitioned_distributed_table.a; +-- JOINs on the distribution key +SELECT COUNT(*) FROM partitioned_distributed_table p1 JOIN partitioned_distributed_table p2 USING (a); + count +--------------------------------------------------------------------- + 6 +(1 row) + +SELECT COUNT(*) FROM unlogged_distributed_table u1 JOIN partitioned_distributed_table p2 USING (a); + count +--------------------------------------------------------------------- + 6 +(1 row) + +SELECT COUNT(*) FROM partitioned_distributed_table p1 LEFT JOIN partitioned_distributed_table p2 USING (a); + count +--------------------------------------------------------------------- + 6 +(1 row) + +-- lateral JOIN +SELECT COUNT(*) FROM partitioned_distributed_table p1 JOIN LATERAL (SELECT * FROM partitioned_distributed_table p2 WHERE p1.a = p2.a) AS foo ON (true); + count +--------------------------------------------------------------------- + 6 +(1 row) + +-- router query +SELECT COUNT(*) FROM partitioned_distributed_table p1 JOIN partitioned_distributed_table p2 USING (a) WHERE a = 1; + count +--------------------------------------------------------------------- + 1 +(1 row) + +-- repartition query +SET citus.enable_repartition_joins TO ON; +SELECT COUNT(*) FROM partitioned_distributed_table p1 JOIN partitioned_distributed_table p2 USING (b) WHERE b = 1; + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT COUNT(*) FROM unlogged_distributed_table u1 JOIN partitioned_distributed_table p2 USING (b) WHERE b = 1; + count +--------------------------------------------------------------------- + 1 +(1 row) + +RESET citus.enable_repartition_joins; +-- joins with 
cte's +WITH cte_1 AS (SELECT * FROM partitioned_distributed_table) + SELECT COUNT(*) FROM cte_1; +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT a, b FROM mixed_relkind_tests.partitioned_distributed_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte_1 + count +--------------------------------------------------------------------- + 6 +(1 row) + +WITH cte_1 AS (SELECT * FROM partitioned_distributed_table) + SELECT COUNT(*) FROM cte_1 JOIN partitioned_distributed_table USING (a); +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT a, b FROM mixed_relkind_tests.partitioned_distributed_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte_1 JOIN mixed_relkind_tests.partitioned_distributed_table USING (a)) + count +--------------------------------------------------------------------- + 6 +(1 row) + +WITH cte_1 AS (SELECT * FROM foreign_distributed_table) + SELECT COUNT(*) FROM cte_1 JOIN foreign_distributed_table USING (a); +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT a, b FROM mixed_relkind_tests.foreign_distributed_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte_1 JOIN mixed_relkind_tests.foreign_distributed_table USING (a)) + count +--------------------------------------------------------------------- + 0 +(1 row) + +WITH cte_1 AS (SELECT * FROM partitioned_distributed_table) + SELECT COUNT(*) FROM 
cte_1 JOIN partitioned_distributed_table USING (b); +DEBUG: generating subplan XXX_1 for CTE cte_1: SELECT a, b FROM mixed_relkind_tests.partitioned_distributed_table +DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM ((SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte_1 JOIN mixed_relkind_tests.partitioned_distributed_table USING (b)) + count +--------------------------------------------------------------------- + 6 +(1 row) + +-- multi shard colocated update +UPDATE partitioned_distributed_table dt +SET b = sub1.a + sub2.a +FROM (SELECT * FROM partitioned_distributed_table WHERE b = 1) AS sub1, + (SELECT * FROM partitioned_distributed_table WHERE b = 2) AS sub2 +WHERE sub1.a = sub2.a AND sub1.a = dt.a AND dt.a > 1; +UPDATE unlogged_distributed_table dt +SET b = sub1.a + sub2.a +FROM (SELECT * FROM unlogged_distributed_table WHERE b = 1) AS sub1, + (SELECT * FROM unlogged_distributed_table WHERE b = 2) AS sub2 +WHERE sub1.a = sub2.a AND sub1.a = dt.a AND dt.a > 1; +-- multi shard non-colocated update +WITH cte1 AS (SELECT * FROM partitioned_distributed_table WHERE b = 1), + cte2 AS (SELECT * FROM partitioned_distributed_table WHERE b = 2) +UPDATE partitioned_distributed_table dt SET b = cte1.a + cte2.a +FROM cte1, cte2 WHERE cte1.a != cte2.a AND cte1.a = dt.a AND dt.a > 1; +DEBUG: generating subplan XXX_1 for CTE cte1: SELECT a, b FROM mixed_relkind_tests.partitioned_distributed_table WHERE (b OPERATOR(pg_catalog.=) 1) +DEBUG: generating subplan XXX_2 for CTE cte2: SELECT a, b FROM mixed_relkind_tests.partitioned_distributed_table WHERE (b OPERATOR(pg_catalog.=) 2) +DEBUG: Plan XXX query after replacing subqueries and CTEs: UPDATE mixed_relkind_tests.partitioned_distributed_table dt SET b = (cte1.a OPERATOR(pg_catalog.+) cte2.a) FROM (SELECT intermediate_result.a, intermediate_result.b FROM 
read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte1, (SELECT intermediate_result.a, intermediate_result.b FROM read_intermediate_result('XXX_2'::text, 'binary'::citus_copy_format) intermediate_result(a integer, b integer)) cte2 WHERE ((cte1.a OPERATOR(pg_catalog.<>) cte2.a) AND (cte1.a OPERATOR(pg_catalog.=) dt.a) AND (dt.a OPERATOR(pg_catalog.>) 1)) +-- router update with CTE +UPDATE partitioned_distributed_table dt +SET b = sub1.a + sub2.a +FROM (SELECT * FROM partitioned_distributed_table WHERE b = 1) AS sub1, + (SELECT * FROM partitioned_distributed_table WHERE b = 2) AS sub2 +WHERE sub1.a = sub2.a AND sub1.a = dt.a AND dt.a = 1; +-- INSERT .. SELECT via coordinator +RESET client_min_messages; +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT * FROM partitioned_distributed_table ORDER BY 1,2 LIMIT 5; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Limit + -> Sort + Sort Key: remote_scan.a, remote_scan.b + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO unlogged_distributed_table SELECT * FROM partitioned_distributed_table ORDER BY 1,2 LIMIT 5; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: pull to coordinator + -> Limit + -> Sort + Sort Key: remote_scan.a, remote_scan.b + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT * FROM distributed_table ORDER BY 1 LIMIT 5; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: pull to coordinator + -> Limit + -> Sort + Sort Key: remote_scan.a + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +-- INSERT .. SELECT via repartition +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT a + 1 FROM partitioned_distributed_table; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO unlogged_distributed_table SELECT a + 1 FROM partitioned_distributed_table; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT a + 1 FROM distributed_table; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT a + 1 FROM unlogged_distributed_table; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Custom Scan (Citus INSERT ... 
SELECT) + INSERT/SELECT method: repartition + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + +SET client_min_messages TO DEBUG1; +-- some aggregate queries +SELECT sum(a) FROM partitioned_distributed_table; + sum +--------------------------------------------------------------------- + 15 +(1 row) + +SELECT ceil(regr_syy(a, b)) FROM partitioned_distributed_table; + ceil +--------------------------------------------------------------------- + 18 +(1 row) + +SELECT ceil(regr_syy(a, b)) FROM unlogged_distributed_table; + ceil +--------------------------------------------------------------------- + 18 +(1 row) + +-- pushdown WINDOW +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +SELECT a, COUNT(*) OVER (PARTITION BY a) FROM partitioned_distributed_table ORDER BY 1,2; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Sort + Sort Key: remote_scan.a, remote_scan.count + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +SELECT a, COUNT(*) OVER (PARTITION BY a) FROM foreign_distributed_table ORDER BY 1,2; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Sort + Sort Key: remote_scan.a, remote_scan.count + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(4 rows) + +-- pull to coordinator WINDOW +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +SELECT a, COUNT(*) OVER (PARTITION BY a+1) FROM partitioned_distributed_table ORDER BY 1,2; +$Q$); + coordinator_plan +--------------------------------------------------------------------- + Sort + Sort Key: remote_scan.a, (count(*) OVER (?)) + -> WindowAgg + -> Sort + Sort Key: remote_scan.worker_column_2 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +SELECT a, COUNT(*) OVER (PARTITION BY a+1) FROM foreign_distributed_table ORDER BY 1,2; +$Q$); + coordinator_plan 
+--------------------------------------------------------------------- + Sort + Sort Key: remote_scan.a, (count(*) OVER (?)) + -> WindowAgg + -> Sort + Sort Key: remote_scan.worker_column_2 + -> Custom Scan (Citus Adaptive) + Task Count: 4 +(7 rows) + +-- FOR UPDATE +SELECT * FROM partitioned_distributed_table WHERE a = 1 ORDER BY 1,2 FOR UPDATE; + a | b +--------------------------------------------------------------------- + 1 | 2 +(1 row) + +SELECT * FROM unlogged_distributed_table WHERE a = 1 ORDER BY 1,2 FOR UPDATE; + a | b +--------------------------------------------------------------------- + 1 | 2 +(1 row) + +VACUUM partitioned_distributed_table; +TRUNCATE partitioned_distributed_table; +SET client_min_messages TO ERROR; +-- drop column followed by SELECT in transaction block +BEGIN; + ALTER TABLE partitioned_distributed_table DROP COLUMN b CASCADE; + SELECT * FROM partitioned_distributed_table; + a +--------------------------------------------------------------------- +(0 rows) + +COMMIT; +BEGIN; + ALTER TABLE foreign_distributed_table DROP COLUMN b CASCADE; + SELECT * FROM foreign_distributed_table; + a +--------------------------------------------------------------------- +(0 rows) + +COMMIT; +-- cleanup at exit +DROP SCHEMA mixed_relkind_tests CASCADE; diff --git a/src/test/regress/expected/multi_test_helpers.out b/src/test/regress/expected/multi_test_helpers.out index 59b1ee764..fef5c7f40 100644 --- a/src/test/regress/expected/multi_test_helpers.out +++ b/src/test/regress/expected/multi_test_helpers.out @@ -29,6 +29,20 @@ BEGIN END LOOP; RETURN; END; $$ language plpgsql; +-- helper function that returns true if output of given explain has "is not null" (case in-sensitive) +CREATE OR REPLACE FUNCTION explain_has_is_not_null(explain_commmand text) +RETURNS BOOLEAN AS $$ +DECLARE + query_plan text; +BEGIN + FOR query_plan IN EXECUTE explain_commmand LOOP + IF query_plan ILIKE '%is not null%' + THEN + RETURN true; + END IF; + END LOOP; + RETURN false; 
+END; $$ language plpgsql; -- helper function to quickly run SQL on the whole cluster CREATE OR REPLACE FUNCTION run_command_on_coordinator_and_workers(p_sql text) RETURNS void LANGUAGE plpgsql AS $$ diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index 43fc468d5..c9e60c809 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -310,7 +310,7 @@ test: replicate_reference_tables_to_coordinator test: coordinator_shouldhaveshards test: local_shard_utility_command_execution test: citus_local_tables -test: multi_row_router_insert +test: multi_row_router_insert mixed_relkind_tests test: remove_coordinator diff --git a/src/test/regress/sql/mixed_relkind_tests.sql b/src/test/regress/sql/mixed_relkind_tests.sql new file mode 100644 index 000000000..62ba66508 --- /dev/null +++ b/src/test/regress/sql/mixed_relkind_tests.sql @@ -0,0 +1,284 @@ +\SET VERBOSITY terse + +SET citus.next_shard_id TO 1513000; +SET citus.shard_replication_factor TO 1; + +CREATE SCHEMA mixed_relkind_tests; +SET search_path TO mixed_relkind_tests; + +-- ensure that coordinator is added to pg_dist_node +SET client_min_messages TO ERROR; +SELECT 1 FROM master_add_node('localhost', :master_port, groupId => 0); +RESET client_min_messages; + +-- make results consistent +SET citus.enable_cte_inlining TO OFF; + +-- create test tables +CREATE TABLE postgres_local_table (a int); + +CREATE TABLE partitioned_postgres_local_table(a int) PARTITION BY RANGE(a); +CREATE TABLE partitioned_postgres_local_table_1 PARTITION OF partitioned_postgres_local_table FOR VALUES FROM (0) TO (3); +CREATE TABLE partitioned_postgres_local_table_2 PARTITION OF partitioned_postgres_local_table FOR VALUES FROM (3) TO (1000); + +CREATE TABLE reference_table(a int); +SELECT create_reference_table('reference_table'); + +CREATE VIEW view_on_ref AS SELECT * FROM reference_table; + +CREATE TABLE citus_local_table(a int); +SELECT create_citus_local_table('citus_local_table'); + 
+CREATE VIEW view_on_citus_local AS SELECT * FROM citus_local_table; + +CREATE UNLOGGED TABLE unlogged_distributed_table(a int, b int); +SELECT create_distributed_table('unlogged_distributed_table', 'a'); + +CREATE TABLE distributed_table(a int); +SELECT create_distributed_table('distributed_table', 'a'); + +CREATE VIEW view_on_dist AS SELECT * FROM distributed_table; +CREATE MATERIALIZED VIEW mat_view_on_dist AS SELECT * FROM distributed_table; + +CREATE TABLE partitioned_distributed_table(a int, b int) PARTITION BY RANGE(a); +CREATE TABLE partitioned_distributed_table_1 PARTITION OF partitioned_distributed_table FOR VALUES FROM (0) TO (3); +CREATE TABLE partitioned_distributed_table_2 PARTITION OF partitioned_distributed_table FOR VALUES FROM (3) TO (1000); +SELECT create_distributed_table('partitioned_distributed_table', 'a'); + +CREATE VIEW view_on_part_dist AS SELECT * FROM partitioned_distributed_table; +CREATE MATERIALIZED VIEW mat_view_on_part_dist AS SELECT * FROM partitioned_distributed_table; + +CREATE FOREIGN TABLE foreign_distributed_table (a int, b int) SERVER fake_fdw_server; +SELECT create_distributed_table('foreign_distributed_table', 'a'); + +-- and insert some data +INSERT INTO postgres_local_table SELECT * FROM generate_series(0, 5); +INSERT INTO partitioned_postgres_local_table SELECT * FROM generate_series(0, 5); +INSERT INTO reference_table SELECT * FROM generate_series(0, 5); +INSERT INTO citus_local_table SELECT * FROM generate_series(0, 5); +INSERT INTO unlogged_distributed_table SELECT a,a+1 FROM generate_series(0, 5) AS a; +INSERT INTO distributed_table SELECT * FROM generate_series(0, 5); +INSERT INTO partitioned_distributed_table SELECT a,a+1 FROM generate_series(0, 5) AS a; + +-- should work +SELECT * FROM partitioned_distributed_table UNION SELECT 1,1 ORDER BY 1,2; +SELECT * FROM partitioned_distributed_table UNION SELECT 1, * FROM postgres_local_table ORDER BY 1,2; +SELECT * FROM partitioned_distributed_table UNION SELECT * FROM 
unlogged_distributed_table ORDER BY 1,2; +SELECT *, 1 FROM postgres_local_table UNION SELECT * FROM unlogged_distributed_table ORDER BY 1,2; +SELECT * FROM unlogged_distributed_table UNION SELECT 1,1 ORDER BY 1,2; +SELECT * from foreign_distributed_table UNION SELECT 1,1 ORDER BY 1,2; +SELECT 1 UNION SELECT * FROM citus_local_table ORDER BY 1; + +SELECT * FROM view_on_part_dist UNION SELECT 1,1 ORDER BY 1,2; +SELECT * FROM mat_view_on_part_dist UNION SELECT 1,1 ORDER BY 1,2; +SELECT * FROM view_on_citus_local UNION SELECT 1 ORDER BY 1; +SELECT * FROM view_on_dist UNION SELECT 1 ORDER BY 1; +SELECT * FROM mat_view_on_dist UNION SELECT 1 ORDER BY 1; + +SET client_min_messages TO DEBUG1; + +-- can push down the union in subquery +SELECT * FROM (SELECT * FROM partitioned_distributed_table UNION SELECT * FROM partitioned_distributed_table) AS foo; + +-- cannot push down the subquery, should evaluate subquery by creating a subplan +SELECT COUNT(*) FROM (SELECT b, random() FROM partitioned_distributed_table GROUP BY b) AS foo; +SELECT * FROM partitioned_distributed_table WHERE b IN (SELECT a FROM postgres_local_table) ORDER BY 1,2; + +-- can push down the subquery +SELECT * FROM partitioned_distributed_table WHERE a IN (SELECT a FROM distributed_table) ORDER BY 1,2; +SELECT * FROM partitioned_distributed_table WHERE a IN (SELECT a FROM view_on_part_dist) ORDER BY 1,2; +SELECT * FROM distributed_table WHERE a IN (SELECT a FROM view_on_part_dist) ORDER BY 1; +SELECT * FROM view_on_dist WHERE a IN (SELECT a FROM view_on_part_dist) ORDER BY 1; +SELECT * FROM view_on_citus_local WHERE a IN (SELECT a FROM reference_table) ORDER BY 1; +SELECT COUNT(*) FROM (SELECT a, random() FROM partitioned_distributed_table GROUP BY a) AS foo; + +-- should add (a IS NOT NULL) filters similar to regular distributed tables +RESET client_min_messages; +SELECT public.explain_has_is_not_null( +$$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT * FROM 
partitioned_distributed_table; +$$); +SET client_min_messages TO DEBUG1; + +-- should fail +SELECT * FROM partitioned_postgres_local_table JOIN distributed_table ON (true); +SELECT * FROM partitioned_postgres_local_table JOIN partitioned_distributed_table ON (true); +SELECT * FROM distributed_table JOIN partitioned_postgres_local_table ON (true); +SELECT * FROM reference_table LEFT JOIN partitioned_distributed_table ON true; +INSERT INTO partitioned_distributed_table SELECT foo.* FROM partitioned_distributed_table AS foo JOIN citus_local_table ON (true); +INSERT INTO partitioned_distributed_table SELECT foo.* FROM distributed_table AS foo JOIN citus_local_table ON (true); +INSERT INTO distributed_table SELECT foo.a FROM partitioned_distributed_table AS foo JOIN citus_local_table ON (true); + +-- non-colocated subquery should work +SELECT COUNT(*) FROM + (SELECT *, random() FROM partitioned_distributed_table) AS foo, + (SELECT *, random() FROM partitioned_distributed_table) AS bar +WHERE foo.a = bar.b; + +SELECT COUNT(*) FROM + (SELECT *, random() FROM unlogged_distributed_table) AS foo, + (SELECT *, random() FROM foreign_distributed_table) AS bar +WHERE foo.a = bar.b; + +-- should fail +UPDATE partitioned_distributed_table SET b = foo.a FROM citus_local_table AS foo; +UPDATE partitioned_distributed_table SET b = foo.a FROM postgres_local_table AS foo; +UPDATE partitioned_distributed_table SET a = foo.a FROM postgres_local_table AS foo WHERE foo.a = partitioned_distributed_table.a; +UPDATE partitioned_distributed_table SET a = foo.a FROM citus_local_table AS foo WHERE foo.a = partitioned_distributed_table.a; +UPDATE partitioned_distributed_table SET a = foo.a FROM mat_view_on_part_dist AS foo WHERE foo.a = partitioned_distributed_table.a; +UPDATE partitioned_distributed_table SET a = foo.a FROM partitioned_distributed_table AS foo WHERE foo.a < partitioned_distributed_table.a; +UPDATE partitioned_distributed_table SET a = foo.a FROM distributed_table AS foo WHERE 
foo.a < partitioned_distributed_table.a; + +-- should work +UPDATE partitioned_distributed_table SET a = foo.a FROM partitioned_distributed_table AS foo WHERE foo.a = partitioned_distributed_table.a; +UPDATE partitioned_distributed_table SET a = foo.a FROM view_on_part_dist AS foo WHERE foo.a = partitioned_distributed_table.a; +UPDATE partitioned_distributed_table SET a = foo.a FROM view_on_dist AS foo WHERE foo.a = partitioned_distributed_table.a; +UPDATE partitioned_distributed_table SET a = foo.a FROM view_on_ref AS foo WHERE foo.a = partitioned_distributed_table.a; + +-- JOINs on the distribution key +SELECT COUNT(*) FROM partitioned_distributed_table p1 JOIN partitioned_distributed_table p2 USING (a); +SELECT COUNT(*) FROM unlogged_distributed_table u1 JOIN partitioned_distributed_table p2 USING (a); +SELECT COUNT(*) FROM partitioned_distributed_table p1 LEFT JOIN partitioned_distributed_table p2 USING (a); + +-- lateral JOIN +SELECT COUNT(*) FROM partitioned_distributed_table p1 JOIN LATERAL (SELECT * FROM partitioned_distributed_table p2 WHERE p1.a = p2.a) AS foo ON (true); + +-- router query +SELECT COUNT(*) FROM partitioned_distributed_table p1 JOIN partitioned_distributed_table p2 USING (a) WHERE a = 1; + +-- repartition query +SET citus.enable_repartition_joins TO ON; +SELECT COUNT(*) FROM partitioned_distributed_table p1 JOIN partitioned_distributed_table p2 USING (b) WHERE b = 1; +SELECT COUNT(*) FROM unlogged_distributed_table u1 JOIN partitioned_distributed_table p2 USING (b) WHERE b = 1; +RESET citus.enable_repartition_joins; + +-- joins with cte's +WITH cte_1 AS (SELECT * FROM partitioned_distributed_table) + SELECT COUNT(*) FROM cte_1; + +WITH cte_1 AS (SELECT * FROM partitioned_distributed_table) + SELECT COUNT(*) FROM cte_1 JOIN partitioned_distributed_table USING (a); + +WITH cte_1 AS (SELECT * FROM foreign_distributed_table) + SELECT COUNT(*) FROM cte_1 JOIN foreign_distributed_table USING (a); + +WITH cte_1 AS (SELECT * FROM 
partitioned_distributed_table) + SELECT COUNT(*) FROM cte_1 JOIN partitioned_distributed_table USING (b); + +-- multi shard colocated update +UPDATE partitioned_distributed_table dt +SET b = sub1.a + sub2.a +FROM (SELECT * FROM partitioned_distributed_table WHERE b = 1) AS sub1, + (SELECT * FROM partitioned_distributed_table WHERE b = 2) AS sub2 +WHERE sub1.a = sub2.a AND sub1.a = dt.a AND dt.a > 1; + +UPDATE unlogged_distributed_table dt +SET b = sub1.a + sub2.a +FROM (SELECT * FROM unlogged_distributed_table WHERE b = 1) AS sub1, + (SELECT * FROM unlogged_distributed_table WHERE b = 2) AS sub2 +WHERE sub1.a = sub2.a AND sub1.a = dt.a AND dt.a > 1; + +-- multi shard non-colocated update +WITH cte1 AS (SELECT * FROM partitioned_distributed_table WHERE b = 1), + cte2 AS (SELECT * FROM partitioned_distributed_table WHERE b = 2) +UPDATE partitioned_distributed_table dt SET b = cte1.a + cte2.a +FROM cte1, cte2 WHERE cte1.a != cte2.a AND cte1.a = dt.a AND dt.a > 1; + +-- router update with CTE +UPDATE partitioned_distributed_table dt +SET b = sub1.a + sub2.a +FROM (SELECT * FROM partitioned_distributed_table WHERE b = 1) AS sub1, + (SELECT * FROM partitioned_distributed_table WHERE b = 2) AS sub2 +WHERE sub1.a = sub2.a AND sub1.a = dt.a AND dt.a = 1; + +-- INSERT .. SELECT via coordinator +RESET client_min_messages; + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT * FROM partitioned_distributed_table ORDER BY 1,2 LIMIT 5; +$Q$); + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO unlogged_distributed_table SELECT * FROM partitioned_distributed_table ORDER BY 1,2 LIMIT 5; +$Q$); + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT * FROM distributed_table ORDER BY 1 LIMIT 5; +$Q$); + +-- INSERT .. 
SELECT via repartition +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT a + 1 FROM partitioned_distributed_table; +$Q$); + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO unlogged_distributed_table SELECT a + 1 FROM partitioned_distributed_table; +$Q$); + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT a + 1 FROM distributed_table; +$Q$); + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +INSERT INTO partitioned_distributed_table SELECT a + 1 FROM unlogged_distributed_table; +$Q$); + +SET client_min_messages TO DEBUG1; + +-- some aggregate queries +SELECT sum(a) FROM partitioned_distributed_table; +SELECT ceil(regr_syy(a, b)) FROM partitioned_distributed_table; +SELECT ceil(regr_syy(a, b)) FROM unlogged_distributed_table; + +-- pushdown WINDOW +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +SELECT a, COUNT(*) OVER (PARTITION BY a) FROM partitioned_distributed_table ORDER BY 1,2; +$Q$); + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +SELECT a, COUNT(*) OVER (PARTITION BY a) FROM foreign_distributed_table ORDER BY 1,2; +$Q$); + +-- pull to coordinator WINDOW +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +SELECT a, COUNT(*) OVER (PARTITION BY a+1) FROM partitioned_distributed_table ORDER BY 1,2; +$Q$); + +SELECT public.coordinator_plan($Q$ +EXPLAIN (COSTS OFF) +SELECT a, COUNT(*) OVER (PARTITION BY a+1) FROM foreign_distributed_table ORDER BY 1,2; +$Q$); + +-- FOR UPDATE +SELECT * FROM partitioned_distributed_table WHERE a = 1 ORDER BY 1,2 FOR UPDATE; +SELECT * FROM unlogged_distributed_table WHERE a = 1 ORDER BY 1,2 FOR UPDATE; + +VACUUM partitioned_distributed_table; +TRUNCATE partitioned_distributed_table; + +SET client_min_messages TO ERROR; + +-- drop column followed by SELECT in transaction block +BEGIN; + ALTER TABLE partitioned_distributed_table DROP COLUMN b CASCADE; + SELECT * 
FROM partitioned_distributed_table; +COMMIT; + +BEGIN; + ALTER TABLE foreign_distributed_table DROP COLUMN b CASCADE; + SELECT * FROM foreign_distributed_table; +COMMIT; + +-- cleanup at exit +DROP SCHEMA mixed_relkind_tests CASCADE; diff --git a/src/test/regress/sql/multi_test_helpers.sql b/src/test/regress/sql/multi_test_helpers.sql index d504c0007..7c37a940e 100644 --- a/src/test/regress/sql/multi_test_helpers.sql +++ b/src/test/regress/sql/multi_test_helpers.sql @@ -33,6 +33,21 @@ BEGIN RETURN; END; $$ language plpgsql; +-- helper function that returns true if output of given explain has "is not null" (case in-sensitive) +CREATE OR REPLACE FUNCTION explain_has_is_not_null(explain_commmand text) +RETURNS BOOLEAN AS $$ +DECLARE + query_plan text; +BEGIN + FOR query_plan IN EXECUTE explain_commmand LOOP + IF query_plan ILIKE '%is not null%' + THEN + RETURN true; + END IF; + END LOOP; + RETURN false; +END; $$ language plpgsql; + -- helper function to quickly run SQL on the whole cluster CREATE OR REPLACE FUNCTION run_command_on_coordinator_and_workers(p_sql text) RETURNS void LANGUAGE plpgsql AS $$ From d912d4bc38b9486e96e8a7dffb0068fd90a40f84 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 6 Nov 2020 10:26:53 +0300 Subject: [PATCH 097/124] Print full file path in valgrind testing (#4299) --- src/test/regress/pg_regress_multi.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/regress/pg_regress_multi.pl b/src/test/regress/pg_regress_multi.pl index 1019df50f..e7065bd29 100755 --- a/src/test/regress/pg_regress_multi.pl +++ b/src/test/regress/pg_regress_multi.pl @@ -245,6 +245,7 @@ exec $valgrindPath \\ --error-markers=VALGRINDERROR-BEGIN,VALGRINDERROR-END \\ --max-stackframe=16000000 \\ --log-file=$valgrindLogFile \\ + --fullpath-after=/ \\ $bindir/postgres.orig \\ "\$@" END From 05569526077a45d170a96500446c743c87d4cea2 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 6 Nov 2020 10:44:01 +0300 Subject: [PATCH 098/124] Normalize partitioned table 
aliases in explain output (#4295) Aliases that postgres choose for partitioned tables in explain output might change in different pg versions, so normalize them and remove the alternative test output --- src/test/regress/bin/normalize.sed | 15 + src/test/regress/expected/multi_explain.out | 6 +- .../regress/expected/multi_partitioning.out | 46 +- .../regress/expected/multi_partitioning_1.out | 1965 ----------------- 4 files changed, 41 insertions(+), 1991 deletions(-) delete mode 100644 src/test/regress/expected/multi_partitioning_1.out diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index f1069faaa..c2a1fa58d 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -161,6 +161,21 @@ s/Citus.*currently supports/Citus currently supports/g s/prepared transaction with identifier .* does not exist/prepared transaction with identifier "citus_x_yyyyyy_zzz_w" does not exist/g s/failed to roll back prepared transaction '.*'/failed to roll back prepared transaction 'citus_x_yyyyyy_zzz_w'/g +# Table aliases for partitioned tables in explain outputs might change +# regardless of postgres appended an _int suffix to alias, we always append _xxx suffix +# Can be removed when we remove support for pg11 and pg12. +# "-> Scan on __ _" and +# "-> Scan on __ " becomes +# "-> Scan on __ _xxx" +s/(->.*Scan on\ +)(.*)(_[0-9]+)(_[0-9]+) \2(_[0-9]+|_xxx)?/\1\2\3\4 \2_xxx/g + +# Table aliases for partitioned tables in "Hash Cond:" lines of explain outputs might change +# This is only for multi_partitioning.sql test file +# regardless of postgres appended an _int suffix to alias, we always append _xxx suffix +# Can be removed when we remove support for pg11 and pg12. 
+s/(partitioning_hash_join_test)(_[0-9]|_xxx)?(\.[a-zA-Z]+)/\1_xxx\3/g +s/(partitioning_hash_test)(_[0-9]|_xxx)?(\.[a-zA-Z]+)/\1_xxx\3/g + # Errors with binary decoding where OIDs should be normalized s/wrong data type: [0-9]+, expected [0-9]+/wrong data type: XXXX, expected XXXX/g diff --git a/src/test/regress/expected/multi_explain.out b/src/test/regress/expected/multi_explain.out index 9f483a970..c77b91d50 100644 --- a/src/test/regress/expected/multi_explain.out +++ b/src/test/regress/expected/multi_explain.out @@ -2900,7 +2900,7 @@ Custom Scan (Citus Adaptive) -> Insert on users_table_2_570028 citus_table_alias Conflict Resolution: UPDATE Conflict Arbiter Indexes: users_table_2_pkey_570028 - -> Seq Scan on users_table_2_570028 users_table_2 + -> Seq Scan on users_table_2_570028 users_table_xxx EXPLAIN :default_analyze_flags execute p4(20,20); Custom Scan (Citus Adaptive) (actual rows=0 loops=1) Task Count: 1 @@ -2912,7 +2912,7 @@ Custom Scan (Citus Adaptive) (actual rows=0 loops=1) Conflict Arbiter Indexes: users_table_2_pkey_570028 Tuples Inserted: 0 Conflicting Tuples: 0 - -> Seq Scan on users_table_2_570028 users_table_2 (actual rows=0 loops=1) + -> Seq Scan on users_table_2_570028 users_table_xxx (actual rows=0 loops=1) -- simple test to confirm we can fetch long (>4KB) plans EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF) SELECT * FROM users_table_2 WHERE value_1::text = 
'0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000X'; Custom Scan (Citus Adaptive) (actual rows=0 loops=1) @@ -2922,7 +2922,7 @@ Custom Scan (Citus Adaptive) (actual rows=0 loops=1) -> Task Tuple data received from node: 0 bytes Node: host=localhost port=xxxxx dbname=regression - -> Seq Scan on users_table_2_570028 users_table_2 (actual rows=0 loops=1) + -> Seq Scan on users_table_2_570028 users_table_xxx (actual rows=0 loops=1) Filter: ((value_1)::text = '000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000X'::text) -- sorted explain analyze output CREATE TABLE explain_analyze_execution_time (a int); diff --git a/src/test/regress/expected/multi_partitioning.out b/src/test/regress/expected/multi_partitioning.out index 94b468831..2e177214e 100644 --- a/src/test/regress/expected/multi_partitioning.out +++ b/src/test/regress/expected/multi_partitioning.out @@ -1611,16 +1611,16 @@ SELECT * FROM partitioning_hash_test JOIN partitioning_hash_join_test USING (id, -> Task Node: host=localhost port=xxxxx dbname=regression -> Hash Join - Hash Cond: ((partitioning_hash_join_test.id = partitioning_hash_test.id) AND (partitioning_hash_join_test.subid = partitioning_hash_test.subid)) + Hash Cond: ((partitioning_hash_join_test_xxx.id = partitioning_hash_test_xxx.id) AND (partitioning_hash_join_test_xxx.subid = partitioning_hash_test_xxx.subid)) -> Append - -> Seq Scan on partitioning_hash_join_test_0_1660133 partitioning_hash_join_test_1 - -> Seq Scan on partitioning_hash_join_test_1_1660137 partitioning_hash_join_test_2 - -> Seq Scan on partitioning_hash_join_test_2_1660141 partitioning_hash_join_test_3 + -> Seq Scan on partitioning_hash_join_test_0_1660133 partitioning_hash_join_test_xxx + -> Seq Scan on partitioning_hash_join_test_1_1660137 partitioning_hash_join_test_xxx + -> Seq Scan on partitioning_hash_join_test_2_1660141 partitioning_hash_join_test_xxx -> Hash -> Append - -> 
Seq Scan on partitioning_hash_test_0_1660016 partitioning_hash_test_1 - -> Seq Scan on partitioning_hash_test_1_1660020 partitioning_hash_test_2 - -> Seq Scan on partitioning_hash_test_2_1660032 partitioning_hash_test_3 + -> Seq Scan on partitioning_hash_test_0_1660016 partitioning_hash_test_xxx + -> Seq Scan on partitioning_hash_test_1_1660020 partitioning_hash_test_xxx + -> Seq Scan on partitioning_hash_test_2_1660032 partitioning_hash_test_xxx (16 rows) -- set partition-wise join on and parallel to off @@ -1651,20 +1651,20 @@ SELECT * FROM partitioning_hash_test JOIN partitioning_hash_join_test USING (id, Node: host=localhost port=xxxxx dbname=regression -> Append -> Hash Join - Hash Cond: ((partitioning_hash_join_test_1.id = partitioning_hash_test_1.id) AND (partitioning_hash_join_test_1.subid = partitioning_hash_test_1.subid)) - -> Seq Scan on partitioning_hash_join_test_0_1660133 partitioning_hash_join_test_1 + Hash Cond: ((partitioning_hash_join_test_xxx.id = partitioning_hash_test_xxx.id) AND (partitioning_hash_join_test_xxx.subid = partitioning_hash_test_xxx.subid)) + -> Seq Scan on partitioning_hash_join_test_0_1660133 partitioning_hash_join_test_xxx -> Hash - -> Seq Scan on partitioning_hash_test_0_1660016 partitioning_hash_test_1 + -> Seq Scan on partitioning_hash_test_0_1660016 partitioning_hash_test_xxx -> Hash Join - Hash Cond: ((partitioning_hash_test_2.id = partitioning_hash_join_test_2.id) AND (partitioning_hash_test_2.subid = partitioning_hash_join_test_2.subid)) - -> Seq Scan on partitioning_hash_test_1_1660020 partitioning_hash_test_2 + Hash Cond: ((partitioning_hash_test_xxx.id = partitioning_hash_join_test_xxx.id) AND (partitioning_hash_test_xxx.subid = partitioning_hash_join_test_xxx.subid)) + -> Seq Scan on partitioning_hash_test_1_1660020 partitioning_hash_test_xxx -> Hash - -> Seq Scan on partitioning_hash_join_test_1_1660137 partitioning_hash_join_test_2 + -> Seq Scan on partitioning_hash_join_test_1_1660137 
partitioning_hash_join_test_xxx -> Hash Join - Hash Cond: ((partitioning_hash_join_test_3.id = partitioning_hash_test_3.id) AND (partitioning_hash_join_test_3.subid = partitioning_hash_test_3.subid)) - -> Seq Scan on partitioning_hash_join_test_2_1660141 partitioning_hash_join_test_3 + Hash Cond: ((partitioning_hash_join_test_xxx.id = partitioning_hash_test_xxx.id) AND (partitioning_hash_join_test_xxx.subid = partitioning_hash_test_xxx.subid)) + -> Seq Scan on partitioning_hash_join_test_2_1660141 partitioning_hash_join_test_xxx -> Hash - -> Seq Scan on partitioning_hash_test_2_1660032 partitioning_hash_test_3 + -> Seq Scan on partitioning_hash_test_2_1660032 partitioning_hash_test_xxx (21 rows) -- note that partition-wise joins only work when partition key is in the join @@ -1680,16 +1680,16 @@ SELECT * FROM partitioning_hash_test JOIN partitioning_hash_join_test USING (id) -> Task Node: host=localhost port=xxxxx dbname=regression -> Hash Join - Hash Cond: (partitioning_hash_join_test.id = partitioning_hash_test.id) + Hash Cond: (partitioning_hash_join_test_xxx.id = partitioning_hash_test_xxx.id) -> Append - -> Seq Scan on partitioning_hash_join_test_0_1660133 partitioning_hash_join_test_1 - -> Seq Scan on partitioning_hash_join_test_1_1660137 partitioning_hash_join_test_2 - -> Seq Scan on partitioning_hash_join_test_2_1660141 partitioning_hash_join_test_3 + -> Seq Scan on partitioning_hash_join_test_0_1660133 partitioning_hash_join_test_xxx + -> Seq Scan on partitioning_hash_join_test_1_1660137 partitioning_hash_join_test_xxx + -> Seq Scan on partitioning_hash_join_test_2_1660141 partitioning_hash_join_test_xxx -> Hash -> Append - -> Seq Scan on partitioning_hash_test_0_1660016 partitioning_hash_test_1 - -> Seq Scan on partitioning_hash_test_1_1660020 partitioning_hash_test_2 - -> Seq Scan on partitioning_hash_test_2_1660032 partitioning_hash_test_3 + -> Seq Scan on partitioning_hash_test_0_1660016 partitioning_hash_test_xxx + -> Seq Scan on 
partitioning_hash_test_1_1660020 partitioning_hash_test_xxx + -> Seq Scan on partitioning_hash_test_2_1660032 partitioning_hash_test_xxx (16 rows) -- reset partition-wise join diff --git a/src/test/regress/expected/multi_partitioning_1.out b/src/test/regress/expected/multi_partitioning_1.out deleted file mode 100644 index e85a4a2a9..000000000 --- a/src/test/regress/expected/multi_partitioning_1.out +++ /dev/null @@ -1,1965 +0,0 @@ --- --- Distributed Partitioned Table Tests --- -SET citus.next_shard_id TO 1660000; -SET citus.shard_count TO 4; -SET citus.shard_replication_factor TO 1; -SET citus.enable_repartition_joins to ON; --- --- Distributed Partitioned Table Creation Tests --- --- 1-) Distributing partitioned table --- create partitioned table -CREATE TABLE partitioning_test(id int, time date) PARTITION BY RANGE (time); -CREATE TABLE partitioning_hash_test(id int, subid int) PARTITION BY HASH(subid); --- create its partitions -CREATE TABLE partitioning_test_2009 PARTITION OF partitioning_test FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); -CREATE TABLE partitioning_test_2010 PARTITION OF partitioning_test FOR VALUES FROM ('2010-01-01') TO ('2011-01-01'); -CREATE TABLE partitioning_hash_test_0 PARTITION OF partitioning_hash_test FOR VALUES WITH (MODULUS 3, REMAINDER 0); -CREATE TABLE partitioning_hash_test_1 PARTITION OF partitioning_hash_test FOR VALUES WITH (MODULUS 3, REMAINDER 1); --- load some data and distribute tables -INSERT INTO partitioning_test VALUES (1, '2009-06-06'); -INSERT INTO partitioning_test VALUES (2, '2010-07-07'); -INSERT INTO partitioning_test_2009 VALUES (3, '2009-09-09'); -INSERT INTO partitioning_test_2010 VALUES (4, '2010-03-03'); -INSERT INTO partitioning_hash_test VALUES (1, 2); -INSERT INTO partitioning_hash_test VALUES (2, 13); -INSERT INTO partitioning_hash_test VALUES (3, 7); -INSERT INTO partitioning_hash_test VALUES (4, 4); --- distribute partitioned table -SELECT create_distributed_table('partitioning_test', 'id'); 
-NOTICE: Copying data from local table... -NOTICE: copying the data has completed -DETAIL: The local data in the table is no longer visible, but is still on disk. -HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.partitioning_test_2009$$) -NOTICE: Copying data from local table... -NOTICE: copying the data has completed -DETAIL: The local data in the table is no longer visible, but is still on disk. -HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.partitioning_test_2010$$) - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('partitioning_hash_test', 'id'); -NOTICE: Copying data from local table... -NOTICE: copying the data has completed -DETAIL: The local data in the table is no longer visible, but is still on disk. -HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.partitioning_hash_test_0$$) -NOTICE: Copying data from local table... -NOTICE: copying the data has completed -DETAIL: The local data in the table is no longer visible, but is still on disk. 
-HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.partitioning_hash_test_1$$) - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- see the data is loaded to shards -SELECT * FROM partitioning_test ORDER BY 1; - id | time ---------------------------------------------------------------------- - 1 | 06-06-2009 - 2 | 07-07-2010 - 3 | 09-09-2009 - 4 | 03-03-2010 -(4 rows) - -SELECT * FROM partitioning_hash_test ORDER BY 1; - id | subid ---------------------------------------------------------------------- - 1 | 2 - 2 | 13 - 3 | 7 - 4 | 4 -(4 rows) - --- see partitioned table and its partitions are distributed -SELECT - logicalrelid -FROM - pg_dist_partition -WHERE - logicalrelid IN ('partitioning_test', 'partitioning_test_2009', 'partitioning_test_2010') -ORDER BY 1; - logicalrelid ---------------------------------------------------------------------- - partitioning_test - partitioning_test_2009 - partitioning_test_2010 -(3 rows) - -SELECT - logicalrelid, count(*) -FROM pg_dist_shard - WHERE logicalrelid IN ('partitioning_test', 'partitioning_test_2009', 'partitioning_test_2010') -GROUP BY - logicalrelid -ORDER BY - 1,2; - logicalrelid | count ---------------------------------------------------------------------- - partitioning_test | 4 - partitioning_test_2009 | 4 - partitioning_test_2010 | 4 -(3 rows) - -SELECT - logicalrelid -FROM - pg_dist_partition -WHERE - logicalrelid IN ('partitioning_hash_test', 'partitioning_hash_test_0', 'partitioning_hash_test_1') -ORDER BY 1; - logicalrelid ---------------------------------------------------------------------- - partitioning_hash_test - partitioning_hash_test_0 - partitioning_hash_test_1 -(3 rows) - -SELECT - logicalrelid, count(*) -FROM pg_dist_shard - WHERE logicalrelid IN ('partitioning_hash_test', 'partitioning_hash_test_0', 'partitioning_hash_test_1') -GROUP BY - logicalrelid -ORDER BY - 1,2; - logicalrelid | 
count ---------------------------------------------------------------------- - partitioning_hash_test | 4 - partitioning_hash_test_0 | 4 - partitioning_hash_test_1 | 4 -(3 rows) - --- 2-) Creating partition of a distributed table -CREATE TABLE partitioning_test_2011 PARTITION OF partitioning_test FOR VALUES FROM ('2011-01-01') TO ('2012-01-01'); --- new partition is automatically distributed as well -SELECT - logicalrelid -FROM - pg_dist_partition -WHERE - logicalrelid IN ('partitioning_test', 'partitioning_test_2011') -ORDER BY 1; - logicalrelid ---------------------------------------------------------------------- - partitioning_test - partitioning_test_2011 -(2 rows) - -SELECT - logicalrelid, count(*) -FROM pg_dist_shard - WHERE logicalrelid IN ('partitioning_test', 'partitioning_test_2011') -GROUP BY - logicalrelid -ORDER BY - 1,2; - logicalrelid | count ---------------------------------------------------------------------- - partitioning_test | 4 - partitioning_test_2011 | 4 -(2 rows) - --- 3-) Attaching non distributed table to a distributed table -CREATE TABLE partitioning_test_2012(id int, time date); --- load some data -INSERT INTO partitioning_test_2012 VALUES (5, '2012-06-06'); -INSERT INTO partitioning_test_2012 VALUES (6, '2012-07-07'); -ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2012 FOR VALUES FROM ('2012-01-01') TO ('2013-01-01'); -NOTICE: Copying data from local table... -NOTICE: copying the data has completed -DETAIL: The local data in the table is no longer visible, but is still on disk. 
-HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.partitioning_test_2012$$) --- attached partition is distributed as well -SELECT - logicalrelid -FROM - pg_dist_partition -WHERE - logicalrelid IN ('partitioning_test', 'partitioning_test_2012') -ORDER BY 1; - logicalrelid ---------------------------------------------------------------------- - partitioning_test - partitioning_test_2012 -(2 rows) - -SELECT - logicalrelid, count(*) -FROM pg_dist_shard - WHERE logicalrelid IN ('partitioning_test', 'partitioning_test_2012') -GROUP BY - logicalrelid -ORDER BY - 1,2; - logicalrelid | count ---------------------------------------------------------------------- - partitioning_test | 4 - partitioning_test_2012 | 4 -(2 rows) - --- try to insert a new data to hash partitioned table --- no partition is defined for value 5 -INSERT INTO partitioning_hash_test VALUES (8, 5); -ERROR: no partition of relation "partitioning_hash_test_1660012" found for row -DETAIL: Partition key of the failing row contains (subid) = (5). -CONTEXT: while executing command on localhost:xxxxx -INSERT INTO partitioning_hash_test VALUES (9, 12); -ERROR: no partition of relation "partitioning_hash_test_1660015" found for row -DETAIL: Partition key of the failing row contains (subid) = (12). -CONTEXT: while executing command on localhost:xxxxx -CREATE TABLE partitioning_hash_test_2 (id int, subid int); -INSERT INTO partitioning_hash_test_2 VALUES (8, 5); -ALTER TABLE partitioning_hash_test ATTACH PARTITION partitioning_hash_test_2 FOR VALUES WITH (MODULUS 3, REMAINDER 2); -NOTICE: Copying data from local table... -NOTICE: copying the data has completed -DETAIL: The local data in the table is no longer visible, but is still on disk. 
-HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.partitioning_hash_test_2$$) -INSERT INTO partitioning_hash_test VALUES (9, 12); --- see the data is loaded to shards -SELECT * FROM partitioning_test ORDER BY 1; - id | time ---------------------------------------------------------------------- - 1 | 06-06-2009 - 2 | 07-07-2010 - 3 | 09-09-2009 - 4 | 03-03-2010 - 5 | 06-06-2012 - 6 | 07-07-2012 -(6 rows) - -SELECT * FROM partitioning_hash_test ORDER BY 1; - id | subid ---------------------------------------------------------------------- - 1 | 2 - 2 | 13 - 3 | 7 - 4 | 4 - 8 | 5 - 9 | 12 -(6 rows) - --- 4-) Attaching distributed table to distributed table -CREATE TABLE partitioning_test_2013(id int, time date); -SELECT create_distributed_table('partitioning_test_2013', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- load some data -INSERT INTO partitioning_test_2013 VALUES (7, '2013-06-06'); -INSERT INTO partitioning_test_2013 VALUES (8, '2013-07-07'); -ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2013 FOR VALUES FROM ('2013-01-01') TO ('2014-01-01'); --- see the data is loaded to shards -SELECT * FROM partitioning_test ORDER BY 1; - id | time ---------------------------------------------------------------------- - 1 | 06-06-2009 - 2 | 07-07-2010 - 3 | 09-09-2009 - 4 | 03-03-2010 - 5 | 06-06-2012 - 6 | 07-07-2012 - 7 | 06-06-2013 - 8 | 07-07-2013 -(8 rows) - --- 5-) Failure cases while creating distributed partitioned tables --- cannot distribute a partition if its parent is not distributed -CREATE TABLE partitioning_test_failure(id int, time date) PARTITION BY RANGE (time); -CREATE TABLE partitioning_test_failure_2009 PARTITION OF partitioning_test_failure FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); -SELECT create_distributed_table('partitioning_test_failure_2009', 'id'); -ERROR: cannot distribute relation 
"partitioning_test_failure_2009" which is partition of "partitioning_test_failure" -DETAIL: Citus does not support distributing partitions if their parent is not distributed table. -HINT: Distribute the partitioned table "partitioning_test_failure" instead. --- only hash distributed tables can have partitions -SELECT create_distributed_table('partitioning_test_failure', 'id', 'append'); -ERROR: distributing partitioned tables in only supported for hash-distributed tables -SELECT create_distributed_table('partitioning_test_failure', 'id', 'range'); -ERROR: distributing partitioned tables in only supported for hash-distributed tables -SELECT create_reference_table('partitioning_test_failure'); -ERROR: distributing partitioned tables in only supported for hash-distributed tables -SET citus.shard_replication_factor TO 1; --- non-distributed tables cannot have distributed partitions; -DROP TABLE partitioning_test_failure_2009; -CREATE TABLE partitioning_test_failure_2009(id int, time date); -SELECT create_distributed_table('partitioning_test_failure_2009', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -ALTER TABLE partitioning_test_failure ATTACH PARTITION partitioning_test_failure_2009 FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); -ERROR: non-distributed tables cannot have distributed partitions -HINT: Distribute the partitioned table "partitioning_test_failure_2009" instead --- multi-level partitioning is not allowed -DROP TABLE partitioning_test_failure_2009; -CREATE TABLE partitioning_test_failure_2009 PARTITION OF partitioning_test_failure FOR VALUES FROM ('2009-01-01') TO ('2010-01-01') PARTITION BY RANGE (time); -SELECT create_distributed_table('partitioning_test_failure', 'id'); -ERROR: distributing multi-level partitioned tables is not supported -DETAIL: Relation "partitioning_test_failure_2009" is partitioned table itself and it is also partition of relation 
"partitioning_test_failure". --- multi-level partitioning is not allowed in different order -DROP TABLE partitioning_test_failure_2009; -SELECT create_distributed_table('partitioning_test_failure', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE partitioning_test_failure_2009 PARTITION OF partitioning_test_failure FOR VALUES FROM ('2009-01-01') TO ('2010-01-01') PARTITION BY RANGE (time); -ERROR: distributing multi-level partitioned tables is not supported -DETAIL: Relation "partitioning_test_failure_2009" is partitioned table itself and it is also partition of relation "partitioning_test_failure". --- --- DMLs in distributed partitioned tables --- --- test COPY --- COPY data to partitioned table -COPY partitioning_test FROM STDIN WITH CSV; --- COPY data to partition directly -COPY partitioning_test_2009 FROM STDIN WITH CSV; --- see the data is loaded to shards -SELECT * FROM partitioning_test WHERE id >= 9 ORDER BY 1; - id | time ---------------------------------------------------------------------- - 9 | 01-01-2009 - 10 | 01-01-2010 - 11 | 01-01-2011 - 12 | 01-01-2012 - 13 | 01-02-2009 - 14 | 01-03-2009 -(6 rows) - --- test INSERT --- INSERT INTO the partitioned table -INSERT INTO partitioning_test VALUES(15, '2009-02-01'); -INSERT INTO partitioning_test VALUES(16, '2010-02-01'); -INSERT INTO partitioning_test VALUES(17, '2011-02-01'); -INSERT INTO partitioning_test VALUES(18, '2012-02-01'); --- INSERT INTO the partitions directly table -INSERT INTO partitioning_test VALUES(19, '2009-02-02'); -INSERT INTO partitioning_test VALUES(20, '2010-02-02'); --- see the data is loaded to shards -SELECT * FROM partitioning_test WHERE id >= 15 ORDER BY 1; - id | time ---------------------------------------------------------------------- - 15 | 02-01-2009 - 16 | 02-01-2010 - 17 | 02-01-2011 - 18 | 02-01-2012 - 19 | 02-02-2009 - 20 | 02-02-2010 -(6 rows) - --- test INSERT/SELECT --- 
INSERT/SELECT from partition to partitioned table -INSERT INTO partitioning_test SELECT * FROM partitioning_test_2011; --- INSERT/SELECT from partitioned table to partition -INSERT INTO partitioning_test_2012 SELECT * FROM partitioning_test WHERE time >= '2012-01-01' AND time < '2013-01-01'; --- see the data is loaded to shards (rows in the given range should be duplicated) -SELECT * FROM partitioning_test WHERE time >= '2011-01-01' AND time < '2013-01-01' ORDER BY 1; - id | time ---------------------------------------------------------------------- - 5 | 06-06-2012 - 5 | 06-06-2012 - 6 | 07-07-2012 - 6 | 07-07-2012 - 11 | 01-01-2011 - 11 | 01-01-2011 - 12 | 01-01-2012 - 12 | 01-01-2012 - 17 | 02-01-2011 - 17 | 02-01-2011 - 18 | 02-01-2012 - 18 | 02-01-2012 -(12 rows) - --- test UPDATE --- UPDATE partitioned table -UPDATE partitioning_test SET time = '2013-07-07' WHERE id = 7; --- UPDATE partition directly -UPDATE partitioning_test_2013 SET time = '2013-08-08' WHERE id = 8; --- see the data is updated -SELECT * FROM partitioning_test WHERE id = 7 OR id = 8 ORDER BY 1; - id | time ---------------------------------------------------------------------- - 7 | 07-07-2013 - 8 | 08-08-2013 -(2 rows) - --- UPDATE that tries to move a row to a non-existing partition (this should fail) -UPDATE partitioning_test SET time = '2020-07-07' WHERE id = 7; -ERROR: no partition of relation "partitioning_test_1660001" found for row -DETAIL: Partition key of the failing row contains ("time") = (2020-07-07). 
-CONTEXT: while executing command on localhost:xxxxx --- UPDATE with subqueries on partitioned table -UPDATE - partitioning_test -SET - time = time + INTERVAL '1 day' -WHERE - id IN (SELECT id FROM partitioning_test WHERE id = 1); --- UPDATE with subqueries on partition -UPDATE - partitioning_test_2009 -SET - time = time + INTERVAL '1 month' -WHERE - id IN (SELECT id FROM partitioning_test WHERE id = 2); --- see the data is updated -SELECT * FROM partitioning_test WHERE id = 1 OR id = 2 ORDER BY 1; - id | time ---------------------------------------------------------------------- - 1 | 06-07-2009 - 2 | 07-07-2010 -(2 rows) - --- test DELETE --- DELETE from partitioned table -DELETE FROM partitioning_test WHERE id = 9; --- DELETE from partition directly -DELETE FROM partitioning_test_2010 WHERE id = 10; --- see the data is deleted -SELECT * FROM partitioning_test WHERE id = 9 OR id = 10 ORDER BY 1; - id | time ---------------------------------------------------------------------- -(0 rows) - --- create default partition -CREATE TABLE partitioning_test_default PARTITION OF partitioning_test DEFAULT; -\d+ partitioning_test - Table "public.partitioning_test" - Column | Type | Collation | Nullable | Default | Storage | Stats target | Description ---------------------------------------------------------------------- - id | integer | | | | plain | | - time | date | | | | plain | | -Partition key: RANGE ("time") -Partitions: partitioning_test_2009 FOR VALUES FROM ('01-01-2009') TO ('01-01-2010'), - partitioning_test_2010 FOR VALUES FROM ('01-01-2010') TO ('01-01-2011'), - partitioning_test_2011 FOR VALUES FROM ('01-01-2011') TO ('01-01-2012'), - partitioning_test_2012 FOR VALUES FROM ('01-01-2012') TO ('01-01-2013'), - partitioning_test_2013 FOR VALUES FROM ('01-01-2013') TO ('01-01-2014'), - partitioning_test_default DEFAULT - -INSERT INTO partitioning_test VALUES(21, '2014-02-02'); -INSERT INTO partitioning_test VALUES(22, '2015-04-02'); --- see they are inserted into 
default partition -SELECT * FROM partitioning_test WHERE id > 20 ORDER BY 1, 2; - id | time ---------------------------------------------------------------------- - 21 | 02-02-2014 - 22 | 04-02-2015 -(2 rows) - -SELECT * FROM partitioning_test_default ORDER BY 1, 2; - id | time ---------------------------------------------------------------------- - 21 | 02-02-2014 - 22 | 04-02-2015 -(2 rows) - --- create a new partition (will fail) -CREATE TABLE partitioning_test_2014 PARTITION OF partitioning_test FOR VALUES FROM ('2014-01-01') TO ('2015-01-01'); -ERROR: updated partition constraint for default partition would be violated by some row -CONTEXT: while executing command on localhost:xxxxx -BEGIN; -ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_default; -CREATE TABLE partitioning_test_2014 PARTITION OF partitioning_test FOR VALUES FROM ('2014-01-01') TO ('2015-01-01'); -INSERT INTO partitioning_test SELECT * FROM partitioning_test_default WHERE time >= '2014-01-01' AND time < '2015-01-01'; -DELETE FROM partitioning_test_default WHERE time >= '2014-01-01' AND time < '2015-01-01'; -ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_default DEFAULT; -END; --- see data is in the table, but some moved out from default partition -SELECT * FROM partitioning_test WHERE id > 20 ORDER BY 1, 2; - id | time ---------------------------------------------------------------------- - 21 | 02-02-2014 - 22 | 04-02-2015 -(2 rows) - -SELECT * FROM partitioning_test_default ORDER BY 1, 2; - id | time ---------------------------------------------------------------------- - 22 | 04-02-2015 -(1 row) - --- multi-shard UPDATE on partitioned table -UPDATE partitioning_test SET time = time + INTERVAL '1 day'; --- see rows are UPDATED -SELECT * FROM partitioning_test ORDER BY 1; - id | time ---------------------------------------------------------------------- - 1 | 06-08-2009 - 2 | 07-08-2010 - 3 | 09-10-2009 - 4 | 03-04-2010 - 5 | 06-07-2012 - 5 | 06-07-2012 - 6 
| 07-08-2012 - 6 | 07-08-2012 - 7 | 07-08-2013 - 8 | 08-09-2013 - 11 | 01-02-2011 - 11 | 01-02-2011 - 12 | 01-02-2012 - 12 | 01-02-2012 - 13 | 01-03-2009 - 14 | 01-04-2009 - 15 | 02-02-2009 - 16 | 02-02-2010 - 17 | 02-02-2011 - 17 | 02-02-2011 - 18 | 02-02-2012 - 18 | 02-02-2012 - 19 | 02-03-2009 - 20 | 02-03-2010 - 21 | 02-03-2014 - 22 | 04-03-2015 -(26 rows) - --- multi-shard UPDATE on partition directly -UPDATE partitioning_test_2009 SET time = time + INTERVAL '1 day'; --- see rows are UPDATED -SELECT * FROM partitioning_test_2009 ORDER BY 1; - id | time ---------------------------------------------------------------------- - 1 | 06-09-2009 - 3 | 09-11-2009 - 13 | 01-04-2009 - 14 | 01-05-2009 - 15 | 02-03-2009 - 19 | 02-04-2009 -(6 rows) - --- test multi-shard UPDATE which fails in workers (updated value is outside of partition bounds) -UPDATE partitioning_test_2009 SET time = time + INTERVAL '6 month'; -ERROR: new row for relation "partitioning_test_2009_1660005" violates partition constraint -DETAIL: Failing row contains (3, 2010-03-11). -CONTEXT: while executing command on localhost:xxxxx --- --- DDL in distributed partitioned tables --- --- test CREATE INDEX --- CREATE INDEX on partitioned table - this will error out --- on earlier versions of postgres earlier than 11. 
-CREATE INDEX partitioning_index ON partitioning_test(id); --- CREATE INDEX on partition -CREATE INDEX partitioning_2009_index ON partitioning_test_2009(id); --- CREATE INDEX CONCURRENTLY on partition -CREATE INDEX CONCURRENTLY partitioned_2010_index ON partitioning_test_2010(id); --- see index is created -SELECT tablename, indexname FROM pg_indexes WHERE tablename LIKE 'partitioning_test_%' ORDER BY indexname; - tablename | indexname ---------------------------------------------------------------------- - partitioning_test_2010 | partitioned_2010_index - partitioning_test_2009 | partitioning_2009_index - partitioning_test_2009 | partitioning_test_2009_id_idx - partitioning_test_2010 | partitioning_test_2010_id_idx - partitioning_test_2011 | partitioning_test_2011_id_idx - partitioning_test_2012 | partitioning_test_2012_id_idx - partitioning_test_2013 | partitioning_test_2013_id_idx - partitioning_test_2014 | partitioning_test_2014_id_idx - partitioning_test_default | partitioning_test_default_id_idx -(9 rows) - --- test drop --- indexes created on parent table can only be dropped on parent table --- ie using the same index name --- following will fail -DROP INDEX partitioning_test_2009_id_idx; -ERROR: cannot drop index partitioning_test_2009_id_idx because index partitioning_index requires it -HINT: You can drop index partitioning_index instead. 
--- but dropping index on parent table will succeed -DROP INDEX partitioning_index; --- this index was already created on partition table -DROP INDEX partitioning_2009_index; --- test drop index on non-distributed, partitioned table -CREATE TABLE non_distributed_partitioned_table(a int, b int) PARTITION BY RANGE (a); -CREATE TABLE non_distributed_partitioned_table_1 PARTITION OF non_distributed_partitioned_table -FOR VALUES FROM (0) TO (10); -CREATE INDEX non_distributed_partitioned_table_index ON non_distributed_partitioned_table(a); --- see index is created -SELECT tablename, indexname FROM pg_indexes WHERE tablename LIKE 'non_distributed_partitioned_table_%' ORDER BY indexname; - tablename | indexname ---------------------------------------------------------------------- - non_distributed_partitioned_table_1 | non_distributed_partitioned_table_1_a_idx -(1 row) - --- drop the index and see it is dropped -DROP INDEX non_distributed_partitioned_table_index; -SELECT tablename, indexname FROM pg_indexes WHERE tablename LIKE 'non_distributed%' ORDER BY indexname; - tablename | indexname ---------------------------------------------------------------------- -(0 rows) - --- test add COLUMN --- add COLUMN to partitioned table -ALTER TABLE partitioning_test ADD new_column int; --- add COLUMN to partition - this will error out -ALTER TABLE partitioning_test_2010 ADD new_column_2 int; -ERROR: cannot add column to a partition --- see additional column is created -SELECT name, type FROM table_attrs WHERE relid = 'partitioning_test'::regclass ORDER BY 1; - name | type ---------------------------------------------------------------------- - id | integer - new_column | integer - time | date -(3 rows) - -SELECT name, type FROM table_attrs WHERE relid = 'partitioning_test_2010'::regclass ORDER BY 1; - name | type ---------------------------------------------------------------------- - id | integer - new_column | integer - time | date -(3 rows) - --- test add PRIMARY KEY --- add 
PRIMARY KEY to partitioned table - this will error out -ALTER TABLE partitioning_test ADD CONSTRAINT partitioning_primary PRIMARY KEY (id); -ERROR: insufficient columns in PRIMARY KEY constraint definition -DETAIL: PRIMARY KEY constraint on table "partitioning_test" lacks column "time" which is part of the partition key. --- ADD PRIMARY KEY to partition -ALTER TABLE partitioning_test_2009 ADD CONSTRAINT partitioning_2009_primary PRIMARY KEY (id); --- see PRIMARY KEY is created -SELECT - table_name, - constraint_name, - constraint_type -FROM - information_schema.table_constraints -WHERE - table_name = 'partitioning_test_2009' AND - constraint_name = 'partitioning_2009_primary'; - table_name | constraint_name | constraint_type ---------------------------------------------------------------------- - partitioning_test_2009 | partitioning_2009_primary | PRIMARY KEY -(1 row) - --- however, you can add primary key if it contains both distribution and partition key -ALTER TABLE partitioning_hash_test ADD CONSTRAINT partitioning_hash_primary PRIMARY KEY (id, subid); --- see PRIMARY KEY is created -SELECT - table_name, - constraint_name, - constraint_type -FROM - information_schema.table_constraints -WHERE - table_name LIKE 'partitioning_hash_test%' AND - constraint_type = 'PRIMARY KEY' -ORDER BY 1; - table_name | constraint_name | constraint_type ---------------------------------------------------------------------- - partitioning_hash_test | partitioning_hash_primary | PRIMARY KEY - partitioning_hash_test_0 | partitioning_hash_test_0_pkey | PRIMARY KEY - partitioning_hash_test_1 | partitioning_hash_test_1_pkey | PRIMARY KEY - partitioning_hash_test_2 | partitioning_hash_test_2_pkey | PRIMARY KEY -(4 rows) - --- test ADD FOREIGN CONSTRAINT --- add FOREIGN CONSTRAINT to partitioned table -- this will error out (it is a self reference) -ALTER TABLE partitioning_test ADD CONSTRAINT partitioning_foreign FOREIGN KEY (id) REFERENCES partitioning_test_2009 (id); -ERROR: cannot 
ALTER TABLE "partitioning_test_2009" because it is being used by active queries in this session --- add FOREIGN CONSTRAINT to partition -INSERT INTO partitioning_test_2009 VALUES (5, '2009-06-06'); -INSERT INTO partitioning_test_2009 VALUES (6, '2009-07-07'); -INSERT INTO partitioning_test_2009 VALUES(12, '2009-02-01'); -INSERT INTO partitioning_test_2009 VALUES(18, '2009-02-01'); -ALTER TABLE partitioning_test_2012 ADD CONSTRAINT partitioning_2012_foreign FOREIGN KEY (id) REFERENCES partitioning_test_2009 (id) ON DELETE CASCADE; --- see FOREIGN KEY is created -SELECT "Constraint" FROM table_fkeys WHERE relid = 'partitioning_test_2012'::regclass ORDER BY 1; - Constraint ---------------------------------------------------------------------- - partitioning_2012_foreign -(1 row) - --- test ON DELETE CASCADE works -DELETE FROM partitioning_test_2009 WHERE id = 5; --- see that element is deleted from both partitions -SELECT * FROM partitioning_test_2009 WHERE id = 5 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - -SELECT * FROM partitioning_test_2012 WHERE id = 5 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - --- test DETACH partition -ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2009; --- see DETACHed partitions content is not accessible from partitioning_test; -SELECT * FROM partitioning_test WHERE time >= '2009-01-01' AND time < '2010-01-01' ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - --- delete from default partition -DELETE FROM partitioning_test WHERE time >= '2015-01-01'; -SELECT * FROM partitioning_test_default; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - --- create a reference table for foreign key test -CREATE TABLE partitioning_test_reference(id int PRIMARY 
KEY, subid int); -INSERT INTO partitioning_test_reference SELECT a, a FROM generate_series(1, 50) a; -SELECT create_reference_table('partitioning_test_reference'); -NOTICE: Copying data from local table... -NOTICE: copying the data has completed -DETAIL: The local data in the table is no longer visible, but is still on disk. -HINT: To remove the local data, run: SELECT truncate_local_data_after_distributing_table($$public.partitioning_test_reference$$) - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -ALTER TABLE partitioning_test ADD CONSTRAINT partitioning_reference_fkey FOREIGN KEY (id) REFERENCES partitioning_test_reference(id) ON DELETE CASCADE; -CREATE TABLE partitioning_test_foreign_key(id int PRIMARY KEY, value int); -SELECT create_distributed_table('partitioning_test_foreign_key', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -INSERT INTO partitioning_test_foreign_key SELECT * FROM partitioning_test_reference; -ALTER TABLE partitioning_hash_test ADD CONSTRAINT partitioning_reference_fk_test FOREIGN KEY (id) REFERENCES partitioning_test_foreign_key(id) ON DELETE CASCADE; --- check foreign keys on partitions -SELECT - table_name, constraint_name, constraint_type FROm information_schema.table_constraints -WHERE - table_name LIKE 'partitioning_hash_test%' AND - constraint_type = 'FOREIGN KEY' -ORDER BY - 1,2; - table_name | constraint_name | constraint_type ---------------------------------------------------------------------- - partitioning_hash_test | partitioning_reference_fk_test | FOREIGN KEY - partitioning_hash_test_0 | partitioning_reference_fk_test | FOREIGN KEY - partitioning_hash_test_1 | partitioning_reference_fk_test | FOREIGN KEY - partitioning_hash_test_2 | partitioning_reference_fk_test | FOREIGN KEY -(4 rows) - --- check foreign keys on partition shards --- there is some text ordering issue regarding table name --- 
forcing integer sort by extracting shardid -CREATE TYPE foreign_key_details AS (table_name text, constraint_name text, constraint_type text); -SELECT right(table_name, 7)::int as shardid, * FROM ( - SELECT (json_populate_record(NULL::foreign_key_details, - json_array_elements_text(result::json)::json )).* - FROM run_command_on_workers($$ - SELECT - COALESCE(json_agg(row_to_json(q)), '[]'::json) - FROM ( - SELECT - table_name, constraint_name, constraint_type - FROM information_schema.table_constraints - WHERE - table_name LIKE 'partitioning_hash_test%' AND - constraint_type = 'FOREIGN KEY' - ORDER BY 1, 2, 3 - ) q - $$) ) w -ORDER BY 1, 2, 3, 4; - shardid | table_name | constraint_name | constraint_type ---------------------------------------------------------------------- - 1660012 | partitioning_hash_test_1660012 | partitioning_reference_fk_test_1660012 | FOREIGN KEY - 1660013 | partitioning_hash_test_1660013 | partitioning_reference_fk_test_1660013 | FOREIGN KEY - 1660014 | partitioning_hash_test_1660014 | partitioning_reference_fk_test_1660014 | FOREIGN KEY - 1660015 | partitioning_hash_test_1660015 | partitioning_reference_fk_test_1660015 | FOREIGN KEY - 1660016 | partitioning_hash_test_0_1660016 | partitioning_reference_fk_test_1660012 | FOREIGN KEY - 1660017 | partitioning_hash_test_0_1660017 | partitioning_reference_fk_test_1660013 | FOREIGN KEY - 1660018 | partitioning_hash_test_0_1660018 | partitioning_reference_fk_test_1660014 | FOREIGN KEY - 1660019 | partitioning_hash_test_0_1660019 | partitioning_reference_fk_test_1660015 | FOREIGN KEY - 1660020 | partitioning_hash_test_1_1660020 | partitioning_reference_fk_test_1660012 | FOREIGN KEY - 1660021 | partitioning_hash_test_1_1660021 | partitioning_reference_fk_test_1660013 | FOREIGN KEY - 1660022 | partitioning_hash_test_1_1660022 | partitioning_reference_fk_test_1660014 | FOREIGN KEY - 1660023 | partitioning_hash_test_1_1660023 | partitioning_reference_fk_test_1660015 | FOREIGN KEY - 1660032 | 
partitioning_hash_test_2_1660032 | partitioning_reference_fk_test_1660012 | FOREIGN KEY - 1660033 | partitioning_hash_test_2_1660033 | partitioning_reference_fk_test_1660013 | FOREIGN KEY - 1660034 | partitioning_hash_test_2_1660034 | partitioning_reference_fk_test_1660014 | FOREIGN KEY - 1660035 | partitioning_hash_test_2_1660035 | partitioning_reference_fk_test_1660015 | FOREIGN KEY -(16 rows) - -DROP TYPE foreign_key_details; --- set replication factor back to 1 since it gots reset --- after connection re-establishment -SET citus.shard_replication_factor TO 1; -SELECT * FROM partitioning_test WHERE id = 11 or id = 12; - id | time | new_column ---------------------------------------------------------------------- - 11 | 01-02-2011 | - 11 | 01-02-2011 | - 12 | 01-02-2012 | - 12 | 01-02-2012 | -(4 rows) - -DELETE FROM partitioning_test_reference WHERE id = 11 or id = 12; -SELECT * FROM partitioning_hash_test ORDER BY 1, 2; - id | subid ---------------------------------------------------------------------- - 1 | 2 - 2 | 13 - 3 | 7 - 4 | 4 - 8 | 5 - 9 | 12 -(6 rows) - -DELETE FROM partitioning_test_foreign_key WHERE id = 2 OR id = 9; --- see data is deleted from referencing table -SELECT * FROM partitioning_test WHERE id = 11 or id = 12; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - -SELECT * FROM partitioning_hash_test ORDER BY 1, 2; - id | subid ---------------------------------------------------------------------- - 1 | 2 - 3 | 7 - 4 | 4 - 8 | 5 -(4 rows) - --- --- Transaction tests --- --- DDL in transaction -BEGIN; -ALTER TABLE partitioning_test ADD newer_column int; --- see additional column is created -SELECT name, type FROM table_attrs WHERE relid = 'partitioning_test'::regclass ORDER BY 1; - name | type ---------------------------------------------------------------------- - id | integer - new_column | integer - newer_column | integer - time | date -(4 rows) - -ROLLBACK; --- see rollback is 
successful -SELECT name, type FROM table_attrs WHERE relid = 'partitioning_test'::regclass ORDER BY 1; - name | type ---------------------------------------------------------------------- - id | integer - new_column | integer - time | date -(3 rows) - --- COPY in transaction -BEGIN; -COPY partitioning_test FROM STDIN WITH CSV; --- see the data is loaded to shards -SELECT * FROM partitioning_test WHERE id = 22 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- - 22 | 01-01-2010 | 22 -(1 row) - -SELECT * FROM partitioning_test WHERE id = 23 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- - 23 | 01-01-2011 | 23 -(1 row) - -SELECT * FROM partitioning_test WHERE id = 24 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- - 24 | 01-01-2013 | 24 -(1 row) - -ROLLBACK; --- see rollback is successful -SELECT * FROM partitioning_test WHERE id >= 22 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - --- DML in transaction -BEGIN; --- INSERT in transaction -INSERT INTO partitioning_test VALUES(25, '2010-02-02'); --- see the data is loaded to shards -SELECT * FROM partitioning_test WHERE id = 25 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- - 25 | 02-02-2010 | -(1 row) - --- INSERT/SELECT in transaction -INSERT INTO partitioning_test SELECT * FROM partitioning_test WHERE id = 25; --- see the data is loaded to shards -SELECT * FROM partitioning_test WHERE id = 25 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- - 25 | 02-02-2010 | - 25 | 02-02-2010 | -(2 rows) - --- UPDATE in transaction -UPDATE partitioning_test SET time = '2010-10-10' WHERE id = 25; --- see the data is updated -SELECT * FROM partitioning_test WHERE id = 
25 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- - 25 | 10-10-2010 | - 25 | 10-10-2010 | -(2 rows) - --- perform operations on partition and partioned tables together -INSERT INTO partitioning_test VALUES(26, '2010-02-02', 26); -INSERT INTO partitioning_test_2010 VALUES(26, '2010-02-02', 26); -COPY partitioning_test FROM STDIN WITH CSV; -COPY partitioning_test_2010 FROM STDIN WITH CSV; --- see the data is loaded to shards (we should see 4 rows with same content) -SELECT * FROM partitioning_test WHERE id = 26 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- - 26 | 02-02-2010 | 26 - 26 | 02-02-2010 | 26 - 26 | 02-02-2010 | 26 - 26 | 02-02-2010 | 26 -(4 rows) - -ROLLBACK; --- see rollback is successful -SELECT * FROM partitioning_test WHERE id = 26 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - --- DETACH and DROP in a transaction -BEGIN; -ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2011; -DROP TABLE partitioning_test_2011; -COMMIT; --- see DROPed partitions content is not accessible -SELECT * FROM partitioning_test WHERE time >= '2011-01-01' AND time < '2012-01-01' ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - --- --- Misc tests --- --- test TRUNCATE --- test TRUNCATE partition -TRUNCATE partitioning_test_2012; --- see partition is TRUNCATEd -SELECT * FROM partitioning_test_2012 ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - --- test TRUNCATE partitioned table -TRUNCATE partitioning_test; --- see partitioned table is TRUNCATEd -SELECT * FROM partitioning_test ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - --- test DROP --- test DROP 
partition -INSERT INTO partitioning_test_2010 VALUES(27, '2010-02-01'); -DROP TABLE partitioning_test_2010; --- see DROPped partitions content is not accessible from partitioning_test; -SELECT * FROM partitioning_test WHERE time >= '2010-01-01' AND time < '2011-01-01' ORDER BY 1; - id | time | new_column ---------------------------------------------------------------------- -(0 rows) - --- test DROP partitioned table -DROP TABLE partitioning_test; -DROP TABLE partitioning_test_reference; --- dropping the parent should CASCADE to the children as well -SELECT table_name FROM information_schema.tables WHERE table_name LIKE 'partitioning_test%' ORDER BY 1; - table_name ---------------------------------------------------------------------- - partitioning_test_2009 - partitioning_test_failure - partitioning_test_foreign_key -(3 rows) - --- test distributing partitioned table colocated with non-partitioned table -CREATE TABLE partitioned_users_table (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint) PARTITION BY RANGE (time); -CREATE TABLE partitioned_events_table (user_id int, time timestamp, event_type int, value_2 int, value_3 float, value_4 bigint) PARTITION BY RANGE (time); -SELECT create_distributed_table('partitioned_users_table', 'user_id', colocate_with => 'users_table'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('partitioned_events_table', 'user_id', colocate_with => 'events_table'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- INSERT/SELECT from regular table to partitioned table -CREATE TABLE partitioned_users_table_2009 PARTITION OF partitioned_users_table FOR VALUES FROM ('2017-01-01') TO ('2018-01-01'); -CREATE TABLE partitioned_events_table_2009 PARTITION OF partitioned_events_table FOR VALUES FROM ('2017-01-01') TO ('2018-01-01'); -INSERT INTO 
partitioned_events_table SELECT * FROM events_table; -INSERT INTO partitioned_users_table_2009 SELECT * FROM users_table; --- --- Complex JOINs, subqueries, UNIONs etc... --- --- subquery with UNIONs on partitioned table -SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType -FROM - (SELECT *, random() - FROM - (SELECT - "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" - FROM - (SELECT - "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events - FROM( - (SELECT - "events"."user_id", "events"."time", 0 AS event - FROM - partitioned_events_table as "events" - WHERE - event_type IN (1, 2) ) - UNION - (SELECT - "events"."user_id", "events"."time", 1 AS event - FROM - partitioned_events_table as "events" - WHERE - event_type IN (3, 4) ) - UNION - (SELECT - "events"."user_id", "events"."time", 2 AS event - FROM - partitioned_events_table as "events" - WHERE - event_type IN (5, 6) ) - UNION - (SELECT - "events"."user_id", "events"."time", 3 AS event - FROM - partitioned_events_table as "events" - WHERE - event_type IN (1, 6))) t1 - GROUP BY "t1"."user_id") AS t) "q" -) AS final_query -GROUP BY types -ORDER BY types; - types | sumofeventtype ---------------------------------------------------------------------- - 0 | 43 - 1 | 44 - 2 | 8 - 3 | 25 -(4 rows) - --- UNION and JOIN on both partitioned and regular tables -SELECT ("final_query"."event_types") as types, count(*) AS sumOfEventType -FROM - (SELECT - *, random() - FROM - (SELECT - "t"."user_id", "t"."time", unnest("t"."collected_events") AS "event_types" - FROM - (SELECT - "t1"."user_id", min("t1"."time") AS "time", array_agg(("t1"."event") ORDER BY TIME ASC, event DESC) AS collected_events - FROM ( - (SELECT - * - FROM - (SELECT - "events"."time", 0 AS event, "events"."user_id" - FROM - partitioned_events_table as "events" - WHERE - event_type IN (1, 2)) events_subquery_1) - UNION - (SELECT * - FROM - ( - 
SELECT * FROM - ( - SELECT - max("events"."time"), - 0 AS event, - "events"."user_id" - FROM - events_table as "events", users_table as "users" - WHERE - events.user_id = users.user_id AND - event_type IN (1, 2) - GROUP BY "events"."user_id" - ) as events_subquery_5 - ) events_subquery_2) - UNION - (SELECT * - FROM - (SELECT - "events"."time", 2 AS event, "events"."user_id" - FROM - partitioned_events_table as "events" - WHERE - event_type IN (3, 4)) events_subquery_3) - UNION - (SELECT * - FROM - (SELECT - "events"."time", 3 AS event, "events"."user_id" - FROM - events_table as "events" - WHERE - event_type IN (5, 6)) events_subquery_4) - ) t1 - GROUP BY "t1"."user_id") AS t) "q" -INNER JOIN - (SELECT - "users"."user_id" - FROM - partitioned_users_table as "users" - WHERE - value_1 > 2 and value_1 < 5) AS t - ON (t.user_id = q.user_id)) as final_query -GROUP BY - types -ORDER BY - types; - types | sumofeventtype ---------------------------------------------------------------------- - 0 | 367 - 2 | 360 - 3 | 57 -(3 rows) - --- test LIST partitioning -CREATE TABLE list_partitioned_events_table (user_id int, time date, event_type int, value_2 int, value_3 float, value_4 bigint) PARTITION BY LIST (time); -CREATE TABLE list_partitioned_events_table_2014_01_01_05 PARTITION OF list_partitioned_events_table FOR VALUES IN ('2017-11-21', '2017-11-22', '2017-11-23', '2017-11-24', '2017-11-25'); -CREATE TABLE list_partitioned_events_table_2014_01_06_10 PARTITION OF list_partitioned_events_table FOR VALUES IN ('2017-11-26', '2017-11-27', '2017-11-28', '2017-11-29', '2017-11-30'); -CREATE TABLE list_partitioned_events_table_2014_01_11_15 PARTITION OF list_partitioned_events_table FOR VALUES IN ('2017-12-01', '2017-12-02', '2017-12-03', '2017-12-04', '2017-12-05'); --- test distributing partitioned table colocated with another partitioned table -SELECT create_distributed_table('list_partitioned_events_table', 'user_id', colocate_with => 'partitioned_events_table'); - 
create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- INSERT/SELECT from partitioned table to partitioned table -INSERT INTO - list_partitioned_events_table -SELECT - user_id, - date_trunc('day', time) as time, - event_type, - value_2, - value_3, - value_4 -FROM - events_table -WHERE - time >= '2017-11-21' AND - time <= '2017-12-01'; --- LEFT JOINs used with INNER JOINs on range partitioned table, list partitioned table and non-partitioned table -SELECT -count(*) AS cnt, "generated_group_field" - FROM - (SELECT - "eventQuery"."user_id", random(), generated_group_field - FROM - (SELECT - "multi_group_wrapper_1".*, generated_group_field, random() - FROM - (SELECT * - FROM - (SELECT - "list_partitioned_events_table"."time", "list_partitioned_events_table"."user_id" as event_user_id - FROM - list_partitioned_events_table as "list_partitioned_events_table" - WHERE - user_id > 2) "temp_data_queries" - INNER JOIN - (SELECT - "users"."user_id" - FROM - partitioned_users_table as "users" - WHERE - user_id > 2 and value_2 = 1) "user_filters_1" - ON ("temp_data_queries".event_user_id = "user_filters_1".user_id)) AS "multi_group_wrapper_1" - LEFT JOIN - (SELECT - "users"."user_id" AS "user_id", value_2 AS "generated_group_field" - FROM - partitioned_users_table as "users") "left_group_by_1" - ON ("left_group_by_1".user_id = "multi_group_wrapper_1".event_user_id)) "eventQuery") "pushedDownQuery" - GROUP BY - "generated_group_field" - ORDER BY - cnt DESC, generated_group_field ASC - LIMIT 10; - cnt | generated_group_field ---------------------------------------------------------------------- - 1851 | 1 - 1077 | 4 - 963 | 2 - 955 | 3 - 768 | 5 - 639 | 0 -(6 rows) - --- --- Additional partitioning features --- --- test multi column partitioning -CREATE TABLE multi_column_partitioning(c1 int, c2 int) PARTITION BY RANGE (c1, c2); -CREATE TABLE multi_column_partitioning_0_0_10_0 PARTITION OF multi_column_partitioning FOR 
VALUES FROM (0, 0) TO (10, 0); -SELECT create_distributed_table('multi_column_partitioning', 'c1'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- test INSERT to multi-column partitioned table -INSERT INTO multi_column_partitioning VALUES(1, 1); -INSERT INTO multi_column_partitioning_0_0_10_0 VALUES(5, -5); --- test INSERT to multi-column partitioned table where no suitable partition exists -INSERT INTO multi_column_partitioning VALUES(10, 1); -ERROR: no partition of relation "multi_column_partitioning_1660101" found for row -DETAIL: Partition key of the failing row contains (c1, c2) = (10, 1). -CONTEXT: while executing command on localhost:xxxxx --- test with MINVALUE/MAXVALUE -CREATE TABLE multi_column_partitioning_10_max_20_min PARTITION OF multi_column_partitioning FOR VALUES FROM (10, MAXVALUE) TO (20, MINVALUE); --- test INSERT to partition with MINVALUE/MAXVALUE bounds -INSERT INTO multi_column_partitioning VALUES(11, -11); -INSERT INTO multi_column_partitioning_10_max_20_min VALUES(19, -19); --- test INSERT to multi-column partitioned table where no suitable partition exists -INSERT INTO multi_column_partitioning VALUES(20, -20); -ERROR: no partition of relation "multi_column_partitioning_1660101" found for row -DETAIL: Partition key of the failing row contains (c1, c2) = (20, -20). 
-CONTEXT: while executing command on localhost:xxxxx --- see data is loaded to multi-column partitioned table -SELECT * FROM multi_column_partitioning ORDER BY 1, 2; - c1 | c2 ---------------------------------------------------------------------- - 1 | 1 - 5 | -5 - 11 | -11 - 19 | -19 -(4 rows) - --- --- Tests for locks on partitioned tables --- -CREATE TABLE partitioning_locks(id int, ref_id int, time date) PARTITION BY RANGE (time); --- create its partitions -CREATE TABLE partitioning_locks_2009 PARTITION OF partitioning_locks FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); -CREATE TABLE partitioning_locks_2010 PARTITION OF partitioning_locks FOR VALUES FROM ('2010-01-01') TO ('2011-01-01'); --- distribute partitioned table -SELECT create_distributed_table('partitioning_locks', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - --- test locks on router SELECT -BEGIN; -SELECT * FROM partitioning_locks WHERE id = 1 ORDER BY 1, 2; - id | ref_id | time ---------------------------------------------------------------------- -(0 rows) - -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | relation | AccessShareLock - partitioning_locks_2009 | relation | AccessShareLock - partitioning_locks_2010 | relation | AccessShareLock -(3 rows) - -COMMIT; --- test locks on real-time SELECT -BEGIN; -SELECT * FROM partitioning_locks ORDER BY 1, 2; - id | ref_id | time ---------------------------------------------------------------------- -(0 rows) - -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode 
---------------------------------------------------------------------- - partitioning_locks | relation | AccessShareLock - partitioning_locks_2009 | relation | AccessShareLock - partitioning_locks_2010 | relation | AccessShareLock -(3 rows) - -COMMIT; -BEGIN; -SELECT * FROM partitioning_locks AS pl1 JOIN partitioning_locks AS pl2 ON pl1.id = pl2.ref_id ORDER BY 1, 2; - id | ref_id | time | id | ref_id | time ---------------------------------------------------------------------- -(0 rows) - -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | relation | AccessShareLock - partitioning_locks_2009 | relation | AccessShareLock - partitioning_locks_2010 | relation | AccessShareLock -(3 rows) - -COMMIT; --- test locks on INSERT -BEGIN; -INSERT INTO partitioning_locks VALUES(1, 1, '2009-01-01'); -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | relation | AccessShareLock - partitioning_locks | relation | RowExclusiveLock - partitioning_locks_2009 | relation | AccessShareLock - partitioning_locks_2009 | relation | RowExclusiveLock - partitioning_locks_2010 | relation | AccessShareLock - partitioning_locks_2010 | relation | RowExclusiveLock -(6 rows) - -COMMIT; --- test locks on UPDATE -BEGIN; -UPDATE partitioning_locks SET time = '2009-02-01' WHERE id = 1; -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode ---------------------------------------------------------------------- - 
partitioning_locks | relation | AccessShareLock - partitioning_locks | relation | RowExclusiveLock - partitioning_locks_2009 | relation | AccessShareLock - partitioning_locks_2009 | relation | RowExclusiveLock - partitioning_locks_2010 | relation | AccessShareLock - partitioning_locks_2010 | relation | RowExclusiveLock -(6 rows) - -COMMIT; --- test locks on DELETE -BEGIN; -DELETE FROM partitioning_locks WHERE id = 1; -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | relation | AccessShareLock - partitioning_locks | relation | RowExclusiveLock - partitioning_locks_2009 | relation | AccessShareLock - partitioning_locks_2009 | relation | RowExclusiveLock - partitioning_locks_2010 | relation | AccessShareLock - partitioning_locks_2010 | relation | RowExclusiveLock -(6 rows) - -COMMIT; --- test locks on INSERT/SELECT -CREATE TABLE partitioning_locks_for_select(id int, ref_id int, time date); -SELECT create_distributed_table('partitioning_locks_for_select', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -BEGIN; -INSERT INTO partitioning_locks SELECT * FROM partitioning_locks_for_select; -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | relation | AccessShareLock - partitioning_locks | relation | RowExclusiveLock - partitioning_locks_2009 | relation | AccessShareLock - partitioning_locks_2009 | relation | RowExclusiveLock - partitioning_locks_2010 | relation | AccessShareLock - partitioning_locks_2010 | relation | RowExclusiveLock - 
partitioning_locks_for_select | relation | AccessShareLock -(7 rows) - -COMMIT; --- test locks on coordinator INSERT/SELECT -BEGIN; -INSERT INTO partitioning_locks SELECT * FROM partitioning_locks_for_select LIMIT 5; -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | relation | AccessShareLock - partitioning_locks | relation | RowExclusiveLock - partitioning_locks_2009 | relation | RowExclusiveLock - partitioning_locks_2010 | relation | RowExclusiveLock - partitioning_locks_for_select | relation | AccessShareLock -(5 rows) - -COMMIT; --- test locks on multi-shard UPDATE -BEGIN; -UPDATE partitioning_locks SET time = '2009-03-01'; -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | relation | AccessShareLock - partitioning_locks | relation | RowExclusiveLock - partitioning_locks_2009 | relation | AccessShareLock - partitioning_locks_2009 | relation | RowExclusiveLock - partitioning_locks_2010 | relation | AccessShareLock - partitioning_locks_2010 | relation | RowExclusiveLock -(6 rows) - -COMMIT; --- test locks on DDL -BEGIN; -ALTER TABLE partitioning_locks ADD COLUMN new_column int; -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | relation | AccessExclusiveLock - partitioning_locks | relation | AccessShareLock - partitioning_locks_2009 | relation | AccessExclusiveLock - 
partitioning_locks_2009 | relation | AccessShareLock - partitioning_locks_2010 | relation | AccessExclusiveLock - partitioning_locks_2010 | relation | AccessShareLock -(6 rows) - -COMMIT; --- test locks on TRUNCATE -BEGIN; -TRUNCATE partitioning_locks; -SELECT relation::regclass, locktype, mode FROM pg_locks WHERE relation::regclass::text LIKE 'partitioning_locks%' AND pid = pg_backend_pid() ORDER BY 1, 2, 3; - relation | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | relation | AccessExclusiveLock - partitioning_locks | relation | AccessShareLock - partitioning_locks_2009 | relation | AccessExclusiveLock - partitioning_locks_2009 | relation | AccessShareLock - partitioning_locks_2009 | relation | ShareLock - partitioning_locks_2010 | relation | AccessExclusiveLock - partitioning_locks_2010 | relation | AccessShareLock - partitioning_locks_2010 | relation | ShareLock -(8 rows) - -COMMIT; -CREATE VIEW lockinfo AS - SELECT - logicalrelid, - CASE - WHEN l.objsubid = 5 THEN 'shard' - WHEN l.objsubid = 4 THEN 'shard_metadata' - ELSE 'colocated_shards_metadata' - END AS locktype, - mode - FROM - pg_locks AS l JOIN (select row_number() over (partition by logicalrelid order by shardminvalue) -1 as shardintervalindex, * from pg_dist_shard) AS s - ON - (l.objsubid IN (4, 5) AND l.objid = s.shardid ) - OR (l.objsubid = 8 - AND l.objid IN (select colocationid from pg_dist_partition AS p where p.logicalrelid = s.logicalrelid) - AND l.classid = shardintervalindex - ) - WHERE - logicalrelid IN ('partitioning_locks', 'partitioning_locks_2009', 'partitioning_locks_2010') - AND pid = pg_backend_pid() - AND l.locktype = 'advisory' - ORDER BY - 1, 2, 3; --- test shard resource locks with multi-shard UPDATE -BEGIN; -UPDATE partitioning_locks_2009 SET time = '2009-03-01'; --- see the locks on parent table -SELECT * FROM lockinfo; - logicalrelid | locktype | mode 
---------------------------------------------------------------------- - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | shard | ShareUpdateExclusiveLock - partitioning_locks | shard | ShareUpdateExclusiveLock - partitioning_locks | shard | ShareUpdateExclusiveLock - partitioning_locks | shard | ShareUpdateExclusiveLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | shard | ShareUpdateExclusiveLock - partitioning_locks_2009 | shard | ShareUpdateExclusiveLock - partitioning_locks_2009 | shard | ShareUpdateExclusiveLock - partitioning_locks_2009 | shard | ShareUpdateExclusiveLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock -(20 rows) - -COMMIT; --- test shard resource locks with TRUNCATE -BEGIN; -TRUNCATE partitioning_locks_2009; --- see the locks on parent table -SELECT * FROM lockinfo; - logicalrelid | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | 
colocated_shards_metadata | ShareLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock -(12 rows) - -COMMIT; --- test shard resource locks with INSERT/SELECT -BEGIN; -INSERT INTO partitioning_locks_2009 SELECT * FROM partitioning_locks WHERE time >= '2009-01-01' AND time < '2010-01-01'; --- see the locks on parent table -SELECT * FROM lockinfo; - logicalrelid | locktype | mode ---------------------------------------------------------------------- - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | colocated_shards_metadata | ShareLock - partitioning_locks | shard | ShareUpdateExclusiveLock - partitioning_locks | shard | ShareUpdateExclusiveLock - partitioning_locks | shard | ShareUpdateExclusiveLock - partitioning_locks | shard | ShareUpdateExclusiveLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | colocated_shards_metadata | ShareLock - partitioning_locks_2009 | shard | ShareUpdateExclusiveLock - partitioning_locks_2009 | shard | ShareUpdateExclusiveLock - partitioning_locks_2009 | shard | ShareUpdateExclusiveLock - partitioning_locks_2009 | shard | ShareUpdateExclusiveLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock - partitioning_locks_2010 | colocated_shards_metadata | ShareLock -(20 rows) - -COMMIT; --- 
test partition-wise join -CREATE TABLE partitioning_hash_join_test(id int, subid int) PARTITION BY HASH(subid); -CREATE TABLE partitioning_hash_join_test_0 PARTITION OF partitioning_hash_join_test FOR VALUES WITH (MODULUS 3, REMAINDER 0); -CREATE TABLE partitioning_hash_join_test_1 PARTITION OF partitioning_hash_join_test FOR VALUES WITH (MODULUS 3, REMAINDER 1); -CREATE TABLE partitioning_hash_join_test_2 PARTITION OF partitioning_hash_join_test FOR VALUES WITH (MODULUS 3, REMAINDER 2); -SELECT create_distributed_table('partitioning_hash_join_test', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT success FROM run_command_on_workers('alter system set enable_mergejoin to off'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('alter system set enable_nestloop to off'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('alter system set enable_indexscan to off'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('alter system set enable_indexonlyscan to off'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('alter system set enable_partitionwise_join to off'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('select pg_reload_conf()'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -EXPLAIN (COSTS OFF) -SELECT * FROM partitioning_hash_test JOIN partitioning_hash_join_test USING (id, subid); - QUERY PLAN 
---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Hash Join - Hash Cond: ((partitioning_hash_join_test.id = partitioning_hash_test.id) AND (partitioning_hash_join_test.subid = partitioning_hash_test.subid)) - -> Append - -> Seq Scan on partitioning_hash_join_test_0_1660133 partitioning_hash_join_test - -> Seq Scan on partitioning_hash_join_test_1_1660137 partitioning_hash_join_test_1 - -> Seq Scan on partitioning_hash_join_test_2_1660141 partitioning_hash_join_test_2 - -> Hash - -> Append - -> Seq Scan on partitioning_hash_test_0_1660016 partitioning_hash_test - -> Seq Scan on partitioning_hash_test_1_1660020 partitioning_hash_test_1 - -> Seq Scan on partitioning_hash_test_2_1660032 partitioning_hash_test_2 -(16 rows) - --- set partition-wise join on and parallel to off -SELECT success FROM run_command_on_workers('alter system set enable_partitionwise_join to on'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('select pg_reload_conf()'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SET enable_partitionwise_join TO on; -ANALYZE partitioning_hash_test, partitioning_hash_join_test; -EXPLAIN (COSTS OFF) -SELECT * FROM partitioning_hash_test JOIN partitioning_hash_join_test USING (id, subid); - QUERY PLAN ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Append - -> Hash Join - Hash Cond: ((partitioning_hash_join_test.id = partitioning_hash_test.id) AND (partitioning_hash_join_test.subid = partitioning_hash_test.subid)) - -> Seq Scan on partitioning_hash_join_test_0_1660133 partitioning_hash_join_test - 
-> Hash - -> Seq Scan on partitioning_hash_test_0_1660016 partitioning_hash_test - -> Hash Join - Hash Cond: ((partitioning_hash_test_1.id = partitioning_hash_join_test_1.id) AND (partitioning_hash_test_1.subid = partitioning_hash_join_test_1.subid)) - -> Seq Scan on partitioning_hash_test_1_1660020 partitioning_hash_test_1 - -> Hash - -> Seq Scan on partitioning_hash_join_test_1_1660137 partitioning_hash_join_test_1 - -> Hash Join - Hash Cond: ((partitioning_hash_join_test_2.id = partitioning_hash_test_2.id) AND (partitioning_hash_join_test_2.subid = partitioning_hash_test_2.subid)) - -> Seq Scan on partitioning_hash_join_test_2_1660141 partitioning_hash_join_test_2 - -> Hash - -> Seq Scan on partitioning_hash_test_2_1660032 partitioning_hash_test_2 -(21 rows) - --- note that partition-wise joins only work when partition key is in the join --- following join does not have that, therefore join will not be pushed down to --- partitions -EXPLAIN (COSTS OFF) -SELECT * FROM partitioning_hash_test JOIN partitioning_hash_join_test USING (id); - QUERY PLAN ---------------------------------------------------------------------- - Custom Scan (Citus Adaptive) - Task Count: 4 - Tasks Shown: One of 4 - -> Task - Node: host=localhost port=xxxxx dbname=regression - -> Hash Join - Hash Cond: (partitioning_hash_join_test.id = partitioning_hash_test.id) - -> Append - -> Seq Scan on partitioning_hash_join_test_0_1660133 partitioning_hash_join_test - -> Seq Scan on partitioning_hash_join_test_1_1660137 partitioning_hash_join_test_1 - -> Seq Scan on partitioning_hash_join_test_2_1660141 partitioning_hash_join_test_2 - -> Hash - -> Append - -> Seq Scan on partitioning_hash_test_0_1660016 partitioning_hash_test - -> Seq Scan on partitioning_hash_test_1_1660020 partitioning_hash_test_1 - -> Seq Scan on partitioning_hash_test_2_1660032 partitioning_hash_test_2 -(16 rows) - --- reset partition-wise join -SELECT success FROM run_command_on_workers('alter system reset 
enable_partitionwise_join'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('alter system reset enable_mergejoin'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('alter system reset enable_nestloop'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('alter system reset enable_indexscan'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('alter system reset enable_indexonlyscan'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -SELECT success FROM run_command_on_workers('select pg_reload_conf()'); - success ---------------------------------------------------------------------- - t - t -(2 rows) - -RESET enable_partitionwise_join; -DROP VIEW lockinfo; -DROP TABLE -IF EXISTS - partitioning_test_2009, - partitioned_events_table, - partitioned_users_table, - list_partitioned_events_table, - multi_column_partitioning, - partitioning_locks, - partitioning_locks_for_select; --- make sure we can create a partitioned table with streaming replication -SET citus.replication_model TO 'streaming'; -CREATE TABLE partitioning_test(id int, time date) PARTITION BY RANGE (time); -CREATE TABLE partitioning_test_2009 PARTITION OF partitioning_test FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); -SELECT create_distributed_table('partitioning_test', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -DROP TABLE partitioning_test; --- make sure we can attach partitions to a distributed table in a schema -CREATE SCHEMA partitioning_schema; -CREATE TABLE 
partitioning_schema."schema-test"(id int, time date) PARTITION BY RANGE (time); -SELECT create_distributed_table('partitioning_schema."schema-test"', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE partitioning_schema."schema-test_2009"(id int, time date); -ALTER TABLE partitioning_schema."schema-test" ATTACH PARTITION partitioning_schema."schema-test_2009" FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); --- attached partition is distributed as well -SELECT - logicalrelid -FROM - pg_dist_partition -WHERE - logicalrelid IN ('partitioning_schema."schema-test"'::regclass, 'partitioning_schema."schema-test_2009"'::regclass) -ORDER BY 1; - logicalrelid ---------------------------------------------------------------------- - partitioning_schema."schema-test" - partitioning_schema."schema-test_2009" -(2 rows) - -SELECT - logicalrelid, count(*) -FROM - pg_dist_shard -WHERE - logicalrelid IN ('partitioning_schema."schema-test"'::regclass, 'partitioning_schema."schema-test_2009"'::regclass) -GROUP BY - logicalrelid -ORDER BY - 1,2; - logicalrelid | count ---------------------------------------------------------------------- - partitioning_schema."schema-test" | 4 - partitioning_schema."schema-test_2009" | 4 -(2 rows) - -DROP TABLE partitioning_schema."schema-test"; --- make sure we can create partition of a distributed table in a schema -CREATE TABLE partitioning_schema."schema-test"(id int, time date) PARTITION BY RANGE (time); -SELECT create_distributed_table('partitioning_schema."schema-test"', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE partitioning_schema."schema-test_2009" PARTITION OF partitioning_schema."schema-test" FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); --- newly created partition is distributed as well -SELECT - logicalrelid -FROM - pg_dist_partition -WHERE - logicalrelid IN 
('partitioning_schema."schema-test"'::regclass, 'partitioning_schema."schema-test_2009"'::regclass) -ORDER BY 1; - logicalrelid ---------------------------------------------------------------------- - partitioning_schema."schema-test" - partitioning_schema."schema-test_2009" -(2 rows) - -SELECT - logicalrelid, count(*) -FROM - pg_dist_shard -WHERE - logicalrelid IN ('partitioning_schema."schema-test"'::regclass, 'partitioning_schema."schema-test_2009"'::regclass) -GROUP BY - logicalrelid -ORDER BY - 1,2; - logicalrelid | count ---------------------------------------------------------------------- - partitioning_schema."schema-test" | 4 - partitioning_schema."schema-test_2009" | 4 -(2 rows) - -DROP TABLE partitioning_schema."schema-test"; --- make sure creating partitioned tables works while search_path is set -CREATE TABLE partitioning_schema."schema-test"(id int, time date) PARTITION BY RANGE (time); -SET search_path = partitioning_schema; -SELECT create_distributed_table('"schema-test"', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE partitioning_schema."schema-test_2009" PARTITION OF "schema-test" FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); --- newly created partition is distributed as well -SELECT - logicalrelid -FROM - pg_dist_partition -WHERE - logicalrelid IN ('partitioning_schema."schema-test"'::regclass, 'partitioning_schema."schema-test_2009"'::regclass) -ORDER BY 1; - logicalrelid ---------------------------------------------------------------------- - "schema-test" - "schema-test_2009" -(2 rows) - -SELECT - logicalrelid, count(*) -FROM - pg_dist_shard -WHERE - logicalrelid IN ('partitioning_schema."schema-test"'::regclass, 'partitioning_schema."schema-test_2009"'::regclass) -GROUP BY - logicalrelid -ORDER BY - 1,2; - logicalrelid | count ---------------------------------------------------------------------- - "schema-test" | 4 - "schema-test_2009" | 4 -(2 rows) - 
--- test we don't deadlock when attaching and detaching partitions from partitioned --- tables with foreign keys -CREATE TABLE reference_table(id int PRIMARY KEY); -SELECT create_reference_table('reference_table'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE reference_table_2(id int PRIMARY KEY); -SELECT create_reference_table('reference_table_2'); - create_reference_table ---------------------------------------------------------------------- - -(1 row) - -CREATE TABLE partitioning_test(id int, time date) PARTITION BY RANGE (time); -CREATE TABLE partitioning_test_2008 PARTITION OF partitioning_test FOR VALUES FROM ('2008-01-01') TO ('2009-01-01'); -CREATE TABLE partitioning_test_2009 (LIKE partitioning_test); -CREATE TABLE partitioning_test_2010 (LIKE partitioning_test); -CREATE TABLE partitioning_test_2011 (LIKE partitioning_test); --- distributing partitioning_test will also distribute partitioning_test_2008 -SELECT create_distributed_table('partitioning_test', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('partitioning_test_2009', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('partitioning_test_2010', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -SELECT create_distributed_table('partitioning_test_2011', 'id'); - create_distributed_table ---------------------------------------------------------------------- - -(1 row) - -ALTER TABLE partitioning_test ADD CONSTRAINT partitioning_reference_fkey - FOREIGN KEY (id) REFERENCES reference_table(id) ON DELETE CASCADE; -ALTER TABLE partitioning_test_2009 ADD CONSTRAINT partitioning_reference_fkey_2009 - FOREIGN KEY (id) REFERENCES reference_table(id) ON DELETE 
CASCADE; -INSERT INTO partitioning_test_2010 VALUES (1, '2010-02-01'); --- This should fail because of foreign key constraint violation -ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2010 - FOR VALUES FROM ('2010-01-01') TO ('2011-01-01'); -ERROR: insert or update on table "partitioning_test_2010_1660191" violates foreign key constraint "partitioning_reference_fkey_1660179" -DETAIL: Key (id)=(X) is not present in table "reference_table_1660177". -CONTEXT: while executing command on localhost:xxxxx --- Truncate, so attaching again won't fail -TRUNCATE partitioning_test_2010; --- Attach a table which already has the same constraint -ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2009 - FOR VALUES FROM ('2009-01-01') TO ('2010-01-01'); --- Attach a table which doesn't have the constraint -ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2010 - FOR VALUES FROM ('2010-01-01') TO ('2011-01-01'); --- Attach a table which has a different constraint -ALTER TABLE partitioning_test ATTACH PARTITION partitioning_test_2011 - FOR VALUES FROM ('2011-01-01') TO ('2012-01-01'); -ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2008; -ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2009; -ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2010; -ALTER TABLE partitioning_test DETACH PARTITION partitioning_test_2011; -DROP TABLE partitioning_test, partitioning_test_2008, partitioning_test_2009, - partitioning_test_2010, partitioning_test_2011, - reference_table, reference_table_2; -DROP SCHEMA partitioning_schema CASCADE; -NOTICE: drop cascades to table "schema-test" -RESET SEARCH_PATH; -DROP TABLE IF EXISTS - partitioning_hash_test, - partitioning_hash_join_test, - partitioning_test_failure, - non_distributed_partitioned_table, - partitioning_test_foreign_key; From e0d2ac76208aef6ae0be3bb38adc23016f889b3d Mon Sep 17 00:00:00 2001 From: Onder Kalaci Date: Tue, 3 Nov 2020 18:16:05 +0100 
Subject: [PATCH 099/124] Do not rely on set_rel_pathlist_hook for finding local relations When a relation is used on an OUTER JOIN with FALSE filters, set_rel_pathlist_hook may not be called for the table. There might be other cases as well, so do not rely on the hook for classification of the tables. --- .../distributed/planner/distributed_planner.c | 23 +- .../planner/multi_router_planner.c | 12 +- .../relation_restriction_equivalence.c | 4 - src/include/distributed/distributed_planner.h | 6 +- .../regress/expected/materialized_view.out | 10 + .../expected/recursive_view_local_table.out | 201 ++++++++++++++++++ src/test/regress/multi_schedule | 2 +- src/test/regress/sql/materialized_view.sql | 5 + .../sql/recursive_view_local_table.sql | 55 +++++ 9 files changed, 297 insertions(+), 21 deletions(-) create mode 100644 src/test/regress/expected/recursive_view_local_table.out create mode 100644 src/test/regress/sql/recursive_view_local_table.sql diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 5f0795da4..964dfb943 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -1804,7 +1804,6 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo, MemoryContext oldMemoryContext = MemoryContextSwitchTo(restrictionsMemoryContext); bool distributedTable = IsCitusTable(rte->relid); - bool localTable = !distributedTable; RelationRestriction *relationRestriction = palloc0(sizeof(RelationRestriction)); relationRestriction->index = restrictionIndex; @@ -1820,8 +1819,6 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo, RelationRestrictionContext *relationRestrictionContext = plannerRestrictionContext->relationRestrictionContext; - relationRestrictionContext->hasDistributedRelation |= distributedTable; - relationRestrictionContext->hasLocalRelation |= localTable; /* * We're also 
keeping track of whether all participant @@ -2308,12 +2305,26 @@ GetRTEListProperties(List *rangeTableList) */ continue; } - else if (rangeTableEntry->relkind == RELKIND_MATVIEW) + + + if (rangeTableEntry->relkind == RELKIND_MATVIEW) { /* - * Skip over materialized views, here we should not consider - * materialized views as local tables. + * Record materialized views as they are similar to postgres local tables + * but it is nice to record them separately. + * + * Regular tables, partitioned tables or foreign tables can be a local or + * distributed tables and we can qualify them accurately. + * + * For regular views, we don't care because their definitions are already + * in the same query tree and we can detect what is inside the view definition. + * + * For materialized views, they are just local tables in the queries. But, when + * REFRESH MATERIALIZED VIEW is used, they behave similar to regular views, adds + * the view definition to the query. Hence, it is useful to record it seperately + * and let the callers decide on what to do. 
*/ + rteListProperties->hasMaterializedView = true; continue; } diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index e314aa4b5..89e544374 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -2153,8 +2153,6 @@ PlanRouterQuery(Query *originalQuery, bool replacePrunedQueryWithDummy, bool *multiShardModifyQuery, Const **partitionValueConst) { - RelationRestrictionContext *relationRestrictionContext = - plannerRestrictionContext->relationRestrictionContext; bool isMultiShardQuery = false; DeferredErrorMessage *planningError = NULL; bool shardsPresent = false; @@ -2267,13 +2265,15 @@ PlanRouterQuery(Query *originalQuery, /* we need anchor shard id for select queries with router planner */ uint64 shardId = GetAnchorShardId(*prunedShardIntervalListList); - bool hasLocalRelation = relationRestrictionContext->hasLocalRelation; - + /* both Postgres tables and materialized tables are locally avaliable */ + RTEListProperties *rteProperties = GetRTEListPropertiesForQuery(originalQuery); + bool hasPostgresLocalRelation = + rteProperties->hasPostgresLocalTable || rteProperties->hasMaterializedView; List *taskPlacementList = CreateTaskPlacementListForShardIntervals(*prunedShardIntervalListList, shardsPresent, replacePrunedQueryWithDummy, - hasLocalRelation); + hasPostgresLocalRelation); if (taskPlacementList == NIL) { planningError = DeferredError(ERRCODE_FEATURE_NOT_SUPPORTED, @@ -3554,8 +3554,6 @@ CopyRelationRestrictionContext(RelationRestrictionContext *oldContext) (RelationRestrictionContext *) palloc(sizeof(RelationRestrictionContext)); ListCell *relationRestrictionCell = NULL; - newContext->hasDistributedRelation = oldContext->hasDistributedRelation; - newContext->hasLocalRelation = oldContext->hasLocalRelation; newContext->allReferenceTables = oldContext->allReferenceTables; newContext->relationRestrictionList = NIL; 
diff --git a/src/backend/distributed/planner/relation_restriction_equivalence.c b/src/backend/distributed/planner/relation_restriction_equivalence.c index 438fac555..b52146611 100644 --- a/src/backend/distributed/planner/relation_restriction_equivalence.c +++ b/src/backend/distributed/planner/relation_restriction_equivalence.c @@ -1820,10 +1820,6 @@ FilterPlannerRestrictionForQuery(PlannerRestrictionContext *plannerRestrictionCo filteredRelationRestrictionContext->allReferenceTables = (totalRelationCount == referenceRelationCount); - /* we currently don't support local relations and we cannot come up to this point */ - filteredRelationRestrictionContext->hasLocalRelation = false; - filteredRelationRestrictionContext->hasDistributedRelation = true; - /* finally set the relation and join restriction contexts */ filteredPlannerRestrictionContext->relationRestrictionContext = filteredRelationRestrictionContext; diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h index 4abe52d02..fc7af1652 100644 --- a/src/include/distributed/distributed_planner.h +++ b/src/include/distributed/distributed_planner.h @@ -40,8 +40,6 @@ extern int PlannerLevel; typedef struct RelationRestrictionContext { - bool hasDistributedRelation; - bool hasLocalRelation; bool allReferenceTables; List *relationRestrictionList; } RelationRestrictionContext; @@ -148,8 +146,10 @@ typedef struct RTEListProperties /* includes hash, append and range partitioned tables */ bool hasDistributedTable; - /* union of above three */ + /* union of hasReferenceTable, hasCitusLocalTable and hasDistributedTable */ bool hasCitusTable; + + bool hasMaterializedView; } RTEListProperties; diff --git a/src/test/regress/expected/materialized_view.out b/src/test/regress/expected/materialized_view.out index a8299e175..4ef7818a2 100644 --- a/src/test/regress/expected/materialized_view.out +++ b/src/test/regress/expected/materialized_view.out @@ -29,6 +29,15 @@ SELECT count(*) 
FROM temp_lineitem; 1706 (1 row) +-- can create router materialized views +CREATE MATERIALIZED VIEW mode_counts_router +AS SELECT l_shipmode, count(*) FROM temp_lineitem WHERE l_orderkey = 1 GROUP BY l_shipmode; +SELECT * FROM mode_counts_router; + l_shipmode | count +--------------------------------------------------------------------- + AIR | 1 +(1 row) + -- can create and query materialized views CREATE MATERIALIZED VIEW mode_counts AS SELECT l_shipmode, count(*) FROM temp_lineitem GROUP BY l_shipmode; @@ -59,6 +68,7 @@ SELECT * FROM mode_counts WHERE l_shipmode = 'AIR' ORDER BY 2 DESC, 1 LIMIT 10; DROP MATERIALIZED VIEW mode_counts; DROP TABLE temp_lineitem CASCADE; +NOTICE: drop cascades to materialized view mode_counts_router -- Refresh single-shard materialized view CREATE MATERIALIZED VIEW materialized_view AS SELECT orders_hash_part.o_orderdate, total_price.price_sum diff --git a/src/test/regress/expected/recursive_view_local_table.out b/src/test/regress/expected/recursive_view_local_table.out new file mode 100644 index 000000000..a2306a2e9 --- /dev/null +++ b/src/test/regress/expected/recursive_view_local_table.out @@ -0,0 +1,201 @@ +CREATE SCHEMA postgres_local_table; +SET search_path TO postgres_local_table; +CREATE TABLE local_table(a INT); +INSERT INTO local_table VALUES (1),(2),(3); +CREATE RECURSIVE VIEW recursive_view(val_1, val_2) AS +( + VALUES(0,1) + UNION ALL + SELECT GREATEST(val_1,val_2),val_1 + val_2 AS local_table + FROM + recursive_view + WHERE val_2 < 50 +); +CREATE RECURSIVE VIEW recursive_defined_non_recursive_view(c) AS (SELECT 1 FROM local_table); +CREATE TABLE ref_table(a int, b INT); +SELECT create_reference_table('ref_table'); + create_reference_table +--------------------------------------------------------------------- + +(1 row) + +INSERT INTO ref_table VALUES (1,1); +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_view WHERE FALSE) AS sub ON FALSE; + a | b 
+--------------------------------------------------------------------- + 1 | 1 +(1 row) + +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE FALSE) AS sub ON FALSE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_view WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_view WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 +(10 rows) + +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 + 1 | 1 + 1 | 1 +(3 rows) + +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_view WHERE FALSE) AS sub ON FALSE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE FALSE) AS sub ON FALSE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_view WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- +(0 
rows) + +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_view WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 + 1 | 1 +(10 rows) + +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 + 1 | 1 + 1 | 1 +(3 rows) + +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE FALSE) AS sub ON FALSE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 + 1 | 1 + 1 | 1 +(3 rows) + +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE FALSE) AS sub ON FALSE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT ref_table.* FROM 
ref_table JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; + a | b +--------------------------------------------------------------------- + 1 | 1 + 1 | 1 + 1 | 1 +(3 rows) + +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM local_table l WHERE l.a = ref_table.a); +ERROR: direct joins between distributed and local tables are not supported +HINT: Use CTE's or subqueries to select from local tables and use them in joins +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM local_table l WHERE l.a = ref_table.a) AND false; + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM local_table l WHERE l.a = ref_table.a AND false); + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_view l WHERE l.val_1 = ref_table.a); + a | b +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_view l WHERE l.val_1 = ref_table.a) AND false; + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_view l WHERE l.val_1 = ref_table.a AND false); + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_defined_non_recursive_view l WHERE l.c = ref_table.a); + a | b +--------------------------------------------------------------------- + 1 | 1 +(1 row) + +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_defined_non_recursive_view l WHERE l.c = ref_table.a) AND false; + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT ref_table.* 
FROM ref_table WHERE EXISTS (SELECT * FROM recursive_defined_non_recursive_view l WHERE l.c = ref_table.a AND false); + a | b +--------------------------------------------------------------------- +(0 rows) + +DROP SCHEMA postgres_local_table CASCADE; +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table local_table +drop cascades to view recursive_view +drop cascades to view recursive_defined_non_recursive_view +drop cascades to table ref_table diff --git a/src/test/regress/multi_schedule b/src/test/regress/multi_schedule index c9e60c809..4f6ed8ee4 100644 --- a/src/test/regress/multi_schedule +++ b/src/test/regress/multi_schedule @@ -85,7 +85,7 @@ test: set_operation_and_local_tables test: subqueries_deep subquery_view subquery_partitioning subquery_complex_target_list subqueries_not_supported subquery_in_where test: non_colocated_leaf_subquery_joins non_colocated_subquery_joins non_colocated_join_order -test: subquery_prepared_statements pg12 cte_inline pg13 +test: subquery_prepared_statements pg12 cte_inline pg13 recursive_view_local_table test: tableam # ---------- diff --git a/src/test/regress/sql/materialized_view.sql b/src/test/regress/sql/materialized_view.sql index 4578f6086..c0e05db05 100644 --- a/src/test/regress/sql/materialized_view.sql +++ b/src/test/regress/sql/materialized_view.sql @@ -19,6 +19,11 @@ SELECT count(*) FROM temp_lineitem; INSERT INTO temp_lineitem SELECT * FROM air_shipped_lineitems WHERE l_shipmode = 'MAIL'; SELECT count(*) FROM temp_lineitem; +-- can create router materialized views +CREATE MATERIALIZED VIEW mode_counts_router +AS SELECT l_shipmode, count(*) FROM temp_lineitem WHERE l_orderkey = 1 GROUP BY l_shipmode; +SELECT * FROM mode_counts_router; + -- can create and query materialized views CREATE MATERIALIZED VIEW mode_counts AS SELECT l_shipmode, count(*) FROM temp_lineitem GROUP BY l_shipmode; diff --git a/src/test/regress/sql/recursive_view_local_table.sql 
b/src/test/regress/sql/recursive_view_local_table.sql new file mode 100644 index 000000000..c33a95e99 --- /dev/null +++ b/src/test/regress/sql/recursive_view_local_table.sql @@ -0,0 +1,55 @@ +CREATE SCHEMA postgres_local_table; +SET search_path TO postgres_local_table; + +CREATE TABLE local_table(a INT); +INSERT INTO local_table VALUES (1),(2),(3); + +CREATE RECURSIVE VIEW recursive_view(val_1, val_2) AS +( + VALUES(0,1) + UNION ALL + SELECT GREATEST(val_1,val_2),val_1 + val_2 AS local_table + FROM + recursive_view + WHERE val_2 < 50 +); + +CREATE RECURSIVE VIEW recursive_defined_non_recursive_view(c) AS (SELECT 1 FROM local_table); + +CREATE TABLE ref_table(a int, b INT); +SELECT create_reference_table('ref_table'); +INSERT INTO ref_table VALUES (1,1); + +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_view WHERE FALSE) AS sub ON FALSE; +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE FALSE) AS sub ON FALSE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_view WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_view WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_view WHERE FALSE) AS sub ON FALSE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE FALSE) AS sub ON FALSE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_view WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM 
(SELECT 1, random() FROM local_table) as s WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_view WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM (SELECT 1, random() FROM local_table) as s WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE FALSE) AS sub ON FALSE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table LEFT OUTER JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE FALSE) AS sub ON FALSE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE FALSE) AS sub ON TRUE ORDER BY 1,2; +SELECT ref_table.* FROM ref_table JOIN (SELECT * FROM recursive_defined_non_recursive_view WHERE TRUE) AS sub ON TRUE ORDER BY 1,2; + +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM local_table l WHERE l.a = ref_table.a); +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM local_table l WHERE l.a = ref_table.a) AND false; +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM local_table l WHERE l.a = ref_table.a AND false); + +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_view l WHERE l.val_1 = ref_table.a); +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_view l WHERE l.val_1 = ref_table.a) AND false; +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_view l WHERE l.val_1 = ref_table.a AND false); + +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_defined_non_recursive_view l WHERE l.c = ref_table.a); +SELECT 
ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_defined_non_recursive_view l WHERE l.c = ref_table.a) AND false; +SELECT ref_table.* FROM ref_table WHERE EXISTS (SELECT * FROM recursive_defined_non_recursive_view l WHERE l.c = ref_table.a AND false); + + +DROP SCHEMA postgres_local_table CASCADE; From 5d5966f700bbe3b53bca1e969147e7255be42a13 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Fri, 6 Nov 2020 14:53:30 +0300 Subject: [PATCH 100/124] Fix a flaky test in mixed_relkind_tests (#4300) --- src/test/regress/expected/mixed_relkind_tests.out | 10 +++++----- src/test/regress/sql/mixed_relkind_tests.sql | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/regress/expected/mixed_relkind_tests.out b/src/test/regress/expected/mixed_relkind_tests.out index 27b7f26e4..1185b0d00 100644 --- a/src/test/regress/expected/mixed_relkind_tests.out +++ b/src/test/regress/expected/mixed_relkind_tests.out @@ -212,15 +212,15 @@ SELECT * FROM mat_view_on_dist UNION SELECT 1 ORDER BY 1; SET client_min_messages TO DEBUG1; -- can push down the union in subquery -SELECT * FROM (SELECT * FROM partitioned_distributed_table UNION SELECT * FROM partitioned_distributed_table) AS foo; +SELECT * FROM (SELECT * FROM partitioned_distributed_table UNION SELECT * FROM partitioned_distributed_table) AS foo ORDER BY 1,2; a | b --------------------------------------------------------------------- - 1 | 2 - 5 | 6 - 4 | 5 - 3 | 4 0 | 1 + 1 | 2 2 | 3 + 3 | 4 + 4 | 5 + 5 | 6 (6 rows) -- cannot push down the subquery, should evaluate subquery by creating a subplan diff --git a/src/test/regress/sql/mixed_relkind_tests.sql b/src/test/regress/sql/mixed_relkind_tests.sql index 62ba66508..795307d65 100644 --- a/src/test/regress/sql/mixed_relkind_tests.sql +++ b/src/test/regress/sql/mixed_relkind_tests.sql @@ -78,7 +78,7 @@ SELECT * FROM mat_view_on_dist UNION SELECT 1 ORDER BY 1; SET client_min_messages TO DEBUG1; -- can push down the union in subquery -SELECT * FROM 
(SELECT * FROM partitioned_distributed_table UNION SELECT * FROM partitioned_distributed_table) AS foo; +SELECT * FROM (SELECT * FROM partitioned_distributed_table UNION SELECT * FROM partitioned_distributed_table) AS foo ORDER BY 1,2; -- cannot push down the subquery, should evaluate subquery by creating a subplan SELECT COUNT(*) FROM (SELECT b, random() FROM partitioned_distributed_table GROUP BY b) AS foo; From d3019f1b6d153177cc9d64fa4b8190a655e7bc57 Mon Sep 17 00:00:00 2001 From: Hanefi Onaldi Date: Mon, 9 Nov 2020 12:03:59 +0300 Subject: [PATCH 101/124] Introduce foreach_ptr_modify macro (#4303) If one wishes to iterate through a List and insert list elements in PG13, it is not safe to use for_each_ptr as the List representation in PostgreSQL no longer linked lists, but arrays, and it is possible that the whole array is repalloc'ed if ther is not sufficient space available. See postgres commit 1cff1b95ab6ddae32faa3efe0d95a820dbfdc164 for more information --- src/backend/distributed/metadata/dependency.c | 2 +- src/include/distributed/listutils.h | 32 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/backend/distributed/metadata/dependency.c b/src/backend/distributed/metadata/dependency.c index 6dacf2cc3..81106207b 100644 --- a/src/backend/distributed/metadata/dependency.c +++ b/src/backend/distributed/metadata/dependency.c @@ -1097,7 +1097,7 @@ GetDependingViews(Oid relationId) List *dependingViews = NIL; List *nodeQueue = list_make1(tableNode); ViewDependencyNode *node = NULL; - foreach_ptr(node, nodeQueue) + foreach_ptr_append(node, nodeQueue) { ViewDependencyNode *dependingNode = NULL; foreach_ptr(dependingNode, node->dependingNodes) diff --git a/src/include/distributed/listutils.h b/src/include/distributed/listutils.h index ddc14c7a2..3a177f7d9 100644 --- a/src/include/distributed/listutils.h +++ b/src/include/distributed/listutils.h @@ -80,6 +80,38 @@ typedef struct ListCellAndListWrapper (((var) = lfirst_oid(var ## 
CellDoNotUse)) || true); \ var ## CellDoNotUse = lnext_compat(l, var ## CellDoNotUse)) +/* + * foreach_ptr_append - + * a convenience macro which loops through a pointer List and can append list + * elements without needing a ListCell or and index variable, just a declared + * pointer variable to store the iterated values. + * + * PostgreSQL 13 changed the representation of Lists to expansible arrays, + * not chains of cons-cells. This changes the costs for accessing and + * mutating List contents. Therefore different implementations are provided. + * + * For more information, see postgres commit with sha + * 1cff1b95ab6ddae32faa3efe0d95a820dbfdc164 + */ +#if PG_VERSION_NUM >= PG_VERSION_13 + +/* + * How it works: + * - An index is declared with the name {var}PositionDoNotUse and used + * throughout the for loop using ## to concat. + * - To assign to var it needs to be done in the condition of the for loop, + * because we cannot use the initializer since the index variable is + * declared there. + * - || true is used to always enter the loop even if var is NULL. 
+ */ +#define foreach_ptr_append(var, l) \ + for (int var ## PositionDoNotUse = 0; \ + (var ## PositionDoNotUse) < list_length(l) && \ + (((var) = list_nth(l, var ## PositionDoNotUse)) || true); \ + var ## PositionDoNotUse ++) +#else +#define foreach_ptr_append(var, l) foreach_ptr(var, l) +#endif /* utility functions declaration shared within this module */ extern List * SortList(List *pointerList, From 5e3dc9d7075648441519b9cde1c2383f4c4343f2 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 9 Nov 2020 12:47:16 +0300 Subject: [PATCH 102/124] Bump citus version to 10.0devel --- configure | 18 +++++++-------- configure.in | 2 +- src/backend/distributed/citus.control | 2 +- .../distributed/sql/citus--10.0-1--9.5-1.sql | 2 ++ .../distributed/sql/citus--9.5-1--10.0-1.sql | 3 +++ src/test/regress/expected/multi_extension.out | 22 ++++++++++++++++--- src/test/regress/sql/multi_extension.sql | 10 +++++++++ src/test/regress/upgrade/config.py | 2 +- 8 files changed, 46 insertions(+), 15 deletions(-) create mode 100644 src/backend/distributed/sql/citus--10.0-1--9.5-1.sql create mode 100644 src/backend/distributed/sql/citus--9.5-1--10.0-1.sql diff --git a/configure b/configure index de36e580e..9d088c3e0 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for Citus 9.5devel. +# Generated by GNU Autoconf 2.69 for Citus 10.0devel. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -579,8 +579,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='Citus' PACKAGE_TARNAME='citus' -PACKAGE_VERSION='9.5devel' -PACKAGE_STRING='Citus 9.5devel' +PACKAGE_VERSION='10.0devel' +PACKAGE_STRING='Citus 10.0devel' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1242,7 +1242,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. 
cat <<_ACEOF -\`configure' configures Citus 9.5devel to adapt to many kinds of systems. +\`configure' configures Citus 10.0devel to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1303,7 +1303,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of Citus 9.5devel:";; + short | recursive ) echo "Configuration of Citus 10.0devel:";; esac cat <<\_ACEOF @@ -1403,7 +1403,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -Citus configure 9.5devel +Citus configure 10.0devel generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1886,7 +1886,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by Citus $as_me 9.5devel, which was +It was created by Citus $as_me 10.0devel, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -5055,7 +5055,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by Citus $as_me 9.5devel, which was +This file was extended by Citus $as_me 10.0devel, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -5117,7 +5117,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -Citus config.status 9.5devel +Citus config.status 10.0devel configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/configure.in b/configure.in index 2155a37ca..58bcc906e 100644 --- a/configure.in +++ b/configure.in @@ -5,7 +5,7 @@ # everyone needing autoconf installed, the resulting files are checked # into the SCM. 
-AC_INIT([Citus], [9.5devel]) +AC_INIT([Citus], [10.0devel]) AC_COPYRIGHT([Copyright (c) Citus Data, Inc.]) # we'll need sed and awk for some of the version commands diff --git a/src/backend/distributed/citus.control b/src/backend/distributed/citus.control index e5189327c..296cef23c 100644 --- a/src/backend/distributed/citus.control +++ b/src/backend/distributed/citus.control @@ -1,6 +1,6 @@ # Citus extension comment = 'Citus distributed database' -default_version = '9.5-1' +default_version = '10.0-1' module_pathname = '$libdir/citus' relocatable = false schema = pg_catalog diff --git a/src/backend/distributed/sql/citus--10.0-1--9.5-1.sql b/src/backend/distributed/sql/citus--10.0-1--9.5-1.sql new file mode 100644 index 000000000..e07dcfbd8 --- /dev/null +++ b/src/backend/distributed/sql/citus--10.0-1--9.5-1.sql @@ -0,0 +1,2 @@ +-- citus--10.0-1--9.5-1 +-- this is an empty downgrade path since citus--9.5-1--10.0-1.sql is empty for now diff --git a/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql b/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql new file mode 100644 index 000000000..fb96a78fe --- /dev/null +++ b/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql @@ -0,0 +1,3 @@ +-- citus--9.5-1--10.0-1 + +-- bump version to 10.0-1 diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index b640a8aa3..1f8434840 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -453,12 +453,28 @@ SELECT * FROM print_extension_changes(); | function worker_record_sequence_dependency(regclass,regclass,name) (10 rows) +-- Test downgrade to 9.5-1 from 10.0-1 +ALTER EXTENSION citus UPDATE TO '10.0-1'; +ALTER EXTENSION citus UPDATE TO '9.5-1'; +-- Should be empty result since upgrade+downgrade should be a no-op +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- +(0 rows) 
+ +-- Snapshot of state at 10.0-1 +ALTER EXTENSION citus UPDATE TO '10.0-1'; +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- +(0 rows) + DROP TABLE prev_objects, extension_diff; -- show running version SHOW citus.version; citus.version --------------------------------------------------------------------- - 9.5devel + 10.0devel (1 row) -- ensure no objects were created outside pg_catalog @@ -480,7 +496,7 @@ RESET citus.enable_version_checks; DROP EXTENSION citus; CREATE EXTENSION citus VERSION '7.0-1'; ERROR: specified version incompatible with loaded Citus library -DETAIL: Loaded library requires 9.5, but 7.0-1 was specified. +DETAIL: Loaded library requires 10.0, but 7.0-1 was specified. HINT: If a newer library is present, restart the database and try the command again. -- Test non-distributed queries work even in version mismatch SET citus.enable_version_checks TO 'false'; @@ -523,7 +539,7 @@ ORDER BY 1; -- We should not distribute table in version mistmatch SELECT create_distributed_table('version_mismatch_table', 'column1'); ERROR: loaded Citus library version differs from installed extension version -DETAIL: Loaded library requires 9.5, but the installed extension version is 7.1-1. +DETAIL: Loaded library requires 10.0, but the installed extension version is 7.1-1. HINT: Run ALTER EXTENSION citus UPDATE and try again. 
-- This function will cause fail in next ALTER EXTENSION CREATE OR REPLACE FUNCTION pg_catalog.master_dist_authinfo_cache_invalidate() diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index fe4163f15..49c5413d4 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -215,6 +215,16 @@ SELECT * FROM print_extension_changes(); ALTER EXTENSION citus UPDATE TO '9.5-1'; SELECT * FROM print_extension_changes(); +-- Test downgrade to 9.5-1 from 10.0-1 +ALTER EXTENSION citus UPDATE TO '10.0-1'; +ALTER EXTENSION citus UPDATE TO '9.5-1'; +-- Should be empty result since upgrade+downgrade should be a no-op +SELECT * FROM print_extension_changes(); + +-- Snapshot of state at 10.0-1 +ALTER EXTENSION citus UPDATE TO '10.0-1'; +SELECT * FROM print_extension_changes(); + DROP TABLE prev_objects, extension_diff; -- show running version diff --git a/src/test/regress/upgrade/config.py b/src/test/regress/upgrade/config.py index feb90d92c..d50433637 100644 --- a/src/test/regress/upgrade/config.py +++ b/src/test/regress/upgrade/config.py @@ -9,7 +9,7 @@ BEFORE_CITUS_UPGRADE_COORD_SCHEDULE = './before_citus_upgrade_coord_schedule' MASTER = 'master' # This should be updated when citus version changes -MASTER_VERSION = '9.5' +MASTER_VERSION = '10.0' HOME = expanduser("~") From 4bf754b245fccae24f324d42b43cfe646270defd Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 9 Nov 2020 16:43:56 +0300 Subject: [PATCH 103/124] Fix location of citus--10.0-1--9.5-1.sql downgrade script (#4306) --- .../distributed/sql/{ => downgrades}/citus--10.0-1--9.5-1.sql | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/backend/distributed/sql/{ => downgrades}/citus--10.0-1--9.5-1.sql (100%) diff --git a/src/backend/distributed/sql/citus--10.0-1--9.5-1.sql b/src/backend/distributed/sql/downgrades/citus--10.0-1--9.5-1.sql similarity index 100% rename from 
src/backend/distributed/sql/citus--10.0-1--9.5-1.sql rename to src/backend/distributed/sql/downgrades/citus--10.0-1--9.5-1.sql From 52a5ab07516e3126a89f283a2fa38529110ae5b7 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Mon, 9 Nov 2020 15:21:38 +0300 Subject: [PATCH 104/124] Update CHANGELOG for 9.5.0 --- CHANGELOG.md | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ceac0ba72..3e7de2259 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,95 @@ +### citus v9.5.0 (November 10, 2020) ### + +* Adds support for PostgreSQL 13 + +* Removes the task-tracker executor + +* Introduces citus local tables + +* Introduces `undistribute_table` UDF to convert tables back to postgres tables + +* Adds support for `EXPLAIN (ANALYZE) EXECUTE` and `EXPLAIN EXECUTE` + +* Adds support for `EXPLAIN (ANALYZE, WAL)` for PG13 + +* Sorts the output of `EXPLAIN (ANALYZE)` by execution duration. + +* Adds support for CREATE TABLE ... 
USING table_access_method + +* Adds support for `WITH TIES` option in SELECT and INSERT SELECT queries + +* Avoids taking multi-shard locks on workers + +* Enforces `citus.max_shared_pool_size` config in COPY queries + +* Enables custom aggregates with multiple parameters to be executed on workers + +* Enforces `citus.max_intermediate_result_size` in local execution + +* Improves cost estimation of INSERT SELECT plans + +* Introduces delegation of procedures that read from reference tables + +* Prevents pull-push execution for simple pushdownable subqueries + +* Improves error message when creating a foreign key to a local table + +* Makes `citus_prepare_pg_upgrade` idempotent by dropping transition tables + +* Disallows `ON TRUE` outer joins with reference & distributed tables when + reference table is outer relation to avoid incorrect results + +* Disallows field indirection in INSERT/UPDATE queries to avoid incorrect + results + +* Disallows volatile functions in UPDATE subqueries to avoid incorrect results + +* Fixes CREATE INDEX CONCURRENTLY crash with local execution + +* Fixes `citus_finish_pg_upgrade` to drop all backup tables + +* Fixes a bug that cause failures when `RECURSIVE VIEW` joined reference table + +* Fixes DROP SEQUENCE failures when metadata syncing is enabled + +* Fixes a bug that caused CREATE TABLE with CHECK constraint to fail + +* Fixes a bug that could cause VACUUM to deadlock + +* Fixes master_update_node failure when no background worker slots are available + +* Fixes a bug that caused replica identity to not be propagated on shard repair + +* Fixes a bug that could cause crashes after connection timeouts + +* Fixes a bug that could cause crashes with certain compile flags + +* Fixes a bug that could cause deadlocks on CREATE INDEX + +* Fixes a bug with genetic query optimization in outer joins + +* Fixes a crash when aggregating empty tables + +* Fixes a crash with inserting domain constrained composite types + +* Fixes a crash with 
multi-row & router INSERT's in local execution + +* Fixes a possibility of doing temporary file cleanup more than once + +* Fixes incorrect setting of join related fields + +* Fixes memory issues around deparsing index commands + +* Fixes reference table access tracking for sequential execution + +* Fixes removal of a single node with only reference tables + +* Fixes sending commands to coordinator when it is added as a worker + +* Fixes write queries with const expressions and COLLATE in various places + +* Fixes wrong cancellation message about distributed deadlock + ### citus v9.4.2 (October 21, 2020) ### * Fixes a bug that could lead to multiple maintenance daemons From 7d1480007163a69ad9db29d6ec68ff36d20eeeb9 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Wed, 11 Nov 2020 15:43:04 +0100 Subject: [PATCH 105/124] add placeholder for enterprise modules --- src/backend/distributed/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/distributed/Makefile b/src/backend/distributed/Makefile index d2f441250..507c72d9d 100644 --- a/src/backend/distributed/Makefile +++ b/src/backend/distributed/Makefile @@ -19,6 +19,8 @@ DATA_built = $(generated_sql_files) # directories with source files SUBDIRS = . 
commands connection ddl deparser executor metadata operations planner progress relay safeclib test transaction utils worker +# enterprise modules +SUBDIRS += # Symlinks are not copied over to the build directory if a separete build # directory is used during configure (such as on CI) From 0c0fc69f2a3ebe76aa974551610dd0c7424b8237 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=96nder=20Kalac=C4=B1?= Date: Tue, 17 Nov 2020 11:41:57 +0100 Subject: [PATCH 106/124] Remove unused field (#4275) --- src/backend/distributed/planner/distributed_planner.c | 1 - src/backend/distributed/planner/multi_router_planner.c | 4 ---- src/include/distributed/distributed_planner.h | 1 - 3 files changed, 6 deletions(-) diff --git a/src/backend/distributed/planner/distributed_planner.c b/src/backend/distributed/planner/distributed_planner.c index 964dfb943..21de545e2 100644 --- a/src/backend/distributed/planner/distributed_planner.c +++ b/src/backend/distributed/planner/distributed_planner.c @@ -1812,7 +1812,6 @@ multi_relation_restriction_hook(PlannerInfo *root, RelOptInfo *relOptInfo, relationRestriction->relOptInfo = relOptInfo; relationRestriction->distributedRelation = distributedTable; relationRestriction->plannerInfo = root; - relationRestriction->prunedShardIntervalList = NIL; /* see comments on GetVarFromAssignedParam() */ relationRestriction->outerPlanParamsList = OuterPlanParamsList(root); diff --git a/src/backend/distributed/planner/multi_router_planner.c b/src/backend/distributed/planner/multi_router_planner.c index 89e544374..ae4a5e312 100644 --- a/src/backend/distributed/planner/multi_router_planner.c +++ b/src/backend/distributed/planner/multi_router_planner.c @@ -2651,8 +2651,6 @@ TargetShardIntervalsForRestrictInfo(RelationRestrictionContext *restrictionConte List *joinInfoList = relationRestriction->relOptInfo->joininfo; List *pseudoRestrictionList = extract_actual_clauses(joinInfoList, true); - relationRestriction->prunedShardIntervalList = NIL; - /* * Queries may have 
contradiction clauses like 'false', or '1=0' in * their filters. Such queries would have pseudo constant 'false' @@ -2682,7 +2680,6 @@ TargetShardIntervalsForRestrictInfo(RelationRestrictionContext *restrictionConte } } - relationRestriction->prunedShardIntervalList = prunedShardIntervalList; prunedShardIntervalListList = lappend(prunedShardIntervalListList, prunedShardIntervalList); } @@ -3581,7 +3578,6 @@ CopyRelationRestrictionContext(RelationRestrictionContext *oldContext) /* not copyable, but readonly */ newRestriction->plannerInfo = oldRestriction->plannerInfo; - newRestriction->prunedShardIntervalList = oldRestriction->prunedShardIntervalList; newContext->relationRestrictionList = lappend(newContext->relationRestrictionList, newRestriction); diff --git a/src/include/distributed/distributed_planner.h b/src/include/distributed/distributed_planner.h index fc7af1652..2a0433e07 100644 --- a/src/include/distributed/distributed_planner.h +++ b/src/include/distributed/distributed_planner.h @@ -64,7 +64,6 @@ typedef struct RelationRestriction RangeTblEntry *rte; RelOptInfo *relOptInfo; PlannerInfo *plannerInfo; - List *prunedShardIntervalList; /* list of RootPlanParams for all outer nodes */ List *outerPlanParamsList; From 34de1f645c49e5cf1e238ae05d7caa4841a5c780 Mon Sep 17 00:00:00 2001 From: SaitTalhaNisanci Date: Tue, 17 Nov 2020 19:16:08 +0300 Subject: [PATCH 107/124] Update failure test dependencies (#4284) * Update failure test dependencies There was a security alert for cryptography. The vulnerability was fixed in 3.2.0. The vulnebarility: "RSA decryption was vulnerable to Bleichenbacher timing vulnerabilities, which would impact people using RSA decryption in online scenarios." The fix: https://github.com/pyca/cryptography/commit/58494b41d6ecb0f56b7c5f05d5f5e3ca0320d494 It wasn't enough to only update crpytography because mitm was incompatible with the new version, so mitm is also upgraded. 
The steps to do in local: python -m pip install -U cryptography python -m pip install -U mitmproxy --- src/test/regress/Pipfile | 3 +- src/test/regress/Pipfile.lock | 575 ++++++++++++++++++++++++---------- 2 files changed, 407 insertions(+), 171 deletions(-) diff --git a/src/test/regress/Pipfile b/src/test/regress/Pipfile index 7cc0276c1..8104a021e 100644 --- a/src/test/regress/Pipfile +++ b/src/test/regress/Pipfile @@ -4,9 +4,10 @@ url = "https://pypi.python.org/simple" verify_ssl = true [packages] -mitmproxy = "==4.0.4" +mitmproxy = "==5.3.0" construct = "==2.9.45" docopt = "==0.6.2" +cryptography = "==3.2.1" [dev-packages] diff --git a/src/test/regress/Pipfile.lock b/src/test/regress/Pipfile.lock index 6ac8ca351..a88851e17 100644 --- a/src/test/regress/Pipfile.lock +++ b/src/test/regress/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "b275a748e05be7bb653f589f10e279838e2401900af6c211a00edb1212252a1c" + "sha256": "f8df9d3f82264315551af66b9cbc76a5e0c5ea656feaef30b3c8235e15d64e43" }, "pipfile-spec": 6, "requires": { @@ -16,12 +16,12 @@ ] }, "default": { - "asn1crypto": { + "asgiref": { "hashes": [ - "sha256:2f1adbb7546ed199e3c90ef23ec95c5cf3585bac7d11fb7eb562a3fe89c64e87", - "sha256:9d5c20441baf0cb60a4ac34cc447c6c189024b6b4c6cd7877034f4965c464e49" + "sha256:a5098bc870b80e7b872bff60bb363c7f2c2c89078759f6c47b53ff8c525a152e", + "sha256:cd88907ecaec59d78e4ac00ea665b03e571cb37e3a0e37b3702af1a9e86c365a" ], - "version": "==0.24.0" + "version": "==3.3.0" }, "blinker": { "hashes": [ @@ -29,88 +29,95 @@ ], "version": "==1.4" }, - "brotlipy": { + "brotli": { "hashes": [ - "sha256:07194f4768eb62a4f4ea76b6d0df6ade185e24ebd85877c351daa0a069f1111a", - "sha256:091b299bf36dd6ef7a06570dbc98c0f80a504a56c5b797f31934d2ad01ae7d17", - "sha256:09ec3e125d16749b31c74f021aba809541b3564e5359f8c265cbae442810b41a", - "sha256:0be698678a114addcf87a4b9496c552c68a2c99bf93cf8e08f5738b392e82057", - "sha256:0fa6088a9a87645d43d7e21e32b4a6bf8f7c3939015a50158c10972aa7f425b7", - 
"sha256:1379347337dc3d20b2d61456d44ccce13e0625db2611c368023b4194d5e2477f", - "sha256:1ea4e578241504b58f2456a6c69952c88866c794648bdc74baee74839da61d44", - "sha256:2699945a0a992c04fc7dc7fa2f1d0575a2c8b4b769f2874a08e8eae46bef36ae", - "sha256:2a80319ae13ea8dd60ecdc4f5ccf6da3ae64787765923256b62c598c5bba4121", - "sha256:2e5c64522364a9ebcdf47c5744a5ddeb3f934742d31e61ebfbbc095460b47162", - "sha256:36def0b859beaf21910157b4c33eb3b06d8ce459c942102f16988cca6ea164df", - "sha256:3a3e56ced8b15fbbd363380344f70f3b438e0fd1fcf27b7526b6172ea950e867", - "sha256:3c1d5e2cf945a46975bdb11a19257fa057b67591eb232f393d260e7246d9e571", - "sha256:4e4638b49835d567d447a2cfacec109f9a777f219f071312268b351b6839436d", - "sha256:50ca336374131cfad20612f26cc43c637ac0bfd2be3361495e99270883b52962", - "sha256:5de6f7d010b7558f72f4b061a07395c5c3fd57f0285c5af7f126a677b976a868", - "sha256:637847560d671657f993313ecc6c6c6666a936b7a925779fd044065c7bc035b9", - "sha256:653faef61241bf8bf99d73ca7ec4baa63401ba7b2a2aa88958394869379d67c7", - "sha256:786afc8c9bd67de8d31f46e408a3386331e126829114e4db034f91eacb05396d", - "sha256:79aaf217072840f3e9a3b641cccc51f7fc23037496bd71e26211856b93f4b4cb", - "sha256:7e31f7adcc5851ca06134705fcf3478210da45d35ad75ec181e1ce9ce345bb38", - "sha256:8b39abc3256c978f575df5cd7893153277216474f303e26f0e43ba3d3969ef96", - "sha256:9448227b0df082e574c45c983fa5cd4bda7bfb11ea6b59def0940c1647be0c3c", - "sha256:96bc59ff9b5b5552843dc67999486a220e07a0522dddd3935da05dc194fa485c", - "sha256:a07647886e24e2fb2d68ca8bf3ada398eb56fd8eac46c733d4d95c64d17f743b", - "sha256:af65d2699cb9f13b26ec3ba09e75e80d31ff422c03675fcb36ee4dabe588fdc2", - "sha256:b4c98b0d2c9c7020a524ca5bbff42027db1004c6571f8bc7b747f2b843128e7a", - "sha256:c6cc0036b1304dd0073eec416cb2f6b9e37ac8296afd9e481cac3b1f07f9db25", - "sha256:d2c1c724c4ac375feb2110f1af98ecdc0e5a8ea79d068efb5891f621a5b235cb", - "sha256:dc6c5ee0df9732a44d08edab32f8a616b769cc5a4155a12d2d010d248eb3fb07", - "sha256:fd1d1c64214af5d90014d82cee5d8141b13d44c92ada7a0c0ec0679c6f15a471" 
+ "sha256:160c78292e98d21e73a4cc7f76a234390e516afcd982fa17e1422f7c6a9ce9c8", + "sha256:16d528a45c2e1909c2798f27f7bf0a3feec1dc9e50948e738b961618e38b6a7b", + "sha256:1c48472a6ba3b113452355b9af0a60da5c2ae60477f8feda8346f8fd48e3e87c", + "sha256:268fe94547ba25b58ebc724680609c8ee3e5a843202e9a381f6f9c5e8bdb5c70", + "sha256:269a5743a393c65db46a7bb982644c67ecba4b8d91b392403ad8a861ba6f495f", + "sha256:35a3edbe18e876e596553c4007a087f8bcfd538f19bc116917b3c7522fca0429", + "sha256:3b78a24b5fd13c03ee2b7b86290ed20efdc95da75a3557cc06811764d5ad1126", + "sha256:40d15c79f42e0a2c72892bf407979febd9cf91f36f495ffb333d1d04cebb34e4", + "sha256:4d1b810aa0ed773f81dceda2cc7b403d01057458730e309856356d4ef4188438", + "sha256:503fa6af7da9f4b5780bb7e4cbe0c639b010f12be85d02c99452825dd0feef3f", + "sha256:56d027eace784738457437df7331965473f2c0da2c70e1a1f6fdbae5402e0389", + "sha256:5913a1177fc36e30fcf6dc868ce23b0453952c78c04c266d3149b3d39e1410d6", + "sha256:5b6ef7d9f9c38292df3690fe3e302b5b530999fa90014853dcd0d6902fb59f26", + "sha256:5cb1e18167792d7d21e21365d7650b72d5081ed476123ff7b8cac7f45189c0c7", + "sha256:61a7ee1f13ab913897dac7da44a73c6d44d48a4adff42a5701e3239791c96e14", + "sha256:68715970f16b6e92c574c30747c95cf8cf62804569647386ff032195dc89a430", + "sha256:6b2ae9f5f67f89aade1fab0f7fd8f2832501311c363a21579d02defa844d9296", + "sha256:6c772d6c0a79ac0f414a9f8947cc407e119b8598de7621f39cacadae3cf57d12", + "sha256:7cb81373984cc0e4682f31bc3d6be9026006d96eecd07ea49aafb06897746452", + "sha256:88c63a1b55f352b02c6ffd24b15ead9fc0e8bf781dbe070213039324922a2eea", + "sha256:93130612b837103e15ac3f9cbacb4613f9e348b58b3aad53721d92e57f96d46a", + "sha256:97f715cf371b16ac88b8c19da00029804e20e25f30d80203417255d239f228b5", + "sha256:9d12cf2851759b8de8ca5fde36a59c08210a97ffca0eb94c532ce7b17c6a3d1d", + "sha256:afde17ae04d90fbe53afb628f7f2d4ca022797aa093e809de5c3cf276f61bbfa", + "sha256:b663f1e02de5d0573610756398e44c130add0eb9a3fc912a09665332942a2efb", + 
"sha256:c2415d9d082152460f2bd4e382a1e85aed233abc92db5a3880da2257dc7daf7b", + "sha256:c83aa123d56f2e060644427a882a36b3c12db93727ad7a7b9efd7d7f3e9cc2c4", + "sha256:db844eb158a87ccab83e868a762ea8024ae27337fc7ddcbfcddd157f841fdfe7", + "sha256:defed7ea5f218a9f2336301e6fd379f55c655bea65ba2476346340a0ce6f74a1", + "sha256:f909bbbc433048b499cb9db9e713b5d8d949e8c109a2a548502fb9aa8630f0b1" ], - "version": "==0.7.0" + "version": "==1.0.9" }, "certifi": { "hashes": [ - "sha256:e4f3620cfea4f83eedc95b24abd9cd56f3c4b146dd0177e83a21b4eb49e21e50", - "sha256:fd7c7c74727ddcf00e9acd26bba8da604ffec95bf1c2144e67aff7a8b50e6cef" + "sha256:5930595817496dd21bb8dc35dad090f1c2cd0adfaf21204bf6732ca5d8ee34d3", + "sha256:8fc0819f1f30ba15bdb34cceffb9ef04d99f420f68eb75d901e9560b8749fc41" ], - "version": "==2019.9.11" + "version": "==2020.6.20" }, "cffi": { "hashes": [ - "sha256:041c81822e9f84b1d9c401182e174996f0bae9991f33725d059b771744290774", - "sha256:046ef9a22f5d3eed06334d01b1e836977eeef500d9b78e9ef693f9380ad0b83d", - "sha256:066bc4c7895c91812eff46f4b1c285220947d4aa46fa0a2651ff85f2afae9c90", - "sha256:066c7ff148ae33040c01058662d6752fd73fbc8e64787229ea8498c7d7f4041b", - "sha256:2444d0c61f03dcd26dbf7600cf64354376ee579acad77aef459e34efcb438c63", - "sha256:300832850b8f7967e278870c5d51e3819b9aad8f0a2c8dbe39ab11f119237f45", - "sha256:34c77afe85b6b9e967bd8154e3855e847b70ca42043db6ad17f26899a3df1b25", - "sha256:46de5fa00f7ac09f020729148ff632819649b3e05a007d286242c4882f7b1dc3", - "sha256:4aa8ee7ba27c472d429b980c51e714a24f47ca296d53f4d7868075b175866f4b", - "sha256:4d0004eb4351e35ed950c14c11e734182591465a33e960a4ab5e8d4f04d72647", - "sha256:4e3d3f31a1e202b0f5a35ba3bc4eb41e2fc2b11c1eff38b362de710bcffb5016", - "sha256:50bec6d35e6b1aaeb17f7c4e2b9374ebf95a8975d57863546fa83e8d31bdb8c4", - "sha256:55cad9a6df1e2a1d62063f79d0881a414a906a6962bc160ac968cc03ed3efcfb", - "sha256:5662ad4e4e84f1eaa8efce5da695c5d2e229c563f9d5ce5b0113f71321bcf753", - 
"sha256:59b4dc008f98fc6ee2bb4fd7fc786a8d70000d058c2bbe2698275bc53a8d3fa7", - "sha256:73e1ffefe05e4ccd7bcea61af76f36077b914f92b76f95ccf00b0c1b9186f3f9", - "sha256:a1f0fd46eba2d71ce1589f7e50a9e2ffaeb739fb2c11e8192aa2b45d5f6cc41f", - "sha256:a2e85dc204556657661051ff4bab75a84e968669765c8a2cd425918699c3d0e8", - "sha256:a5457d47dfff24882a21492e5815f891c0ca35fefae8aa742c6c263dac16ef1f", - "sha256:a8dccd61d52a8dae4a825cdbb7735da530179fea472903eb871a5513b5abbfdc", - "sha256:ae61af521ed676cf16ae94f30fe202781a38d7178b6b4ab622e4eec8cefaff42", - "sha256:b012a5edb48288f77a63dba0840c92d0504aa215612da4541b7b42d849bc83a3", - "sha256:d2c5cfa536227f57f97c92ac30c8109688ace8fa4ac086d19d0af47d134e2909", - "sha256:d42b5796e20aacc9d15e66befb7a345454eef794fdb0737d1af593447c6c8f45", - "sha256:dee54f5d30d775f525894d67b1495625dd9322945e7fee00731952e0368ff42d", - "sha256:e070535507bd6aa07124258171be2ee8dfc19119c28ca94c9dfb7efd23564512", - "sha256:e1ff2748c84d97b065cc95429814cdba39bcbd77c9c85c89344b317dc0d9cbff", - "sha256:ed851c75d1e0e043cbf5ca9a8e1b13c4c90f3fbd863dacb01c0808e2b5204201" + "sha256:005f2bfe11b6745d726dbb07ace4d53f057de66e336ff92d61b8c7e9c8f4777d", + "sha256:09e96138280241bd355cd585148dec04dbbedb4f46128f340d696eaafc82dd7b", + "sha256:0b1ad452cc824665ddc682400b62c9e4f5b64736a2ba99110712fdee5f2505c4", + "sha256:0ef488305fdce2580c8b2708f22d7785ae222d9825d3094ab073e22e93dfe51f", + "sha256:15f351bed09897fbda218e4db5a3d5c06328862f6198d4fb385f3e14e19decb3", + "sha256:22399ff4870fb4c7ef19fff6eeb20a8bbf15571913c181c78cb361024d574579", + "sha256:23e5d2040367322824605bc29ae8ee9175200b92cb5483ac7d466927a9b3d537", + "sha256:2791f68edc5749024b4722500e86303a10d342527e1e3bcac47f35fbd25b764e", + "sha256:2f9674623ca39c9ebe38afa3da402e9326c245f0f5ceff0623dccdac15023e05", + "sha256:3363e77a6176afb8823b6e06db78c46dbc4c7813b00a41300a4873b6ba63b171", + "sha256:33c6cdc071ba5cd6d96769c8969a0531be2d08c2628a0143a10a7dcffa9719ca", + "sha256:3b8eaf915ddc0709779889c472e553f0d3e8b7bdf62dab764c8921b09bf94522", 
+ "sha256:3cb3e1b9ec43256c4e0f8d2837267a70b0e1ca8c4f456685508ae6106b1f504c", + "sha256:3eeeb0405fd145e714f7633a5173318bd88d8bbfc3dd0a5751f8c4f70ae629bc", + "sha256:44f60519595eaca110f248e5017363d751b12782a6f2bd6a7041cba275215f5d", + "sha256:4d7c26bfc1ea9f92084a1d75e11999e97b62d63128bcc90c3624d07813c52808", + "sha256:529c4ed2e10437c205f38f3691a68be66c39197d01062618c55f74294a4a4828", + "sha256:6642f15ad963b5092d65aed022d033c77763515fdc07095208f15d3563003869", + "sha256:85ba797e1de5b48aa5a8427b6ba62cf69607c18c5d4eb747604b7302f1ec382d", + "sha256:8f0f1e499e4000c4c347a124fa6a27d37608ced4fe9f7d45070563b7c4c370c9", + "sha256:a624fae282e81ad2e4871bdb767e2c914d0539708c0f078b5b355258293c98b0", + "sha256:b0358e6fefc74a16f745afa366acc89f979040e0cbc4eec55ab26ad1f6a9bfbc", + "sha256:bbd2f4dfee1079f76943767fce837ade3087b578aeb9f69aec7857d5bf25db15", + "sha256:bf39a9e19ce7298f1bd6a9758fa99707e9e5b1ebe5e90f2c3913a47bc548747c", + "sha256:c11579638288e53fc94ad60022ff1b67865363e730ee41ad5e6f0a17188b327a", + "sha256:c150eaa3dadbb2b5339675b88d4573c1be3cb6f2c33a6c83387e10cc0bf05bd3", + "sha256:c53af463f4a40de78c58b8b2710ade243c81cbca641e34debf3396a9640d6ec1", + "sha256:cb763ceceae04803adcc4e2d80d611ef201c73da32d8f2722e9d0ab0c7f10768", + "sha256:cc75f58cdaf043fe6a7a6c04b3b5a0e694c6a9e24050967747251fb80d7bce0d", + "sha256:d80998ed59176e8cba74028762fbd9b9153b9afc71ea118e63bbf5d4d0f9552b", + "sha256:de31b5164d44ef4943db155b3e8e17929707cac1e5bd2f363e67a56e3af4af6e", + "sha256:e66399cf0fc07de4dce4f588fc25bfe84a6d1285cc544e67987d22663393926d", + "sha256:f0620511387790860b249b9241c2f13c3a80e21a73e0b861a2df24e9d6f56730", + "sha256:f4eae045e6ab2bb54ca279733fe4eb85f1effda392666308250714e01907f394", + "sha256:f92cdecb618e5fa4658aeb97d5eb3d2f47aa94ac6477c6daf0f306c5a3b9e6b1", + "sha256:f92f789e4f9241cd262ad7a555ca2c648a98178a953af117ef7fad46aa1d5591" ], - "version": "==1.12.3" + "version": "==1.14.3" }, "click": { "hashes": [ - 
"sha256:29f99fc6125fbc931b758dc053b3114e55c77a6e4c6c3a2674a2dc986016381d", - "sha256:f15516df478d5a56180fbf80e68f206010e6d160fc39fa508b65e035fd75130b" + "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a", + "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc" ], - "version": "==6.7" + "version": "==7.1.2" }, "construct": { "hashes": [ @@ -121,27 +128,39 @@ }, "cryptography": { "hashes": [ - "sha256:02602e1672b62e803e08617ec286041cc453e8d43f093a5f4162095506bc0beb", - "sha256:10b48e848e1edb93c1d3b797c83c72b4c387ab0eb4330aaa26da8049a6cbede0", - "sha256:17db09db9d7c5de130023657be42689d1a5f60502a14f6f745f6f65a6b8195c0", - "sha256:227da3a896df1106b1a69b1e319dce218fa04395e8cc78be7e31ca94c21254bc", - "sha256:2cbaa03ac677db6c821dac3f4cdfd1461a32d0615847eedbb0df54bb7802e1f7", - "sha256:31db8febfc768e4b4bd826750a70c79c99ea423f4697d1dab764eb9f9f849519", - "sha256:4a510d268e55e2e067715d728e4ca6cd26a8e9f1f3d174faf88e6f2cb6b6c395", - "sha256:6a88d9004310a198c474d8a822ee96a6dd6c01efe66facdf17cb692512ae5bc0", - "sha256:76936ec70a9b72eb8c58314c38c55a0336a2b36de0c7ee8fb874a4547cadbd39", - "sha256:7e3b4aecc4040928efa8a7cdaf074e868af32c58ffc9bb77e7bf2c1a16783286", - "sha256:8168bcb08403ef144ff1fb880d416f49e2728101d02aaadfe9645883222c0aa5", - "sha256:8229ceb79a1792823d87779959184a1bf95768e9248c93ae9f97c7a2f60376a1", - "sha256:8a19e9f2fe69f6a44a5c156968d9fc8df56d09798d0c6a34ccc373bb186cee86", - "sha256:8d10113ca826a4c29d5b85b2c4e045ffa8bad74fb525ee0eceb1d38d4c70dfd6", - "sha256:be495b8ec5a939a7605274b6e59fbc35e76f5ad814ae010eb679529671c9e119", - "sha256:dc2d3f3b1548f4d11786616cf0f4415e25b0fbecb8a1d2cd8c07568f13fdde38", - "sha256:e4aecdd9d5a3d06c337894c9a6e2961898d3f64fe54ca920a72234a3de0f9cb3", - "sha256:e79ab4485b99eacb2166f3212218dd858258f374855e1568f728462b0e6ee0d9", - "sha256:f995d3667301e1754c57b04e0bae6f0fa9d710697a9f8d6712e8cca02550910f" + "sha256:07ca431b788249af92764e3be9a488aa1d39a0bc3be313d826bbec690417e538", + 
"sha256:13b88a0bd044b4eae1ef40e265d006e34dbcde0c2f1e15eb9896501b2d8f6c6f", + "sha256:32434673d8505b42c0de4de86da8c1620651abd24afe91ae0335597683ed1b77", + "sha256:3cd75a683b15576cfc822c7c5742b3276e50b21a06672dc3a800a2d5da4ecd1b", + "sha256:4e7268a0ca14536fecfdf2b00297d4e407da904718658c1ff1961c713f90fd33", + "sha256:545a8550782dda68f8cdc75a6e3bf252017aa8f75f19f5a9ca940772fc0cb56e", + "sha256:55d0b896631412b6f0c7de56e12eb3e261ac347fbaa5d5e705291a9016e5f8cb", + "sha256:5849d59358547bf789ee7e0d7a9036b2d29e9a4ddf1ce5e06bb45634f995c53e", + "sha256:6dc59630ecce8c1f558277ceb212c751d6730bd12c80ea96b4ac65637c4f55e7", + "sha256:7117319b44ed1842c617d0a452383a5a052ec6aa726dfbaffa8b94c910444297", + "sha256:75e8e6684cf0034f6bf2a97095cb95f81537b12b36a8fedf06e73050bb171c2d", + "sha256:7b8d9d8d3a9bd240f453342981f765346c87ade811519f98664519696f8e6ab7", + "sha256:a035a10686532b0587d58a606004aa20ad895c60c4d029afa245802347fab57b", + "sha256:a4e27ed0b2504195f855b52052eadcc9795c59909c9d84314c5408687f933fc7", + "sha256:a733671100cd26d816eed39507e585c156e4498293a907029969234e5e634bc4", + "sha256:a75f306a16d9f9afebfbedc41c8c2351d8e61e818ba6b4c40815e2b5740bb6b8", + "sha256:bd717aa029217b8ef94a7d21632a3bb5a4e7218a4513d2521c2a2fd63011e98b", + "sha256:d25cecbac20713a7c3bc544372d42d8eafa89799f492a43b79e1dfd650484851", + "sha256:d26a2557d8f9122f9bf445fc7034242f4375bd4e95ecda007667540270965b13", + "sha256:d3545829ab42a66b84a9aaabf216a4dce7f16dbc76eb69be5c302ed6b8f4a29b", + "sha256:d3d5e10be0cf2a12214ddee45c6bd203dab435e3d83b4560c03066eda600bfe3", + "sha256:efe15aca4f64f3a7ea0c09c87826490e50ed166ce67368a68f315ea0807a20df" ], - "version": "==2.3.1" + "index": "pypi", + "version": "==3.2.1" + }, + "dataclasses": { + "hashes": [ + "sha256:3459118f7ede7c8bea0fe795bff7c6c2ce287d01dd226202f7c9ebc0610a7836", + "sha256:494a6dcae3b8bcf80848eea2ef64c0cc5cd307ffc263e17cdf42f3e5420808e6" + ], + "markers": "python_version < '3.7'", + "version": "==0.7" }, "docopt": { "hashes": [ @@ -150,162 +169,378 @@ "index": 
"pypi", "version": "==0.6.2" }, + "flask": { + "hashes": [ + "sha256:4efa1ae2d7c9865af48986de8aeb8504bf32c7f3d6fdc9353d34b21f4b127060", + "sha256:8a4fdd8936eba2512e9c85df320a37e694c93945b33ef33c89946a340a238557" + ], + "version": "==1.1.2" + }, "h11": { "hashes": [ - "sha256:1c0fbb1cba6f809fe3e6b27f8f6d517ca171f848922708871403636143d530d9", - "sha256:af77d5d82fa027c032650fb8afdef3cd0a3735ba01480bee908cddad9be1bdce" + "sha256:3c6c61d69c6f13d41f1b80ab0322f1872702a3ba26e12aa864c928f6a43fbaab", + "sha256:ab6c335e1b6ef34b205d5ca3e228c9299cc7218b049819ec84a388c2525e5d87" ], - "version": "==0.7.0" + "version": "==0.11.0" }, "h2": { "hashes": [ - "sha256:ac377fcf586314ef3177bfd90c12c7826ab0840edeb03f0f24f511858326049e", - "sha256:b8a32bd282594424c0ac55845377eea13fa54fe4a8db012f3a198ed923dc3ab4" + "sha256:ac9e293a1990b339d5d71b19c5fe630e3dd4d768c620d1730d355485323f1b25", + "sha256:bb7ac7099dd67a857ed52c815a6192b6b1f5ba6b516237fc24a085341340593d" ], - "version": "==3.1.1" + "markers": "python_version >= '3.6.0'", + "version": "==4.0.0" }, "hpack": { "hashes": [ - "sha256:0edd79eda27a53ba5be2dfabf3b15780928a0dff6eb0c60a3d6767720e970c89", - "sha256:8eec9c1f4bfae3408a3f30500261f7e6a65912dc138526ea054f9ad98892e9d2" + "sha256:84a076fad3dc9a9f8063ccb8041ef100867b1878b25ef0ee63847a5d53818a6c", + "sha256:fc41de0c63e687ebffde81187a948221294896f6bdc0ae2312708df339430095" ], - "version": "==3.0.0" + "version": "==4.0.0" }, "hyperframe": { "hashes": [ - "sha256:5187962cb16dcc078f23cb5a4b110098d546c3f41ff2d4038a9896893bbd0b40", - "sha256:a9f5c17f2cc3c719b917c4f33ed1c61bd1f8dfac4b1bd23b7c80b3400971b41f" + "sha256:742d2a4bc3152a340a49d59f32e33ec420aa8e7054c1444ef5c7efff255842f1", + "sha256:a51026b1591cac726fc3d0b7994fbc7dc5efab861ef38503face2930fd7b2d34" ], - "version": "==5.2.0" + "markers": "python_version >= '3.6.0'", + "version": "==6.0.0" }, - "idna": { + "itsdangerous": { "hashes": [ - "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", - 
"sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" + "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19", + "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749" ], - "version": "==2.8" + "version": "==1.1.0" + }, + "jinja2": { + "hashes": [ + "sha256:89aab215427ef59c34ad58735269eb58b1a5808103067f7bb9d5836c651b3bb0", + "sha256:f0a4641d3cf955324a89c04f3d94663aa4d638abe8f733ecd3582848e1c37035" + ], + "version": "==2.11.2" }, "kaitaistruct": { "hashes": [ - "sha256:d1d17c7f6839b3d28fc22b21295f787974786c2201e8788975e72e2a1d109ff5" + "sha256:3d5845817ec8a4d5504379cc11bd570b038850ee49c4580bc0998c8fb1d327ad" ], - "version": "==0.8" + "version": "==0.9" }, "ldap3": { "hashes": [ - "sha256:3f67c83185b1f0df8fdf6b52fa42c55bc9e9b7120c8b7fec60f0d6003c536d18", - "sha256:dd9be8ea27773c4ffc18ede0b95c3ca1eb12513a184590b9f8ae423db3f71eb9" + "sha256:37d633e20fa360c302b1263c96fe932d40622d0119f1bddcb829b03462eeeeb7", + "sha256:7c3738570766f5e5e74a56fade15470f339d5c436d821cf476ef27da0a4de8b0" ], - "version": "==2.5.2" + "version": "==2.8.1" + }, + "markupsafe": { + "hashes": [ + "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", + "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", + "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", + "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", + "sha256:13d3144e1e340870b25e7b10b98d779608c02016d5184cfb9927a9f10c689f42", + "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", + "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", + "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", + "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", + "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", + "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", + 
"sha256:596510de112c685489095da617b5bcbbac7dd6384aeebeda4df6025d0256a81b", + "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", + "sha256:6788b695d50a51edb699cb55e35487e430fa21f1ed838122d722e0ff0ac5ba15", + "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", + "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", + "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", + "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", + "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", + "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", + "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", + "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", + "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", + "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", + "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", + "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", + "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", + "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", + "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", + "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", + "sha256:cdb132fc825c38e1aeec2c8aa9338310d29d337bebbd7baa06889d09a60a1fa2", + "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7", + "sha256:e8313f01ba26fbbe36c7be1966a7b7424942f670f38e666995b88d012765b9be" + ], + "version": "==1.1.1" }, "mitmproxy": { "hashes": [ - "sha256:e74869c7bf4e5b988fbe3a3d0039f430d1e1eeb5927abf2097183a711bf5b312" + "sha256:481940365fc08fc2318343e530ef01d35084e8b56d1c61b5e1a7b6ed9b664d24" ], "index": "pypi", - "version": "==4.0.4" + "version": "==5.3.0" + }, + "msgpack": { + "hashes": 
[ + "sha256:002a0d813e1f7b60da599bdf969e632074f9eec1b96cbed8fb0973a63160a408", + "sha256:25b3bc3190f3d9d965b818123b7752c5dfb953f0d774b454fd206c18fe384fb8", + "sha256:271b489499a43af001a2e42f42d876bb98ccaa7e20512ff37ca78c8e12e68f84", + "sha256:39c54fdebf5fa4dda733369012c59e7d085ebdfe35b6cf648f09d16708f1be5d", + "sha256:4233b7f86c1208190c78a525cd3828ca1623359ef48f78a6fea4b91bb995775a", + "sha256:5bea44181fc8e18eed1d0cd76e355073f00ce232ff9653a0ae88cb7d9e643322", + "sha256:5dba6d074fac9b24f29aaf1d2d032306c27f04187651511257e7831733293ec2", + "sha256:7a22c965588baeb07242cb561b63f309db27a07382825fc98aecaf0827c1538e", + "sha256:908944e3f038bca67fcfedb7845c4a257c7749bf9818632586b53bcf06ba4b97", + "sha256:9534d5cc480d4aff720233411a1f765be90885750b07df772380b34c10ecb5c0", + "sha256:aa5c057eab4f40ec47ea6f5a9825846be2ff6bf34102c560bad5cad5a677c5be", + "sha256:b3758dfd3423e358bbb18a7cccd1c74228dffa7a697e5be6cb9535de625c0dbf", + "sha256:c901e8058dd6653307906c5f157f26ed09eb94a850dddd989621098d347926ab", + "sha256:cec8bf10981ed70998d98431cd814db0ecf3384e6b113366e7f36af71a0fca08", + "sha256:db685187a415f51d6b937257474ca72199f393dad89534ebbdd7d7a3b000080e", + "sha256:e35b051077fc2f3ce12e7c6a34cf309680c63a842db3a0616ea6ed25ad20d272", + "sha256:e7bbdd8e2b277b77782f3ce34734b0dfde6cbe94ddb74de8d733d603c7f9e2b1", + "sha256:ea41c9219c597f1d2bf6b374d951d310d58684b5de9dc4bd2976db9e1e22c140" + ], + "version": "==1.0.0" }, "passlib": { "hashes": [ - "sha256:3d948f64138c25633613f303bcc471126eae67c04d5e3f6b7b8ce6242f8653e0", - "sha256:43526aea08fa32c6b6dbbbe9963c4c767285b78147b7437597f992812f69d280" + "sha256:aa6bca462b8d8bda89c70b382f0c298a20b5560af6cbfa2dce410c0a2fb669f1", + "sha256:defd50f72b65c5402ab2c573830a6978e5f202ad0d984793c8dde2c4152ebe04" ], - "version": "==1.7.1" + "version": "==1.7.4" + }, + "protobuf": { + "hashes": [ + "sha256:0bba42f439bf45c0f600c3c5993666fcb88e8441d011fad80a11df6f324eef33", + "sha256:1e834076dfef9e585815757a2c7e4560c7ccc5962b9d09f831214c693a91b463", + 
"sha256:339c3a003e3c797bc84499fa32e0aac83c768e67b3de4a5d7a5a9aa3b0da634c", + "sha256:361acd76f0ad38c6e38f14d08775514fbd241316cce08deb2ce914c7dfa1184a", + "sha256:3dee442884a18c16d023e52e32dd34a8930a889e511af493f6dc7d4d9bf12e4f", + "sha256:4d1174c9ed303070ad59553f435846a2f877598f59f9afc1b89757bdf846f2a7", + "sha256:5db9d3e12b6ede5e601b8d8684a7f9d90581882925c96acf8495957b4f1b204b", + "sha256:6a82e0c8bb2bf58f606040cc5814e07715b2094caeba281e2e7d0b0e2e397db5", + "sha256:8c35bcbed1c0d29b127c886790e9d37e845ffc2725cc1db4bd06d70f4e8359f4", + "sha256:91c2d897da84c62816e2f473ece60ebfeab024a16c1751aaf31100127ccd93ec", + "sha256:9c2e63c1743cba12737169c447374fab3dfeb18111a460a8c1a000e35836b18c", + "sha256:9edfdc679a3669988ec55a989ff62449f670dfa7018df6ad7f04e8dbacb10630", + "sha256:c0c5ab9c4b1eac0a9b838f1e46038c3175a95b0f2d944385884af72876bd6bc7", + "sha256:c8abd7605185836f6f11f97b21200f8a864f9cb078a193fe3c9e235711d3ff1e", + "sha256:d69697acac76d9f250ab745b46c725edf3e98ac24763990b24d58c16c642947a", + "sha256:df3932e1834a64b46ebc262e951cd82c3cf0fa936a154f0a42231140d8237060", + "sha256:e7662437ca1e0c51b93cadb988f9b353fa6b8013c0385d63a70c8a77d84da5f9", + "sha256:f68eb9d03c7d84bd01c790948320b768de8559761897763731294e3bc316decb" + ], + "version": "==3.13.0" + }, + "publicsuffix2": { + "hashes": [ + "sha256:00f8cc31aa8d0d5592a5ced19cccba7de428ebca985db26ac852d920ddd6fe7b", + "sha256:786b5e36205b88758bd3518725ec8cfe7a8173f5269354641f581c6b80a99893" + ], + "version": "==2.20191221" }, "pyasn1": { "hashes": [ - "sha256:62cdade8b5530f0b185e09855dd422bc05c0bbff6b72ff61381c09dac7befd8c", - "sha256:a9495356ca1d66ed197a0f72b41eb1823cf7ea8b5bd07191673e8147aecf8604" + "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", + "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba" ], - "version": "==0.4.7" + "version": "==0.4.8" }, "pycparser": { "hashes": [ - "sha256:a988718abfad80b6b157acce7bf130a30876d27603738ac39f140993246b25b3" + 
"sha256:2d475327684562c3a96cc71adf7dc8c4f0565175cf86b6d7a404ff4c771f15f0", + "sha256:7582ad22678f0fcd81102833f60ef8d0e57288b6b5fb00323d101be910e35705" ], - "version": "==2.19" + "version": "==2.20" }, "pyopenssl": { "hashes": [ - "sha256:26ff56a6b5ecaf3a2a59f132681e2a80afcc76b4f902f612f518f92c2a1bf854", - "sha256:6488f1423b00f73b7ad5167885312bb0ce410d3312eb212393795b53c8caa580" + "sha256:621880965a720b8ece2f1b2f54ea2071966ab00e2970ad2ce11d596102063504", + "sha256:9a24494b2602aaf402be5c9e30a0b82d4a5c67528fe8fb475e3f3bc00dd69507" ], - "version": "==18.0.0" + "version": "==19.1.0" }, "pyparsing": { "hashes": [ - "sha256:bc6c7146b91af3f567cf6daeaec360bc07d45ffec4cf5353f4d7a208ce7ca30a", - "sha256:d29593d8ebe7b57d6967b62494f8c72b03ac0262b1eed63826c6f788b3606401" + "sha256:c203ec8783bf771a155b207279b9bccb8dea02d8f0c9e5f8ead507bc3246ecc1", + "sha256:ef9d7589ef3c200abe66653d3f1ab1033c3c419ae9b9bdb1240a85b024efc88b" ], - "version": "==2.2.2" + "version": "==2.4.7" }, "pyperclip": { "hashes": [ - "sha256:406bc020d4b8e60d8673876271b815befc4c02fd8d919e4aacc667d69fab99ea" + "sha256:9abef1e79ce635eb62309ecae02dfb5a3eb952fa7d6dce09c1aef063f81424d3" ], - "version": "==1.6.5" + "version": "==1.8.1" }, "ruamel.yaml": { "hashes": [ - "sha256:08aaaa74ff66565024ecabf9ba2db212712382a21c0458f9a91c623a1fa83b34", - "sha256:23f2efb872d2ebe3d5428b4f1a8f30cbf59f56e780c4981c155411ee65572673", - "sha256:38718e69270141c403b5fc539f774ed394568f8a5195b507991f5b690356facb", - "sha256:44da2be1153e173f90ad8775d4ac4237a3c06cfbb9660c1c1980271621833faa", - "sha256:4b1674a936cdae9735578d4fd64bcbc6cfbb77a1a8f7037a50c6e3874ba4c9d8", - "sha256:51d49c870aca850e652e2cd1c9bea9b52b77d13ad52b0556de496c1d264ea65f", - "sha256:63dc8c6147a4cf77efadf2ae0f34e89e03de79289298bb941b7ae333d5d4020b", - "sha256:6672798c6b52a976a7b24e20665055852388c83198d88029d3c76e2197ac221a", - "sha256:6b6025f9b6a557e15e9fdfda4d9af0b57cd8d59ff98e23a0097ab2d7c0540f07", - 
"sha256:7b750252e3d1ec5b53d03be508796c04a907060900c7d207280b7456650ebbfc", - "sha256:847177699994f9c31adf78d1ef1ff8f069ef0241e744a3ee8b30fbdaa914cc1e", - "sha256:8e42f3067a59e819935a2926e247170ed93c8f0b2ab64526f888e026854db2e4", - "sha256:922d9e483c05d9000256640026f277fcc0c2e1e9271d05acada8e6cfb4c8b721", - "sha256:92a8ca79f9173cca29ca9663b49d9c936aefc4c8a76f39318b0218c8f3626438", - "sha256:ab8eeca4de4decf0d0a42cb6949d354da9fc70a2d9201f0dd55186c599b2e3a5", - "sha256:bd4b60b649f4a81086f70cd56eff4722018ef36a28094c396f1a53bf450bd579", - "sha256:fc6471ef15b69e454cca82433ac5f84929d9f3e2d72b9e54b06850b6b7133cc0", - "sha256:ffc89770339191acbe5a15041950b5ad9daec7d659619b0ed9dad8c9c80c26f3" + "sha256:012b9470a0ea06e4e44e99e7920277edf6b46eee0232a04487ea73a7386340a5", + "sha256:076cc0bc34f1966d920a49f18b52b6ad559fbe656a0748e3535cf7b3f29ebf9e" ], - "version": "==0.15.100" + "version": "==0.16.12" + }, + "ruamel.yaml.clib": { + "hashes": [ + "sha256:058a1cc3df2a8aecc12f983a48bda99315cebf55a3b3a5463e37bb599b05727b", + "sha256:2602e91bd5c1b874d6f93d3086f9830f3e907c543c7672cf293a97c3fabdcd91", + "sha256:28116f204103cb3a108dfd37668f20abe6e3cafd0d3fd40dba126c732457b3cc", + "sha256:2d24bd98af676f4990c4d715bcdc2a60b19c56a3fb3a763164d2d8ca0e806ba7", + "sha256:30dca9bbcbb1cc858717438218d11eafb78666759e5094dd767468c0d577a7e7", + "sha256:44c7b0498c39f27795224438f1a6be6c5352f82cb887bc33d962c3a3acc00df6", + "sha256:464e66a04e740d754170be5e740657a3b3b6d2bcc567f0c3437879a6e6087ff6", + "sha256:4df5019e7783d14b79217ad9c56edf1ba7485d614ad5a385d1b3c768635c81c0", + "sha256:4e52c96ca66de04be42ea2278012a2342d89f5e82b4512fb6fb7134e377e2e62", + "sha256:5254af7d8bdf4d5484c089f929cb7f5bafa59b4f01d4f48adda4be41e6d29f99", + "sha256:52ae5739e4b5d6317b52f5b040b1b6639e8af68a5b8fd606a8b08658fbd0cab5", + "sha256:53b9dd1abd70e257a6e32f934ebc482dac5edb8c93e23deb663eac724c30b026", + "sha256:73b3d43e04cc4b228fa6fa5d796409ece6fcb53a6c270eb2048109cbcbc3b9c2", + 
"sha256:74161d827407f4db9072011adcfb825b5258a5ccb3d2cd518dd6c9edea9e30f1", + "sha256:839dd72545ef7ba78fd2aa1a5dd07b33696adf3e68fae7f31327161c1093001b", + "sha256:8e8fd0a22c9d92af3a34f91e8a2594eeb35cba90ab643c5e0e643567dc8be43e", + "sha256:a873e4d4954f865dcb60bdc4914af7eaae48fb56b60ed6daa1d6251c72f5337c", + "sha256:ab845f1f51f7eb750a78937be9f79baea4a42c7960f5a94dde34e69f3cce1988", + "sha256:b1e981fe1aff1fd11627f531524826a4dcc1f26c726235a52fcb62ded27d150f", + "sha256:b4b0d31f2052b3f9f9b5327024dc629a253a83d8649d4734ca7f35b60ec3e9e5", + "sha256:c6ac7e45367b1317e56f1461719c853fd6825226f45b835df7436bb04031fd8a", + "sha256:daf21aa33ee9b351f66deed30a3d450ab55c14242cfdfcd377798e2c0d25c9f1", + "sha256:e9f7d1d8c26a6a12c23421061f9022bb62704e38211fe375c645485f38df34a2", + "sha256:f6061a31880c1ed6b6ce341215336e2f3d0c1deccd84957b6fa8ca474b41e89f" + ], + "markers": "platform_python_implementation == 'CPython' and python_version < '3.9'", + "version": "==0.2.2" }, "six": { "hashes": [ - "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", - "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", + "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" ], - "version": "==1.12.0" + "version": "==1.15.0" }, "sortedcontainers": { "hashes": [ - "sha256:220bb2e3e1886297fd7cdd6d164cb5cf237be1cfae1a3a3e526d149c52816682", - "sha256:b74f2756fb5e23512572cc76f0fe0832fd86310f77dfee54335a35fb33f6b950" + "sha256:4e73a757831fc3ca4de2859c422564239a31d8213d09a2a666e375807034d2ba", + "sha256:c633ebde8580f241f274c1f8994a665c0e54a17724fecd0cae2f079e09c36d3f" ], - "version": "==2.0.5" + "version": "==2.2.2" }, "tornado": { "hashes": [ - "sha256:0662d28b1ca9f67108c7e3b77afabfb9c7e87bde174fbda78186ecedc2499a9d", - "sha256:4e5158d97583502a7e2739951553cbd88a72076f152b4b11b64b9a10c4c49409", - "sha256:732e836008c708de2e89a31cb2fa6c0e5a70cb60492bee6f1ea1047500feaf7f", - 
"sha256:8154ec22c450df4e06b35f131adc4f2f3a12ec85981a203301d310abf580500f", - "sha256:8e9d728c4579682e837c92fdd98036bd5cdefa1da2aaf6acf26947e6dd0c01c5", - "sha256:d4b3e5329f572f055b587efc57d29bd051589fb5a43ec8898c77a47ec2fa2bbb", - "sha256:e5f2585afccbff22390cddac29849df463b252b711aa2ce7c5f3f342a5b3b444" + "sha256:0a00ff4561e2929a2c37ce706cb8233b7907e0cdc22eab98888aca5dd3775feb", + "sha256:0d321a39c36e5f2c4ff12b4ed58d41390460f798422c4504e09eb5678e09998c", + "sha256:1e8225a1070cd8eec59a996c43229fe8f95689cb16e552d130b9793cb570a288", + "sha256:20241b3cb4f425e971cb0a8e4ffc9b0a861530ae3c52f2b0434e6c1b57e9fd95", + "sha256:25ad220258349a12ae87ede08a7b04aca51237721f63b1808d39bdb4b2164558", + "sha256:33892118b165401f291070100d6d09359ca74addda679b60390b09f8ef325ffe", + "sha256:33c6e81d7bd55b468d2e793517c909b139960b6c790a60b7991b9b6b76fb9791", + "sha256:3447475585bae2e77ecb832fc0300c3695516a47d46cefa0528181a34c5b9d3d", + "sha256:34ca2dac9e4d7afb0bed4677512e36a52f09caa6fded70b4e3e1c89dbd92c326", + "sha256:3e63498f680547ed24d2c71e6497f24bca791aca2fe116dbc2bd0ac7f191691b", + "sha256:548430be2740e327b3fe0201abe471f314741efcb0067ec4f2d7dcfb4825f3e4", + "sha256:6196a5c39286cc37c024cd78834fb9345e464525d8991c21e908cc046d1cc02c", + "sha256:61b32d06ae8a036a6607805e6720ef00a3c98207038444ba7fd3d169cd998910", + "sha256:6286efab1ed6e74b7028327365cf7346b1d777d63ab30e21a0f4d5b275fc17d5", + "sha256:65d98939f1a2e74b58839f8c4dab3b6b3c1ce84972ae712be02845e65391ac7c", + "sha256:66324e4e1beede9ac79e60f88de548da58b1f8ab4b2f1354d8375774f997e6c0", + "sha256:6c77c9937962577a6a76917845d06af6ab9197702a42e1346d8ae2e76b5e3675", + "sha256:70dec29e8ac485dbf57481baee40781c63e381bebea080991893cd297742b8fd", + "sha256:7250a3fa399f08ec9cb3f7b1b987955d17e044f1ade821b32e5f435130250d7f", + "sha256:748290bf9112b581c525e6e6d3820621ff020ed95af6f17fedef416b27ed564c", + "sha256:7da13da6f985aab7f6f28debab00c67ff9cbacd588e8477034c0652ac141feea", + "sha256:8f959b26f2634a091bb42241c3ed8d3cedb506e7c27b8dd5c7b9f745318ddbb6", 
+ "sha256:9de9e5188a782be6b1ce866e8a51bc76a0fbaa0e16613823fc38e4fc2556ad05", + "sha256:a48900ecea1cbb71b8c71c620dee15b62f85f7c14189bdeee54966fbd9a0c5bd", + "sha256:b87936fd2c317b6ee08a5741ea06b9d11a6074ef4cc42e031bc6403f82a32575", + "sha256:c77da1263aa361938476f04c4b6c8916001b90b2c2fdd92d8d535e1af48fba5a", + "sha256:cb5ec8eead331e3bb4ce8066cf06d2dfef1bfb1b2a73082dfe8a161301b76e37", + "sha256:cc0ee35043162abbf717b7df924597ade8e5395e7b66d18270116f8745ceb795", + "sha256:d14d30e7f46a0476efb0deb5b61343b1526f73ebb5ed84f23dc794bdb88f9d9f", + "sha256:d371e811d6b156d82aa5f9a4e08b58debf97c302a35714f6f45e35139c332e32", + "sha256:d3d20ea5782ba63ed13bc2b8c291a053c8d807a8fa927d941bd718468f7b950c", + "sha256:d3f7594930c423fd9f5d1a76bee85a2c36fd8b4b16921cae7e965f22575e9c01", + "sha256:dcef026f608f678c118779cd6591c8af6e9b4155c44e0d1bc0c87c036fb8c8c4", + "sha256:e0791ac58d91ac58f694d8d2957884df8e4e2f6687cdf367ef7eb7497f79eaa2", + "sha256:e385b637ac3acaae8022e7e47dfa7b83d3620e432e3ecb9a3f7f58f150e50921", + "sha256:e519d64089b0876c7b467274468709dadf11e41d65f63bba207e04217f47c085", + "sha256:e7229e60ac41a1202444497ddde70a48d33909e484f96eb0da9baf8dc68541df", + "sha256:ed3ad863b1b40cd1d4bd21e7498329ccaece75db5a5bf58cd3c9f130843e7102", + "sha256:f0ba29bafd8e7e22920567ce0d232c26d4d47c8b5cf4ed7b562b5db39fa199c5", + "sha256:fa2ba70284fa42c2a5ecb35e322e68823288a4251f9ba9cc77be04ae15eada68", + "sha256:fba85b6cd9c39be262fcd23865652920832b61583de2a2ca907dbd8e8a8c81e5" ], - "version": "==5.1.1" + "version": "==6.1" }, "urwid": { "hashes": [ - "sha256:644d3e3900867161a2fc9287a9762753d66bd194754679adb26aede559bcccbc" + "sha256:588bee9c1cb208d0906a9f73c613d2bd32c3ed3702012f51efe318a3f2127eae" ], - "version": "==2.0.1" + "version": "==2.1.2" + }, + "werkzeug": { + "hashes": [ + "sha256:2de2a5db0baeae7b2d2664949077c2ac63fbd16d98da0ff71837f7d1dea3fd43", + "sha256:6c80b1e5ad3665290ea39320b91e1be1e0d5f60652b964a3070216de83d2e47c" + ], + "version": "==1.0.1" }, "wsproto": { "hashes": [ - 
"sha256:02f214f6bb43cda62a511e2e8f1d5fa4703ed83d376d18d042bd2bbf2e995824", - "sha256:d2a7f718ab3144ec956a3267d57b5c172f0668827f5803e7d670837b0125b9fa" + "sha256:614798c30e5dc2b3f65acc03d2d50842b97621487350ce79a80a711229edfa9d", + "sha256:e3d190a11d9307112ba23bbe60055604949b172143969c8f641318476a9b6f1d" ], - "version": "==0.11.0" + "version": "==0.15.0" + }, + "zstandard": { + "hashes": [ + "sha256:0646bd506cd1c83b94a5057568cbc7868f656c79ac22d2e19e9d280f64451a0c", + "sha256:0c3ea262cee9c8a624ae22760466a8144c3c2b62da6f2b2671f47d9f74d8315f", + "sha256:22362a1b5bf8693692be1d1609a25159cd67d5ff93200a2978aea815a63739e8", + "sha256:25ec0734f8c2eee8fd140cae3cde0ffc531ab6730be1f48b2b868a409a1a233d", + "sha256:2e66459d260d2332c5044625dc9f50ef883fe4366c15915d4d0deedb3b1dcba6", + "sha256:2f491936999f43301c424aaa9e03461ea218d9bb8574c1672a09260d30a4096e", + "sha256:39339ed8e0351e3a1d9e0792c5a77ac7da2091279dd78f3458d456bdc3cbb25e", + "sha256:3b41598ffc3cb3497bd6019aeeb1a55e272d3106f15d7855339eab92ed7659e8", + "sha256:4286cd5d76c9a2bf7cb9f9065c8f68b12221ddbcfba754577692442dce563995", + "sha256:45a3b64812152bf188044a1170bcaaeaee2175ec5340ea6a6810bf94b088886e", + "sha256:45e96e1b3bcf8f1060fad174938bfc9825f5d864ddc717b3dda1d876ab59eaaf", + "sha256:50f7692f32ebd86b87133f25211850f5025e730f75b364dfaab30e817a7780a1", + "sha256:6525190e90d49e07c88f88ee7cf02e1af76f9bf32a693e8dd6b8a5fe01b65079", + "sha256:68840f8117d087ecb82c2dfb7f32de237261220a569ea93a8bc0afeffb03ab58", + "sha256:68d15b407ac1f18e03fb89c93ade275cca766cb7eff03b26b40fdf9dba100679", + "sha256:754bcb077e2f946868e77670fb59907ac291542a14c836f89716376cd099107c", + "sha256:83f81d7c2e45e65654ea881683e7e597e813a862ba8e0596945de46657fbc285", + "sha256:85f59177e6a3cab285471a0e7ce048d07f6d39080b9766f8eaaf274f979f0afc", + "sha256:86494400d3923917124bd5f50b8e096de1dd7cfd890b164253bcd2283ef19539", + "sha256:8cb4cd3bb2e7213dd09432f8182d9acc8997bcd34fa3be44dffbb3f82d8d6dfd", + 
"sha256:9052398da52e8702cf9929999c8986b0f68b18c793e309cd8dff5cb7863d7652", + "sha256:9052870eeebbf4787fc9fc20703d16b6c32b4fffa1446045d05c64a8cb34f614", + "sha256:9119a52758dce523e82318433d41bc8053051af6d7dadd2ff3ada24d1cbf28cf", + "sha256:9572d3047579220f950e7fd6af647cc95e361dc671d10ad63215e07f147eec31", + "sha256:9d7d49b2d46233280c0a0d27046ab9321ceae329c4cbe8cffddfebb53dff3da2", + "sha256:a012f237fa5b00708f00e362035c032d1af5536796f9b410e76e61722176f607", + "sha256:a1ea3108dde195f9fb18fe99ee1674f85a99056793d2ea72fb3965eb48a0bd8f", + "sha256:a79db6a7db4ff91e7c5238d020d85aee1f4849ea357236899f9ed1773c5b66b4", + "sha256:a927f60735fcb5c19586c846c5f28da5edf8549142e4dd62ddf4b9579800a23c", + "sha256:ae4cfd9e023702609c59f5535d95d7b19d54d42902514fe4ece8792b65b3a0af", + "sha256:b021d3321107cdeba427a514d4faa35429525192e902e5b6608f346ef5ba5c8a", + "sha256:b3ac3401ae1945f3dab138819f58830fd658410aa2a53583c0a9af3e8809117d", + "sha256:b637e58757a9153ad562b530b82140dad5e505ae14d806b264a0802f343bd5dd", + "sha256:b711ee17b8676f367282ee654b8de750e2dfa2262e2eb07b7178b1524a273d44", + "sha256:b7e51d0d48153ece2db2c4e6bb2a71e781879027201dc7b718b3f27130547410", + "sha256:b8a1986ba41f6cf61f1234779ed492d026f87ab327cc6bf9e82d2e7a3f0b5b9c", + "sha256:c9da20d5e16f246861158b15cc908797ee6ceb5a799c8a3b97fe6c665627f0e5", + "sha256:dd156961934f7869aecfdf68da6f3f0fa48ad01923d64e9662038dff83f314d4", + "sha256:e149711b256fa8facbbce09b503a744c10fc03325742a9399c69c8569f0e9fe8", + "sha256:ece7f7ec03997357d61c44c50e6543123c0b7c2bdedc972b165d6832bf8868ad", + "sha256:ef36cb399ebc0941f68a4d3a675b13ad75a6037270ec3915ee337227b8bfec90", + "sha256:f1bfdbb37ada30bf6a08671a530e46ab24426bfad61efd28e5dc2beeb4f5b78d", + "sha256:f1c25e52e963dbe23a3ebc79ab904705eddcc15e14093fcde5059251090f01a6", + "sha256:f532d4c65c6ed6202b2c8bfc166648ec2c2ec2dc1d0fb06de643e87ce0a222c8", + "sha256:f559281d181c30ba14f0446a9e1a1ea6c4980792d7249bacbc575fcbcebde4b3", + "sha256:f5eccca127169257d8356069d298701fc612b05f6b768aa9ffc6e652c5169bd6", 
+ "sha256:fa660370fe5b5e4f3c3952732aea358540e56e91c9233d55a6b6e508e047b315", + "sha256:fff79a30845c2591718cb8798196d117402b2d5d7506b5f3bb691972731c30b3" + ], + "version": "==0.14.0" } }, "develop": {} From 527d3ce0bbe6ff19aaa4ad17e5abbbff5e022db0 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Thu, 5 Nov 2020 14:42:30 +0100 Subject: [PATCH 108/124] move headers to include directory --- src/backend/columnar/cstore.c | 2 +- src/backend/columnar/cstore_compression.c | 2 +- src/backend/columnar/cstore_customscan.c | 6 +++--- src/backend/columnar/cstore_fdw.c | 6 +++--- src/backend/columnar/cstore_metadata_tables.c | 4 ++-- src/backend/columnar/cstore_reader.c | 4 ++-- src/backend/columnar/cstore_tableam.c | 8 ++++---- src/backend/columnar/cstore_writer.c | 4 ++-- src/backend/columnar/mod.c | 8 ++++---- src/{backend => include}/columnar/cstore.h | 0 src/{backend => include}/columnar/cstore_customscan.h | 0 src/{backend => include}/columnar/cstore_fdw.h | 0 src/{backend => include}/columnar/cstore_tableam.h | 0 src/{backend => include}/columnar/cstore_version_compat.h | 0 src/{backend => include}/columnar/mod.h | 0 15 files changed, 22 insertions(+), 22 deletions(-) rename src/{backend => include}/columnar/cstore.h (100%) rename src/{backend => include}/columnar/cstore_customscan.h (100%) rename src/{backend => include}/columnar/cstore_fdw.h (100%) rename src/{backend => include}/columnar/cstore_tableam.h (100%) rename src/{backend => include}/columnar/cstore_version_compat.h (100%) rename src/{backend => include}/columnar/mod.h (100%) diff --git a/src/backend/columnar/cstore.c b/src/backend/columnar/cstore.c index a724a62a0..d81077bbc 100644 --- a/src/backend/columnar/cstore.c +++ b/src/backend/columnar/cstore.c @@ -20,7 +20,7 @@ #include "utils/guc.h" #include "utils/rel.h" -#include "cstore.h" +#include "columnar/cstore.h" /* Default values for option parameters */ #define DEFAULT_COMPRESSION_TYPE COMPRESSION_NONE diff --git a/src/backend/columnar/cstore_compression.c 
b/src/backend/columnar/cstore_compression.c index f36d8dd04..a82faaf52 100644 --- a/src/backend/columnar/cstore_compression.c +++ b/src/backend/columnar/cstore_compression.c @@ -19,7 +19,7 @@ #include "utils/pg_lzcompress.h" #endif -#include "cstore.h" +#include "columnar/cstore.h" #if PG_VERSION_NUM >= 90500 diff --git a/src/backend/columnar/cstore_customscan.c b/src/backend/columnar/cstore_customscan.c index 7c163e5c9..075061caf 100644 --- a/src/backend/columnar/cstore_customscan.c +++ b/src/backend/columnar/cstore_customscan.c @@ -22,9 +22,9 @@ #include "optimizer/restrictinfo.h" #include "utils/relcache.h" -#include "cstore.h" -#include "cstore_customscan.h" -#include "cstore_tableam.h" +#include "columnar/cstore.h" +#include "columnar/cstore_customscan.h" +#include "columnar/cstore_tableam.h" typedef struct CStoreScanPath { diff --git a/src/backend/columnar/cstore_fdw.c b/src/backend/columnar/cstore_fdw.c index c2497fd27..79af4f3bd 100644 --- a/src/backend/columnar/cstore_fdw.c +++ b/src/backend/columnar/cstore_fdw.c @@ -75,9 +75,9 @@ #endif #include "utils/syscache.h" -#include "cstore.h" -#include "cstore_fdw.h" -#include "cstore_version_compat.h" +#include "columnar/cstore.h" +#include "columnar/cstore_fdw.h" +#include "columnar/cstore_version_compat.h" /* table containing information about how to partition distributed tables */ #define CITUS_EXTENSION_NAME "citus" diff --git a/src/backend/columnar/cstore_metadata_tables.c b/src/backend/columnar/cstore_metadata_tables.c index a2eab1940..1d6f36c02 100644 --- a/src/backend/columnar/cstore_metadata_tables.c +++ b/src/backend/columnar/cstore_metadata_tables.c @@ -9,8 +9,8 @@ #include "postgres.h" -#include "cstore.h" -#include "cstore_version_compat.h" +#include "columnar/cstore.h" +#include "columnar/cstore_version_compat.h" #include #include "access/heapam.h" diff --git a/src/backend/columnar/cstore_reader.c b/src/backend/columnar/cstore_reader.c index c86021f7e..b46b59729 100644 --- 
a/src/backend/columnar/cstore_reader.c +++ b/src/backend/columnar/cstore_reader.c @@ -34,8 +34,8 @@ #include "utils/lsyscache.h" #include "utils/rel.h" -#include "cstore.h" -#include "cstore_version_compat.h" +#include "columnar/cstore.h" +#include "columnar/cstore_version_compat.h" /* static function declarations */ static StripeBuffers * LoadFilteredStripeBuffers(Relation relation, diff --git a/src/backend/columnar/cstore_tableam.c b/src/backend/columnar/cstore_tableam.c index ce7d7de97..a8a87425f 100644 --- a/src/backend/columnar/cstore_tableam.c +++ b/src/backend/columnar/cstore_tableam.c @@ -42,10 +42,10 @@ #include "utils/rel.h" #include "utils/syscache.h" -#include "cstore.h" -#include "cstore_customscan.h" -#include "cstore_tableam.h" -#include "cstore_version_compat.h" +#include "columnar/cstore.h" +#include "columnar/cstore_customscan.h" +#include "columnar/cstore_tableam.h" +#include "columnar/cstore_version_compat.h" #define CSTORE_TABLEAM_NAME "cstore_tableam" diff --git a/src/backend/columnar/cstore_writer.c b/src/backend/columnar/cstore_writer.c index 9ca8c806e..873cda956 100644 --- a/src/backend/columnar/cstore_writer.c +++ b/src/backend/columnar/cstore_writer.c @@ -24,8 +24,8 @@ #include "utils/memutils.h" #include "utils/rel.h" -#include "cstore.h" -#include "cstore_version_compat.h" +#include "columnar/cstore.h" +#include "columnar/cstore_version_compat.h" static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, uint32 blockRowCount, diff --git a/src/backend/columnar/mod.c b/src/backend/columnar/mod.c index e81f7a6e5..2344a5d68 100644 --- a/src/backend/columnar/mod.c +++ b/src/backend/columnar/mod.c @@ -15,15 +15,15 @@ #include "fmgr.h" -#include "cstore.h" -#include "mod.h" +#include "columnar/cstore.h" +#include "columnar/mod.h" #ifdef USE_TABLEAM -#include "cstore_tableam.h" +#include "columnar/cstore_tableam.h" #endif #ifdef USE_FDW -#include "cstore_fdw.h" +#include "columnar/cstore_fdw.h" #endif PG_MODULE_MAGIC; diff --git 
a/src/backend/columnar/cstore.h b/src/include/columnar/cstore.h similarity index 100% rename from src/backend/columnar/cstore.h rename to src/include/columnar/cstore.h diff --git a/src/backend/columnar/cstore_customscan.h b/src/include/columnar/cstore_customscan.h similarity index 100% rename from src/backend/columnar/cstore_customscan.h rename to src/include/columnar/cstore_customscan.h diff --git a/src/backend/columnar/cstore_fdw.h b/src/include/columnar/cstore_fdw.h similarity index 100% rename from src/backend/columnar/cstore_fdw.h rename to src/include/columnar/cstore_fdw.h diff --git a/src/backend/columnar/cstore_tableam.h b/src/include/columnar/cstore_tableam.h similarity index 100% rename from src/backend/columnar/cstore_tableam.h rename to src/include/columnar/cstore_tableam.h diff --git a/src/backend/columnar/cstore_version_compat.h b/src/include/columnar/cstore_version_compat.h similarity index 100% rename from src/backend/columnar/cstore_version_compat.h rename to src/include/columnar/cstore_version_compat.h diff --git a/src/backend/columnar/mod.h b/src/include/columnar/mod.h similarity index 100% rename from src/backend/columnar/mod.h rename to src/include/columnar/mod.h From 30fbd877e7961ea828bd548b32363a5cc1574d4e Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Tue, 17 Nov 2020 18:26:21 +0100 Subject: [PATCH 109/124] remove readme that has outdated info --- src/backend/columnar/README.md | 373 --------------------------------- 1 file changed, 373 deletions(-) delete mode 100644 src/backend/columnar/README.md diff --git a/src/backend/columnar/README.md b/src/backend/columnar/README.md deleted file mode 100644 index 1a20f3abe..000000000 --- a/src/backend/columnar/README.md +++ /dev/null @@ -1,373 +0,0 @@ -cstore_fdw -========== - -[![Build Status](https://travis-ci.org/citusdata/cstore_fdw.svg?branch=master)][status] -[![Coverage](http://img.shields.io/coveralls/citusdata/cstore_fdw/master.svg)][coverage] - -Cstore_fdw is an open source columnar store 
extension for PostgreSQL. Columnar stores provide notable benefits for analytics use cases where data is loaded in batches. Cstore_fdw’s columnar nature delivers performance by only reading relevant data from disk, and it may compress data 6x-10x to reduce space requirements for data archival. - -Cstore_fdw is developed by [Citus Data](https://www.citusdata.com) and can be used in combination with [Citus](https://github.com/citusdata/citus), a postgres extension that intelligently distributes your data and queries across many nodes so your database can scale and your queries are fast. If you have any questions about how Citus can help you scale or how to use Citus in combination with cstore_fdw, [please let us know](https://www.citusdata.com/about/contact_us/). - -Join the [Mailing List][mailing-list] to stay on top of the latest developments for Cstore_fdw. - - -Introduction ------------- - -This extension uses a format for its data layout that is inspired by ORC, -the Optimized Row Columnar format. Like ORC, the cstore format improves -upon RCFile developed at Facebook, and brings the following benefits: - -* Compression: Reduces in-memory and on-disk data size by 2-4x. Can be extended - to support different codecs. -* Column projections: Only reads column data relevant to the query. Improves - performance for I/O bound queries. -* Skip indexes: Stores min/max statistics for row groups, and uses them to skip - over unrelated rows. - -Further, we used the Postgres foreign data wrapper APIs and type representations -with this extension. This brings: - -* Support for 40+ Postgres data types. The user can also create new types and - use them. -* Statistics collection. PostgreSQL's query optimizer uses these stats to - evaluate different query plans and pick the best one. -* Simple setup. Create foreign table and copy data. Run SQL. - - -Building --------- - -cstore\_fdw depends on protobuf-c for serializing and deserializing table metadata. 
-So we need to install these packages first: - - # Fedora 17+, CentOS, and Amazon Linux - sudo yum install protobuf-c-devel - - # Ubuntu 10.4+ - sudo apt-get install protobuf-c-compiler - sudo apt-get install libprotobuf-c0-dev - - # Ubuntu 18.4+ - sudo apt-get install protobuf-c-compiler - sudo apt-get install libprotobuf-c-dev - - # Mac OS X - brew install protobuf-c - -**Note.** In CentOS 5, 6, and 7, you may need to install or update EPEL 5, 6, or 7 repositories. - See [this page](https://support.rackspace.com/how-to/install-epel-and-additional-repositories-on-centos-and-red-hat/) -for instructions. - -**Note.** In Amazon Linux, the EPEL repository is installed by default, but not -enabled. See [these instructions](http://aws.amazon.com/amazon-linux-ami/faqs/#epel) -for how to enable it. - -Once you have protobuf-c installed on your machine, you are ready to build -cstore\_fdw. For this, you need to include the pg\_config directory path in -your make command. This path is typically the same as your PostgreSQL -installation's bin/ directory path. For example: - - PATH=/usr/local/pgsql/bin/:$PATH make - sudo PATH=/usr/local/pgsql/bin/:$PATH make install - -**Note.** cstore_fdw requires PostgreSQL version from 9.3 to 12. It doesn't -support earlier versions of PostgreSQL. - - -Usage ------ - -Before using cstore\_fdw, you need to add it to ```shared_preload_libraries``` -in your ```postgresql.conf``` and restart Postgres: - - shared_preload_libraries = 'cstore_fdw' # (change requires restart) - -The following parameters can be set on a cstore foreign table object. - -* filename (optional): The absolute path to the location for storing table data. - If you don't specify the filename option, cstore\_fdw will automatically - choose the $PGDATA/cstore\_fdw directory to store the files. If specified the - value of this parameter will be used as a prefix for all files created to - store table data. 
For example, the value ```/cstore_fdw/my_table``` could result in - the files ```/cstore_fdw/my_table``` and ```/cstore_fdw/my_table.footer``` being used - to manage table data. -* compression (optional): The compression used for compressing value streams. - Valid options are ```none``` and ```pglz```. The default is ```none```. -* stripe\_row\_count (optional): Number of rows per stripe. The default is - ```150000```. Reducing this decreases the amount memory used for loading data - and querying, but also decreases the performance. -* block\_row\_count (optional): Number of rows per column block. The default is - ```10000```. cstore\_fdw compresses, creates skip indexes, and reads from disk - at the block granularity. Increasing this value helps with compression and results - in fewer reads from disk. However, higher values also reduce the probability of - skipping over unrelated row blocks. - - -To load or append data into a cstore table, you have two options: - -* You can use the [```COPY``` command][copy-command] to load or append data from - a file, a program, or STDIN. -* You can use the ```INSERT INTO cstore_table SELECT ...``` syntax to load or - append data from another table. - -You can use the [```ANALYZE``` command][analyze-command] to collect statistics -about the table. These statistics help the query planner to help determine the -most efficient execution plan for each query. - -**Note.** We currently don't support updating table using DELETE, and UPDATE -commands. We also don't support single row inserts. 
- - -Updating from earlier versions to 1.7 ---------------------------------------- - -To update an existing cstore_fdw installation from versions earlier than 1.6 -you can take the following steps: - -* Download and install cstore_fdw version 1.6 using instructions from the "Building" - section, -* Restart the PostgreSQL server, -* Run ```ALTER EXTENSION cstore_fdw UPDATE;``` - - -Example -------- - -As an example, we demonstrate loading and querying data to/from a column store -table from scratch here. Let's start with downloading and decompressing the data -files. - - wget http://examples.citusdata.com/customer_reviews_1998.csv.gz - wget http://examples.citusdata.com/customer_reviews_1999.csv.gz - - gzip -d customer_reviews_1998.csv.gz - gzip -d customer_reviews_1999.csv.gz - -Then, let's log into Postgres, and run the following commands to create a column -store foreign table: - -```SQL --- load extension first time after install -CREATE EXTENSION cstore_fdw; - --- create server object -CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; - --- create foreign table -CREATE FOREIGN TABLE customer_reviews -( - customer_id TEXT, - review_date DATE, - review_rating INTEGER, - review_votes INTEGER, - review_helpful_votes INTEGER, - product_id CHAR(10), - product_title TEXT, - product_sales_rank BIGINT, - product_group TEXT, - product_category TEXT, - product_subcategory TEXT, - similar_product_ids CHAR(10)[] -) -SERVER cstore_server -OPTIONS(compression 'pglz'); -``` - -Next, we load data into the table: - -```SQL -\COPY customer_reviews FROM 'customer_reviews_1998.csv' WITH CSV; -\COPY customer_reviews FROM 'customer_reviews_1999.csv' WITH CSV; -``` - -**Note.** If you are getting ```ERROR: cannot copy to foreign table -"customer_reviews"``` when trying to run the COPY commands, double check that you -have added cstore\_fdw to ```shared_preload_libraries``` in ```postgresql.conf``` -and restarted Postgres. 
- -Next, we collect data distribution statistics about the table. This is optional, -but usually very helpful: - -```SQL -ANALYZE customer_reviews; -``` - -Finally, let's run some example SQL queries on the column store table. - -```SQL --- Find all reviews a particular customer made on the Dune series in 1998. -SELECT - customer_id, review_date, review_rating, product_id, product_title -FROM - customer_reviews -WHERE - customer_id ='A27T7HVDXA3K2A' AND - product_title LIKE '%Dune%' AND - review_date >= '1998-01-01' AND - review_date <= '1998-12-31'; - --- Do we have a correlation between a book's title's length and its review ratings? -SELECT - width_bucket(length(product_title), 1, 50, 5) title_length_bucket, - round(avg(review_rating), 2) AS review_average, - count(*) -FROM - customer_reviews -WHERE - product_group = 'Book' -GROUP BY - title_length_bucket -ORDER BY - title_length_bucket; -``` - - -Usage with Citus ----------------- - -The example above illustrated how to load data into a PostgreSQL database running -on a single host. However, sometimes your data is too large to analyze effectively -on a single host. Citus is a product built by Citus Data that allows you to run -a distributed PostgreSQL database to analyze your data using the power of multiple -hosts. You can easily install and run other PostgreSQL extensions and foreign data -wrappers—including cstore_fdw—alongside Citus. - -You can create a cstore_fdw table and distribute it using the -```create_distributed_table()``` UDF just like any other table. You can load data -using the ```copy``` command as you would do in single node PostgreSQL. - -Using Skip Indexes ------------------- - -cstore_fdw partitions each column into multiple blocks. Skip indexes store minimum -and maximum values for each of these blocks. While scanning the table, if min/max -values of the block contradict the WHERE clause, then the block is completely -skipped. 
This way, the query processes less data and hence finishes faster. - -To use skip indexes more efficiently, you should load the data after sorting it -on a column that is commonly used in the WHERE clause. This ensures that there is -a minimum overlap between blocks and the chance of them being skipped is higher. - -In practice, the data generally has an inherent dimension (for example a time field) -on which it is naturally sorted. Usually, the queries also have a filter clause on -that column (for example you want to query only the last week's data), and hence you -don't need to sort the data in such cases. - - -Uninstalling cstore_fdw ------------------------ - -Before uninstalling the extension, first you need to drop all the cstore tables: - - postgres=# DROP FOREIGN TABLE cstore_table_1; - ... - postgres=# DROP FOREIGN TABLE cstore_table_n; - -Then, you should drop the cstore server and extension: - - postgres=# DROP SERVER cstore_server; - postgres=# DROP EXTENSION cstore_fdw; - -cstore\_fdw automatically creates some directories inside the PostgreSQL's data -directory to store its files. To remove them, you can run: - - $ rm -rf $PGDATA/cstore_fdw - -Then, you should remove cstore\_fdw from ```shared_preload_libraries``` in -your ```postgresql.conf```: - - shared_preload_libraries = '' # (change requires restart) - -Finally, to uninstall the extension you can run the following command in the -extension's source code directory. 
This will clean up all the files copied during -the installation: - - $ sudo PATH=/usr/local/pgsql/bin/:$PATH make uninstall - - -Changeset ---------- -### Version 1.7.0 -* (Fix) Add support for PostgreSQL 12 -* (Fix) Support count(t.*) from t type queries -* (Fix) Build failures for MacOS 10.14+ -* (Fix) Make foreign scan parallel safe -* (Fix) Add support for PostgreSQL 11 COPY -### Version 1.6.2 -* (Fix) Add support for PostgreSQL 11 -### Version 1.6.1 -* (Fix) Fix crash during truncate (Cstore crashing server when enabled, not used) -* (Fix) No such file or directory warning when attempting to drop database -### Version 1.6 -* (Feature) Added support for PostgreSQL 10. -* (Fix) Removed table files when a schema, extension or database is dropped. -* (Fix) Removed unused code fragments. -* (Fix) Fixed incorrect initialization of stripe buffers. -* (Fix) Checked user access rights when executing truncate. -* (Fix) Made copy command cancellable. -* (Fix) Fixed namespace issue regarding drop table. - -### Version 1.5.1 -* (Fix) Verify cstore_fdw server on CREATE FOREIGN TABLE command - -### Version 1.5 -* (Feature) Added support for PostgreSQL 9.6. -* (Fix) Removed table data when cstore_fdw table is indirectly dropped. -* (Fix) Removed unused code fragments. -* (Fix) Fixed column selection logic to return columns used in expressions. -* (Fix) Prevented alter table command from changinf column type to incompatible types. - -### Version 1.4.1 - -* (Fix) Compatibility fix for Citus [copy command][copy-command]. - -### Version 1.4 - -* (Feature) Added support for ```TRUNCATE TABLE``` -* (Fix) Added support for PostgreSQL 9.5 - -### Version 1.3 - -* (Feature) Added support for ```ALTER TABLE ADD COLUMN``` and ```ALTER TABLE DROP COLUMN```. -* (Feature) Added column list support in ```COPY FROM```. -* (Optimization) Improve row count estimation, which results in better plans. -* (Fix) Fix the deadlock issue during concurrent inserts. 
-* (Fix) Return correct result when using whole row references. - -### Version 1.2 - -* (Feature) Added support for ```COPY TO```. -* (Feature) Added support for ```INSERT INTO cstore_table SELECT ...```. -* (Optimization) Improved memory usage. -* (Fix) Dropping multiple cstore tables in a single command cleans-up files - of all them. - -### Version 1.1 - -* (Feature) Make filename option optional, and use a default directory inside - $PGDATA to manage cstore tables. -* (Feature) Automatically delete files on DROP FOREIGN TABLE. -* (Fix) Return empty table if no data has been loaded. Previously, cstore_fdw - errored out. -* (Fix) Fix overestimating relation column counts when planning. -* (Feature) Added cstore\_table\_size(tablename) for getting the size of a cstore - table in bytes. - - -Copyright ---------- - -Copyright (c) 2017 Citus Data, Inc. - -This module is free software; you can redistribute it and/or modify it under the -Apache v2.0 License. - -For all types of questions and comments about the wrapper, please contact us at -engage @ citusdata.com. 
- -[status]: https://travis-ci.org/citusdata/cstore_fdw -[mailing-list]: https://groups.google.com/forum/#!forum/cstore-users -[coverage]: https://coveralls.io/r/citusdata/cstore_fdw -[copy-command]: http://www.postgresql.org/docs/current/static/sql-copy.html -[analyze-command]: http://www.postgresql.org/docs/current/static/sql-analyze.html From f89bd3eeb5e8c1c4c5f1a981e0415322769e629a Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Thu, 5 Nov 2020 14:46:45 +0100 Subject: [PATCH 110/124] move columnar test files --- src/backend/columnar/expected/am_create.out | 20 ------------------- .../regress}/data/array_types.csv | 0 .../regress}/data/block_filtering.csv | 0 .../regress}/data/contestants.1.csv | 0 .../regress}/data/contestants.2.csv | 0 .../regress}/data/datetime_types.csv | 0 .../data/enum_and_composite_types.csv | 0 .../regress}/data/null_values.csv | 0 .../regress}/data/other_types.csv | 0 .../regress}/data/range_types.csv | 0 .../regress}/expected/am_alter.out | 0 .../regress}/expected/am_analyze.out | 0 .../regress}/expected/am_clean.out | 0 .../regress}/expected/am_drop.out | 0 .../regress}/expected/am_functions.out | 0 .../regress}/expected/am_insert.out | 0 .../regress}/expected/am_join.out | 0 .../regress}/expected/am_query.out | 0 .../regress}/expected/am_rollback.out | 0 .../regress}/expected/am_tableoptions.out | 0 .../regress}/expected/am_trigger.out | 0 .../regress}/expected/am_truncate.out | 0 .../regress}/expected/am_truncate_0.out | 0 .../regress}/expected/am_vacuum.out | 0 .../regress}/expected/am_vacuum_vs_insert.out | 0 .../expected/am_write_concurrency.out | 0 .../regress}/expected/create.out | 0 .../regress}/expected/extension_create.out | 0 .../regress}/expected/fdw_alter.out | 0 .../regress}/expected/fdw_analyze.out | 0 .../regress}/expected/fdw_clean.out | 0 .../regress}/expected/fdw_drop.out | 0 .../regress}/expected/fdw_functions.out | 0 .../regress}/expected/fdw_insert.out | 0 .../regress}/expected/fdw_query.out | 0 
.../regress}/expected/fdw_rollback.out | 0 .../regress}/expected/fdw_truncate.out | 0 .../regress}/expected/fdw_truncate_0.out | 0 .../regress}/input/am_block_filtering.source | 0 .../regress}/input/am_copyto.source | 0 .../regress}/input/am_create.source | 0 .../regress}/input/am_data_types.source | 0 .../regress}/input/am_load.source | 0 .../regress}/input/fdw_block_filtering.source | 0 .../regress}/input/fdw_copyto.source | 0 .../regress}/input/fdw_create.source | 0 .../regress}/input/fdw_data_types.source | 0 .../regress}/input/fdw_load.source | 0 .../regress}/output/am_block_filtering.source | 0 .../regress}/output/am_copyto.source | 0 .../regress}/output/am_create.source | 0 .../regress}/output/am_data_types.source | 0 .../regress}/output/am_load.source | 0 .../output/fdw_block_filtering.source | 0 .../regress}/output/fdw_copyto.source | 0 .../regress}/output/fdw_create.source | 0 .../regress}/output/fdw_data_types.source | 0 .../regress}/output/fdw_load.source | 0 .../regress/spec}/am_vacuum_vs_insert.spec | 0 .../regress/spec}/am_write_concurrency.spec | 0 .../specs => test/regress/spec}/create.spec | 0 .../regress}/sql/am_alter.sql | 0 .../regress}/sql/am_analyze.sql | 0 .../regress}/sql/am_clean.sql | 0 .../columnar => test/regress}/sql/am_drop.sql | 0 .../regress}/sql/am_functions.sql | 0 .../regress}/sql/am_insert.sql | 0 .../columnar => test/regress}/sql/am_join.sql | 0 .../regress}/sql/am_query.sql | 0 .../regress}/sql/am_rollback.sql | 0 .../regress}/sql/am_tableoptions.sql | 0 .../regress}/sql/am_trigger.sql | 0 .../regress}/sql/am_truncate.sql | 0 .../regress}/sql/am_vacuum.sql | 0 .../regress}/sql/extension_create.sql | 0 .../regress}/sql/fdw_alter.sql | 0 .../regress}/sql/fdw_analyze.sql | 0 .../regress}/sql/fdw_clean.sql | 0 .../regress}/sql/fdw_drop.sql | 0 .../regress}/sql/fdw_functions.sql | 0 .../regress}/sql/fdw_insert.sql | 0 .../regress}/sql/fdw_query.sql | 0 .../regress}/sql/fdw_rollback.sql | 0 .../regress}/sql/fdw_truncate.sql | 0 84 
files changed, 20 deletions(-) delete mode 100644 src/backend/columnar/expected/am_create.out rename src/{backend/columnar => test/regress}/data/array_types.csv (100%) rename src/{backend/columnar => test/regress}/data/block_filtering.csv (100%) rename src/{backend/columnar => test/regress}/data/contestants.1.csv (100%) rename src/{backend/columnar => test/regress}/data/contestants.2.csv (100%) rename src/{backend/columnar => test/regress}/data/datetime_types.csv (100%) rename src/{backend/columnar => test/regress}/data/enum_and_composite_types.csv (100%) rename src/{backend/columnar => test/regress}/data/null_values.csv (100%) rename src/{backend/columnar => test/regress}/data/other_types.csv (100%) rename src/{backend/columnar => test/regress}/data/range_types.csv (100%) rename src/{backend/columnar => test/regress}/expected/am_alter.out (100%) rename src/{backend/columnar => test/regress}/expected/am_analyze.out (100%) rename src/{backend/columnar => test/regress}/expected/am_clean.out (100%) rename src/{backend/columnar => test/regress}/expected/am_drop.out (100%) rename src/{backend/columnar => test/regress}/expected/am_functions.out (100%) rename src/{backend/columnar => test/regress}/expected/am_insert.out (100%) rename src/{backend/columnar => test/regress}/expected/am_join.out (100%) rename src/{backend/columnar => test/regress}/expected/am_query.out (100%) rename src/{backend/columnar => test/regress}/expected/am_rollback.out (100%) rename src/{backend/columnar => test/regress}/expected/am_tableoptions.out (100%) rename src/{backend/columnar => test/regress}/expected/am_trigger.out (100%) rename src/{backend/columnar => test/regress}/expected/am_truncate.out (100%) rename src/{backend/columnar => test/regress}/expected/am_truncate_0.out (100%) rename src/{backend/columnar => test/regress}/expected/am_vacuum.out (100%) rename src/{backend/columnar => test/regress}/expected/am_vacuum_vs_insert.out (100%) rename src/{backend/columnar => 
test/regress}/expected/am_write_concurrency.out (100%) rename src/{backend/columnar => test/regress}/expected/create.out (100%) rename src/{backend/columnar => test/regress}/expected/extension_create.out (100%) rename src/{backend/columnar => test/regress}/expected/fdw_alter.out (100%) rename src/{backend/columnar => test/regress}/expected/fdw_analyze.out (100%) rename src/{backend/columnar => test/regress}/expected/fdw_clean.out (100%) rename src/{backend/columnar => test/regress}/expected/fdw_drop.out (100%) rename src/{backend/columnar => test/regress}/expected/fdw_functions.out (100%) rename src/{backend/columnar => test/regress}/expected/fdw_insert.out (100%) rename src/{backend/columnar => test/regress}/expected/fdw_query.out (100%) rename src/{backend/columnar => test/regress}/expected/fdw_rollback.out (100%) rename src/{backend/columnar => test/regress}/expected/fdw_truncate.out (100%) rename src/{backend/columnar => test/regress}/expected/fdw_truncate_0.out (100%) rename src/{backend/columnar => test/regress}/input/am_block_filtering.source (100%) rename src/{backend/columnar => test/regress}/input/am_copyto.source (100%) rename src/{backend/columnar => test/regress}/input/am_create.source (100%) rename src/{backend/columnar => test/regress}/input/am_data_types.source (100%) rename src/{backend/columnar => test/regress}/input/am_load.source (100%) rename src/{backend/columnar => test/regress}/input/fdw_block_filtering.source (100%) rename src/{backend/columnar => test/regress}/input/fdw_copyto.source (100%) rename src/{backend/columnar => test/regress}/input/fdw_create.source (100%) rename src/{backend/columnar => test/regress}/input/fdw_data_types.source (100%) rename src/{backend/columnar => test/regress}/input/fdw_load.source (100%) rename src/{backend/columnar => test/regress}/output/am_block_filtering.source (100%) rename src/{backend/columnar => test/regress}/output/am_copyto.source (100%) rename src/{backend/columnar => 
test/regress}/output/am_create.source (100%) rename src/{backend/columnar => test/regress}/output/am_data_types.source (100%) rename src/{backend/columnar => test/regress}/output/am_load.source (100%) rename src/{backend/columnar => test/regress}/output/fdw_block_filtering.source (100%) rename src/{backend/columnar => test/regress}/output/fdw_copyto.source (100%) rename src/{backend/columnar => test/regress}/output/fdw_create.source (100%) rename src/{backend/columnar => test/regress}/output/fdw_data_types.source (100%) rename src/{backend/columnar => test/regress}/output/fdw_load.source (100%) rename src/{backend/columnar/specs => test/regress/spec}/am_vacuum_vs_insert.spec (100%) rename src/{backend/columnar/specs => test/regress/spec}/am_write_concurrency.spec (100%) rename src/{backend/columnar/specs => test/regress/spec}/create.spec (100%) rename src/{backend/columnar => test/regress}/sql/am_alter.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_analyze.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_clean.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_drop.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_functions.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_insert.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_join.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_query.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_rollback.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_tableoptions.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_trigger.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_truncate.sql (100%) rename src/{backend/columnar => test/regress}/sql/am_vacuum.sql (100%) rename src/{backend/columnar => test/regress}/sql/extension_create.sql (100%) rename src/{backend/columnar => test/regress}/sql/fdw_alter.sql (100%) rename src/{backend/columnar => 
test/regress}/sql/fdw_analyze.sql (100%) rename src/{backend/columnar => test/regress}/sql/fdw_clean.sql (100%) rename src/{backend/columnar => test/regress}/sql/fdw_drop.sql (100%) rename src/{backend/columnar => test/regress}/sql/fdw_functions.sql (100%) rename src/{backend/columnar => test/regress}/sql/fdw_insert.sql (100%) rename src/{backend/columnar => test/regress}/sql/fdw_query.sql (100%) rename src/{backend/columnar => test/regress}/sql/fdw_rollback.sql (100%) rename src/{backend/columnar => test/regress}/sql/fdw_truncate.sql (100%) diff --git a/src/backend/columnar/expected/am_create.out b/src/backend/columnar/expected/am_create.out deleted file mode 100644 index 47c6a6c44..000000000 --- a/src/backend/columnar/expected/am_create.out +++ /dev/null @@ -1,20 +0,0 @@ --- --- Test the CREATE statements related to cstore. --- --- Create uncompressed table -CREATE TABLE contestant (handle TEXT, birthdate DATE, rating INT, - percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam; --- Create compressed table with automatically determined file path --- COMPRESSED -CREATE TABLE contestant_compressed (handle TEXT, birthdate DATE, rating INT, - percentile FLOAT, country CHAR(3), achievements TEXT[]) - USING cstore_tableam; --- Test that querying an empty table works -ANALYZE contestant; -SELECT count(*) FROM contestant; - count -------- - 0 -(1 row) - diff --git a/src/backend/columnar/data/array_types.csv b/src/test/regress/data/array_types.csv similarity index 100% rename from src/backend/columnar/data/array_types.csv rename to src/test/regress/data/array_types.csv diff --git a/src/backend/columnar/data/block_filtering.csv b/src/test/regress/data/block_filtering.csv similarity index 100% rename from src/backend/columnar/data/block_filtering.csv rename to src/test/regress/data/block_filtering.csv diff --git a/src/backend/columnar/data/contestants.1.csv b/src/test/regress/data/contestants.1.csv similarity index 100% rename from 
src/backend/columnar/data/contestants.1.csv rename to src/test/regress/data/contestants.1.csv diff --git a/src/backend/columnar/data/contestants.2.csv b/src/test/regress/data/contestants.2.csv similarity index 100% rename from src/backend/columnar/data/contestants.2.csv rename to src/test/regress/data/contestants.2.csv diff --git a/src/backend/columnar/data/datetime_types.csv b/src/test/regress/data/datetime_types.csv similarity index 100% rename from src/backend/columnar/data/datetime_types.csv rename to src/test/regress/data/datetime_types.csv diff --git a/src/backend/columnar/data/enum_and_composite_types.csv b/src/test/regress/data/enum_and_composite_types.csv similarity index 100% rename from src/backend/columnar/data/enum_and_composite_types.csv rename to src/test/regress/data/enum_and_composite_types.csv diff --git a/src/backend/columnar/data/null_values.csv b/src/test/regress/data/null_values.csv similarity index 100% rename from src/backend/columnar/data/null_values.csv rename to src/test/regress/data/null_values.csv diff --git a/src/backend/columnar/data/other_types.csv b/src/test/regress/data/other_types.csv similarity index 100% rename from src/backend/columnar/data/other_types.csv rename to src/test/regress/data/other_types.csv diff --git a/src/backend/columnar/data/range_types.csv b/src/test/regress/data/range_types.csv similarity index 100% rename from src/backend/columnar/data/range_types.csv rename to src/test/regress/data/range_types.csv diff --git a/src/backend/columnar/expected/am_alter.out b/src/test/regress/expected/am_alter.out similarity index 100% rename from src/backend/columnar/expected/am_alter.out rename to src/test/regress/expected/am_alter.out diff --git a/src/backend/columnar/expected/am_analyze.out b/src/test/regress/expected/am_analyze.out similarity index 100% rename from src/backend/columnar/expected/am_analyze.out rename to src/test/regress/expected/am_analyze.out diff --git a/src/backend/columnar/expected/am_clean.out 
b/src/test/regress/expected/am_clean.out similarity index 100% rename from src/backend/columnar/expected/am_clean.out rename to src/test/regress/expected/am_clean.out diff --git a/src/backend/columnar/expected/am_drop.out b/src/test/regress/expected/am_drop.out similarity index 100% rename from src/backend/columnar/expected/am_drop.out rename to src/test/regress/expected/am_drop.out diff --git a/src/backend/columnar/expected/am_functions.out b/src/test/regress/expected/am_functions.out similarity index 100% rename from src/backend/columnar/expected/am_functions.out rename to src/test/regress/expected/am_functions.out diff --git a/src/backend/columnar/expected/am_insert.out b/src/test/regress/expected/am_insert.out similarity index 100% rename from src/backend/columnar/expected/am_insert.out rename to src/test/regress/expected/am_insert.out diff --git a/src/backend/columnar/expected/am_join.out b/src/test/regress/expected/am_join.out similarity index 100% rename from src/backend/columnar/expected/am_join.out rename to src/test/regress/expected/am_join.out diff --git a/src/backend/columnar/expected/am_query.out b/src/test/regress/expected/am_query.out similarity index 100% rename from src/backend/columnar/expected/am_query.out rename to src/test/regress/expected/am_query.out diff --git a/src/backend/columnar/expected/am_rollback.out b/src/test/regress/expected/am_rollback.out similarity index 100% rename from src/backend/columnar/expected/am_rollback.out rename to src/test/regress/expected/am_rollback.out diff --git a/src/backend/columnar/expected/am_tableoptions.out b/src/test/regress/expected/am_tableoptions.out similarity index 100% rename from src/backend/columnar/expected/am_tableoptions.out rename to src/test/regress/expected/am_tableoptions.out diff --git a/src/backend/columnar/expected/am_trigger.out b/src/test/regress/expected/am_trigger.out similarity index 100% rename from src/backend/columnar/expected/am_trigger.out rename to 
src/test/regress/expected/am_trigger.out diff --git a/src/backend/columnar/expected/am_truncate.out b/src/test/regress/expected/am_truncate.out similarity index 100% rename from src/backend/columnar/expected/am_truncate.out rename to src/test/regress/expected/am_truncate.out diff --git a/src/backend/columnar/expected/am_truncate_0.out b/src/test/regress/expected/am_truncate_0.out similarity index 100% rename from src/backend/columnar/expected/am_truncate_0.out rename to src/test/regress/expected/am_truncate_0.out diff --git a/src/backend/columnar/expected/am_vacuum.out b/src/test/regress/expected/am_vacuum.out similarity index 100% rename from src/backend/columnar/expected/am_vacuum.out rename to src/test/regress/expected/am_vacuum.out diff --git a/src/backend/columnar/expected/am_vacuum_vs_insert.out b/src/test/regress/expected/am_vacuum_vs_insert.out similarity index 100% rename from src/backend/columnar/expected/am_vacuum_vs_insert.out rename to src/test/regress/expected/am_vacuum_vs_insert.out diff --git a/src/backend/columnar/expected/am_write_concurrency.out b/src/test/regress/expected/am_write_concurrency.out similarity index 100% rename from src/backend/columnar/expected/am_write_concurrency.out rename to src/test/regress/expected/am_write_concurrency.out diff --git a/src/backend/columnar/expected/create.out b/src/test/regress/expected/create.out similarity index 100% rename from src/backend/columnar/expected/create.out rename to src/test/regress/expected/create.out diff --git a/src/backend/columnar/expected/extension_create.out b/src/test/regress/expected/extension_create.out similarity index 100% rename from src/backend/columnar/expected/extension_create.out rename to src/test/regress/expected/extension_create.out diff --git a/src/backend/columnar/expected/fdw_alter.out b/src/test/regress/expected/fdw_alter.out similarity index 100% rename from src/backend/columnar/expected/fdw_alter.out rename to src/test/regress/expected/fdw_alter.out diff --git 
a/src/backend/columnar/expected/fdw_analyze.out b/src/test/regress/expected/fdw_analyze.out similarity index 100% rename from src/backend/columnar/expected/fdw_analyze.out rename to src/test/regress/expected/fdw_analyze.out diff --git a/src/backend/columnar/expected/fdw_clean.out b/src/test/regress/expected/fdw_clean.out similarity index 100% rename from src/backend/columnar/expected/fdw_clean.out rename to src/test/regress/expected/fdw_clean.out diff --git a/src/backend/columnar/expected/fdw_drop.out b/src/test/regress/expected/fdw_drop.out similarity index 100% rename from src/backend/columnar/expected/fdw_drop.out rename to src/test/regress/expected/fdw_drop.out diff --git a/src/backend/columnar/expected/fdw_functions.out b/src/test/regress/expected/fdw_functions.out similarity index 100% rename from src/backend/columnar/expected/fdw_functions.out rename to src/test/regress/expected/fdw_functions.out diff --git a/src/backend/columnar/expected/fdw_insert.out b/src/test/regress/expected/fdw_insert.out similarity index 100% rename from src/backend/columnar/expected/fdw_insert.out rename to src/test/regress/expected/fdw_insert.out diff --git a/src/backend/columnar/expected/fdw_query.out b/src/test/regress/expected/fdw_query.out similarity index 100% rename from src/backend/columnar/expected/fdw_query.out rename to src/test/regress/expected/fdw_query.out diff --git a/src/backend/columnar/expected/fdw_rollback.out b/src/test/regress/expected/fdw_rollback.out similarity index 100% rename from src/backend/columnar/expected/fdw_rollback.out rename to src/test/regress/expected/fdw_rollback.out diff --git a/src/backend/columnar/expected/fdw_truncate.out b/src/test/regress/expected/fdw_truncate.out similarity index 100% rename from src/backend/columnar/expected/fdw_truncate.out rename to src/test/regress/expected/fdw_truncate.out diff --git a/src/backend/columnar/expected/fdw_truncate_0.out b/src/test/regress/expected/fdw_truncate_0.out similarity index 100% rename from 
src/backend/columnar/expected/fdw_truncate_0.out rename to src/test/regress/expected/fdw_truncate_0.out diff --git a/src/backend/columnar/input/am_block_filtering.source b/src/test/regress/input/am_block_filtering.source similarity index 100% rename from src/backend/columnar/input/am_block_filtering.source rename to src/test/regress/input/am_block_filtering.source diff --git a/src/backend/columnar/input/am_copyto.source b/src/test/regress/input/am_copyto.source similarity index 100% rename from src/backend/columnar/input/am_copyto.source rename to src/test/regress/input/am_copyto.source diff --git a/src/backend/columnar/input/am_create.source b/src/test/regress/input/am_create.source similarity index 100% rename from src/backend/columnar/input/am_create.source rename to src/test/regress/input/am_create.source diff --git a/src/backend/columnar/input/am_data_types.source b/src/test/regress/input/am_data_types.source similarity index 100% rename from src/backend/columnar/input/am_data_types.source rename to src/test/regress/input/am_data_types.source diff --git a/src/backend/columnar/input/am_load.source b/src/test/regress/input/am_load.source similarity index 100% rename from src/backend/columnar/input/am_load.source rename to src/test/regress/input/am_load.source diff --git a/src/backend/columnar/input/fdw_block_filtering.source b/src/test/regress/input/fdw_block_filtering.source similarity index 100% rename from src/backend/columnar/input/fdw_block_filtering.source rename to src/test/regress/input/fdw_block_filtering.source diff --git a/src/backend/columnar/input/fdw_copyto.source b/src/test/regress/input/fdw_copyto.source similarity index 100% rename from src/backend/columnar/input/fdw_copyto.source rename to src/test/regress/input/fdw_copyto.source diff --git a/src/backend/columnar/input/fdw_create.source b/src/test/regress/input/fdw_create.source similarity index 100% rename from src/backend/columnar/input/fdw_create.source rename to 
src/test/regress/input/fdw_create.source diff --git a/src/backend/columnar/input/fdw_data_types.source b/src/test/regress/input/fdw_data_types.source similarity index 100% rename from src/backend/columnar/input/fdw_data_types.source rename to src/test/regress/input/fdw_data_types.source diff --git a/src/backend/columnar/input/fdw_load.source b/src/test/regress/input/fdw_load.source similarity index 100% rename from src/backend/columnar/input/fdw_load.source rename to src/test/regress/input/fdw_load.source diff --git a/src/backend/columnar/output/am_block_filtering.source b/src/test/regress/output/am_block_filtering.source similarity index 100% rename from src/backend/columnar/output/am_block_filtering.source rename to src/test/regress/output/am_block_filtering.source diff --git a/src/backend/columnar/output/am_copyto.source b/src/test/regress/output/am_copyto.source similarity index 100% rename from src/backend/columnar/output/am_copyto.source rename to src/test/regress/output/am_copyto.source diff --git a/src/backend/columnar/output/am_create.source b/src/test/regress/output/am_create.source similarity index 100% rename from src/backend/columnar/output/am_create.source rename to src/test/regress/output/am_create.source diff --git a/src/backend/columnar/output/am_data_types.source b/src/test/regress/output/am_data_types.source similarity index 100% rename from src/backend/columnar/output/am_data_types.source rename to src/test/regress/output/am_data_types.source diff --git a/src/backend/columnar/output/am_load.source b/src/test/regress/output/am_load.source similarity index 100% rename from src/backend/columnar/output/am_load.source rename to src/test/regress/output/am_load.source diff --git a/src/backend/columnar/output/fdw_block_filtering.source b/src/test/regress/output/fdw_block_filtering.source similarity index 100% rename from src/backend/columnar/output/fdw_block_filtering.source rename to src/test/regress/output/fdw_block_filtering.source diff --git 
a/src/backend/columnar/output/fdw_copyto.source b/src/test/regress/output/fdw_copyto.source similarity index 100% rename from src/backend/columnar/output/fdw_copyto.source rename to src/test/regress/output/fdw_copyto.source diff --git a/src/backend/columnar/output/fdw_create.source b/src/test/regress/output/fdw_create.source similarity index 100% rename from src/backend/columnar/output/fdw_create.source rename to src/test/regress/output/fdw_create.source diff --git a/src/backend/columnar/output/fdw_data_types.source b/src/test/regress/output/fdw_data_types.source similarity index 100% rename from src/backend/columnar/output/fdw_data_types.source rename to src/test/regress/output/fdw_data_types.source diff --git a/src/backend/columnar/output/fdw_load.source b/src/test/regress/output/fdw_load.source similarity index 100% rename from src/backend/columnar/output/fdw_load.source rename to src/test/regress/output/fdw_load.source diff --git a/src/backend/columnar/specs/am_vacuum_vs_insert.spec b/src/test/regress/spec/am_vacuum_vs_insert.spec similarity index 100% rename from src/backend/columnar/specs/am_vacuum_vs_insert.spec rename to src/test/regress/spec/am_vacuum_vs_insert.spec diff --git a/src/backend/columnar/specs/am_write_concurrency.spec b/src/test/regress/spec/am_write_concurrency.spec similarity index 100% rename from src/backend/columnar/specs/am_write_concurrency.spec rename to src/test/regress/spec/am_write_concurrency.spec diff --git a/src/backend/columnar/specs/create.spec b/src/test/regress/spec/create.spec similarity index 100% rename from src/backend/columnar/specs/create.spec rename to src/test/regress/spec/create.spec diff --git a/src/backend/columnar/sql/am_alter.sql b/src/test/regress/sql/am_alter.sql similarity index 100% rename from src/backend/columnar/sql/am_alter.sql rename to src/test/regress/sql/am_alter.sql diff --git a/src/backend/columnar/sql/am_analyze.sql b/src/test/regress/sql/am_analyze.sql similarity index 100% rename from 
src/backend/columnar/sql/am_analyze.sql rename to src/test/regress/sql/am_analyze.sql diff --git a/src/backend/columnar/sql/am_clean.sql b/src/test/regress/sql/am_clean.sql similarity index 100% rename from src/backend/columnar/sql/am_clean.sql rename to src/test/regress/sql/am_clean.sql diff --git a/src/backend/columnar/sql/am_drop.sql b/src/test/regress/sql/am_drop.sql similarity index 100% rename from src/backend/columnar/sql/am_drop.sql rename to src/test/regress/sql/am_drop.sql diff --git a/src/backend/columnar/sql/am_functions.sql b/src/test/regress/sql/am_functions.sql similarity index 100% rename from src/backend/columnar/sql/am_functions.sql rename to src/test/regress/sql/am_functions.sql diff --git a/src/backend/columnar/sql/am_insert.sql b/src/test/regress/sql/am_insert.sql similarity index 100% rename from src/backend/columnar/sql/am_insert.sql rename to src/test/regress/sql/am_insert.sql diff --git a/src/backend/columnar/sql/am_join.sql b/src/test/regress/sql/am_join.sql similarity index 100% rename from src/backend/columnar/sql/am_join.sql rename to src/test/regress/sql/am_join.sql diff --git a/src/backend/columnar/sql/am_query.sql b/src/test/regress/sql/am_query.sql similarity index 100% rename from src/backend/columnar/sql/am_query.sql rename to src/test/regress/sql/am_query.sql diff --git a/src/backend/columnar/sql/am_rollback.sql b/src/test/regress/sql/am_rollback.sql similarity index 100% rename from src/backend/columnar/sql/am_rollback.sql rename to src/test/regress/sql/am_rollback.sql diff --git a/src/backend/columnar/sql/am_tableoptions.sql b/src/test/regress/sql/am_tableoptions.sql similarity index 100% rename from src/backend/columnar/sql/am_tableoptions.sql rename to src/test/regress/sql/am_tableoptions.sql diff --git a/src/backend/columnar/sql/am_trigger.sql b/src/test/regress/sql/am_trigger.sql similarity index 100% rename from src/backend/columnar/sql/am_trigger.sql rename to src/test/regress/sql/am_trigger.sql diff --git 
a/src/backend/columnar/sql/am_truncate.sql b/src/test/regress/sql/am_truncate.sql similarity index 100% rename from src/backend/columnar/sql/am_truncate.sql rename to src/test/regress/sql/am_truncate.sql diff --git a/src/backend/columnar/sql/am_vacuum.sql b/src/test/regress/sql/am_vacuum.sql similarity index 100% rename from src/backend/columnar/sql/am_vacuum.sql rename to src/test/regress/sql/am_vacuum.sql diff --git a/src/backend/columnar/sql/extension_create.sql b/src/test/regress/sql/extension_create.sql similarity index 100% rename from src/backend/columnar/sql/extension_create.sql rename to src/test/regress/sql/extension_create.sql diff --git a/src/backend/columnar/sql/fdw_alter.sql b/src/test/regress/sql/fdw_alter.sql similarity index 100% rename from src/backend/columnar/sql/fdw_alter.sql rename to src/test/regress/sql/fdw_alter.sql diff --git a/src/backend/columnar/sql/fdw_analyze.sql b/src/test/regress/sql/fdw_analyze.sql similarity index 100% rename from src/backend/columnar/sql/fdw_analyze.sql rename to src/test/regress/sql/fdw_analyze.sql diff --git a/src/backend/columnar/sql/fdw_clean.sql b/src/test/regress/sql/fdw_clean.sql similarity index 100% rename from src/backend/columnar/sql/fdw_clean.sql rename to src/test/regress/sql/fdw_clean.sql diff --git a/src/backend/columnar/sql/fdw_drop.sql b/src/test/regress/sql/fdw_drop.sql similarity index 100% rename from src/backend/columnar/sql/fdw_drop.sql rename to src/test/regress/sql/fdw_drop.sql diff --git a/src/backend/columnar/sql/fdw_functions.sql b/src/test/regress/sql/fdw_functions.sql similarity index 100% rename from src/backend/columnar/sql/fdw_functions.sql rename to src/test/regress/sql/fdw_functions.sql diff --git a/src/backend/columnar/sql/fdw_insert.sql b/src/test/regress/sql/fdw_insert.sql similarity index 100% rename from src/backend/columnar/sql/fdw_insert.sql rename to src/test/regress/sql/fdw_insert.sql diff --git a/src/backend/columnar/sql/fdw_query.sql 
b/src/test/regress/sql/fdw_query.sql similarity index 100% rename from src/backend/columnar/sql/fdw_query.sql rename to src/test/regress/sql/fdw_query.sql diff --git a/src/backend/columnar/sql/fdw_rollback.sql b/src/test/regress/sql/fdw_rollback.sql similarity index 100% rename from src/backend/columnar/sql/fdw_rollback.sql rename to src/test/regress/sql/fdw_rollback.sql diff --git a/src/backend/columnar/sql/fdw_truncate.sql b/src/test/regress/sql/fdw_truncate.sql similarity index 100% rename from src/backend/columnar/sql/fdw_truncate.sql rename to src/test/regress/sql/fdw_truncate.sql From 213eb93e6d4aa732f9ab926c261f68b934d3c3ca Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Thu, 5 Nov 2020 14:51:41 +0100 Subject: [PATCH 111/124] make columnar compile and functionally working --- Makefile.global.in | 3 + configure | 77 +++++++ configure.in | 21 ++ src/backend/columnar/.circleci/build.sh | 16 -- src/backend/columnar/.circleci/config.yml | 138 ------------ src/backend/columnar/.circleci/run_test.sh | 27 --- src/backend/columnar/LICENSE | 201 ------------------ src/backend/columnar/META.json | 45 ---- src/backend/columnar/Makefile | 102 --------- src/backend/columnar/TODO.md | 41 ---- src/backend/columnar/cstore_customscan.c | 6 + src/backend/columnar/cstore_fdw--1.0--1.1.sql | 26 --- src/backend/columnar/cstore_fdw--1.1--1.2.sql | 3 - src/backend/columnar/cstore_fdw--1.2--1.3.sql | 3 - src/backend/columnar/cstore_fdw--1.3--1.4.sql | 3 - src/backend/columnar/cstore_fdw--1.4--1.5.sql | 28 --- src/backend/columnar/cstore_fdw--1.5--1.6.sql | 19 -- src/backend/columnar/cstore_fdw--1.6--1.7.sql | 3 - src/backend/columnar/cstore_fdw--1.7--1.8.sql | 35 --- src/backend/columnar/cstore_fdw.control | 6 - src/backend/columnar/cstore_metadata_tables.c | 1 + src/backend/columnar/cstore_tableam.c | 6 + src/backend/columnar/mod.c | 8 +- .../columnar--9.5-1--10.0-1.sql} | 45 +++- src/backend/distributed/Makefile | 2 + src/backend/distributed/shared_library_init.c | 12 ++ 
.../distributed/sql/citus--9.5-1--10.0-1.sql | 2 + src/include/citus_config.h.in | 8 + src/include/citus_version.h.in | 6 + src/include/columnar/cstore.h | 1 + src/include/columnar/cstore_tableam.h | 4 + src/include/columnar/mod.h | 4 +- 32 files changed, 197 insertions(+), 705 deletions(-) delete mode 100755 src/backend/columnar/.circleci/build.sh delete mode 100644 src/backend/columnar/.circleci/config.yml delete mode 100755 src/backend/columnar/.circleci/run_test.sh delete mode 100644 src/backend/columnar/LICENSE delete mode 100644 src/backend/columnar/META.json delete mode 100644 src/backend/columnar/Makefile delete mode 100644 src/backend/columnar/TODO.md delete mode 100644 src/backend/columnar/cstore_fdw--1.0--1.1.sql delete mode 100644 src/backend/columnar/cstore_fdw--1.1--1.2.sql delete mode 100644 src/backend/columnar/cstore_fdw--1.2--1.3.sql delete mode 100644 src/backend/columnar/cstore_fdw--1.3--1.4.sql delete mode 100644 src/backend/columnar/cstore_fdw--1.4--1.5.sql delete mode 100644 src/backend/columnar/cstore_fdw--1.5--1.6.sql delete mode 100644 src/backend/columnar/cstore_fdw--1.6--1.7.sql delete mode 100644 src/backend/columnar/cstore_fdw--1.7--1.8.sql delete mode 100644 src/backend/columnar/cstore_fdw.control rename src/backend/columnar/{cstore_fdw--1.7.sql => sql/columnar--9.5-1--10.0-1.sql} (66%) diff --git a/Makefile.global.in b/Makefile.global.in index 8c7d7f3d1..aba3be6f6 100644 --- a/Makefile.global.in +++ b/Makefile.global.in @@ -92,5 +92,8 @@ endif override CPPFLAGS := @CPPFLAGS@ @CITUS_CPPFLAGS@ -I '${citus_abs_top_srcdir}/src/include' -I'${citus_top_builddir}/src/include' $(CPPFLAGS) override LDFLAGS += @LDFLAGS@ @CITUS_LDFLAGS@ +USE_FDW:=@USE_FDW@ +USE_TABLEAM:=@USE_TABLEAM@ + # optional file with user defined, additional, rules -include ${citus_abs_srcdir}/src/Makefile.custom diff --git a/configure b/configure index 9d088c3e0..e29228855 100755 --- a/configure +++ b/configure @@ -622,6 +622,8 @@ ac_includes_default="\ 
ac_subst_vars='LTLIBOBJS LIBOBJS +USE_TABLEAM +USE_FDW HAS_DOTGIT POSTGRES_BUILDDIR POSTGRES_SRCDIR @@ -690,6 +692,8 @@ with_extra_version enable_coverage with_libcurl with_reports_hostname +with_columnar_tableam +with_columnar_fdw ' ac_precious_vars='build_alias host_alias @@ -1323,6 +1327,9 @@ Optional Packages: --with-reports-hostname=HOSTNAME Use HOSTNAME as hostname for statistics collection and update checks + --without-columnar-tableam + compile with table access methods for columnar + --without-columnar-fdw compile with foreign data wrappers for columnar Some influential environment variables: PG_CONFIG Location to find pg_config for target PostgreSQL instalation @@ -4468,6 +4475,72 @@ cat >>confdefs.h <<_ACEOF _ACEOF +if test "$version_num" != '11'; then + + + +# Check whether --with-columnar-tableam was given. +if test "${with_columnar_tableam+set}" = set; then : + withval=$with_columnar_tableam; + case $withval in + yes) + USE_TABLEAM=yes + ;; + no) + : + ;; + *) + as_fn_error $? "no argument expected for --with-columnar-tableam option" "$LINENO" 5 + ;; + esac + +else + with_columnar_tableam=yes +USE_TABLEAM=yes +fi + + +else + { $as_echo "$as_me:${as_lineno-$LINENO}: postgres does not support table access methodds" >&5 +$as_echo "$as_me: postgres does not support table access methodds" >&6;} +fi; + +if test "$USE_TABLEAM" = yes; then + +$as_echo "#define USE_TABLEAM 1" >>confdefs.h + +fi; + + + + +# Check whether --with-columnar-fdw was given. +if test "${with_columnar_fdw+set}" = set; then : + withval=$with_columnar_fdw; + case $withval in + yes) + USE_FDW=yes + ;; + no) + : + ;; + *) + as_fn_error $? 
"no argument expected for --with-columnar-fdw option" "$LINENO" 5 + ;; + esac + +else + with_columnar_fdw=yes +USE_FDW=yes +fi + + +if test "$USE_FDW" = yes; then + +$as_echo "#define USE_FDW 1" >>confdefs.h + +fi; + # Check if git is installed, when installed the gitref of the checkout will be baked in the application # Extract the first word of "git", so it can be a program name with args. set dummy git; ac_word=$2 @@ -4543,6 +4616,10 @@ POSTGRES_BUILDDIR="$POSTGRES_BUILDDIR" HAS_DOTGIT="$HAS_DOTGIT" +USE_FDW="$USE_FDW" + +USE_TABLEAM="$USE_TABLEAM" + ac_config_files="$ac_config_files Makefile.global" diff --git a/configure.in b/configure.in index 58bcc906e..b31fc4ff5 100644 --- a/configure.in +++ b/configure.in @@ -212,6 +212,25 @@ PGAC_ARG_REQ(with, reports-hostname, [HOSTNAME], AC_DEFINE_UNQUOTED(REPORTS_BASE_URL, "$REPORTS_BASE_URL", [Base URL for statistics collection and update checks]) +if test "$version_num" != '11'; then + PGAC_ARG_BOOL(with, columnar-tableam, yes, + [compile with table access methods for columnar], + [USE_TABLEAM=yes]) +else + AC_MSG_NOTICE([postgres does not support table access methodds]) +fi; + +if test "$USE_TABLEAM" = yes; then + AC_DEFINE([USE_TABLEAM], 1, [Define to 1 to build with table access method support. (--with-columnar-tableam)]) +fi; + +PGAC_ARG_BOOL(with, columnar-fdw, yes, + [compile with foreign data wrappers for columnar], + [USE_FDW=yes]) +if test "$USE_FDW" = yes; then + AC_DEFINE([USE_FDW], 1, [Define to 1 to build with foreign datawrapper support. 
(--with-columnar-fdw)]) +fi; + # Check if git is installed, when installed the gitref of the checkout will be baked in the application AC_PATH_PROG(GIT_BIN, git) AC_CHECK_FILE(.git,[HAS_DOTGIT=yes], [HAS_DOTGIT=]) @@ -222,6 +241,8 @@ AC_SUBST(CITUS_LDFLAGS, "$LIBS $CITUS_LDFLAGS") AC_SUBST(POSTGRES_SRCDIR, "$POSTGRES_SRCDIR") AC_SUBST(POSTGRES_BUILDDIR, "$POSTGRES_BUILDDIR") AC_SUBST(HAS_DOTGIT, "$HAS_DOTGIT") +AC_SUBST(USE_FDW, "$USE_FDW") +AC_SUBST(USE_TABLEAM, "$USE_TABLEAM") AC_CONFIG_FILES([Makefile.global]) AC_CONFIG_HEADERS([src/include/citus_config.h] [src/include/citus_version.h]) diff --git a/src/backend/columnar/.circleci/build.sh b/src/backend/columnar/.circleci/build.sh deleted file mode 100755 index 6a9f14c74..000000000 --- a/src/backend/columnar/.circleci/build.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -set -euxo pipefail -IFS=$'\n\t' - -status=0 - -basedir="$(pwd)" -installdir="${basedir}/install-${PG_MAJOR}" - -make install DESTDIR="${installdir}" -pushd "${installdir}" -find . -type f -print > "${basedir}/files.lst" -cat "${basedir}/files.lst" -tar cvf "${basedir}/install-${PG_MAJOR}.tar" $(cat "${basedir}/files.lst") -popd diff --git a/src/backend/columnar/.circleci/config.yml b/src/backend/columnar/.circleci/config.yml deleted file mode 100644 index 645211182..000000000 --- a/src/backend/columnar/.circleci/config.yml +++ /dev/null @@ -1,138 +0,0 @@ -version: 2.1 -orbs: - codecov: codecov/codecov@1.1.1 - -jobs: - check-style: - docker: - - image: 'citus/stylechecker:latest' - steps: - - checkout - - run: - name: 'Check Style' - command: | - citus_indent --check - - run: - name: 'Check if whitespace fixing changed anything, install editorconfig if it did' - command: | - git diff --exit-code - - build-11: - docker: - - image: 'citus/extbuilder:11.9' - steps: - - checkout - - run: - name: 'Configure, Build, and Install' - command: | - PG_MAJOR=11 .circleci/build.sh - - persist_to_workspace: - root: . 
- paths: - - install-11.tar - - build-12: - docker: - - image: 'citus/extbuilder:12.4' - steps: - - checkout - - run: - name: 'Configure, Build, and Install' - command: | - PG_MAJOR=12 .circleci/build.sh - - persist_to_workspace: - root: . - paths: - - install-12.tar - - build-13: - docker: - - image: 'citus/extbuilder:13.0' - steps: - - checkout - - run: - name: 'Configure, Build, and Install' - command: | - PG_MAJOR=13 .circleci/build.sh - - persist_to_workspace: - root: . - paths: - - install-13.tar - - test-11_checkinstall: - docker: - - image: 'citus/exttester:11.9' - working_directory: /home/circleci/project - steps: - - checkout - - attach_workspace: - at: . - - run: - name: 'Prepare Container & Install Extension' - command: | - chown -R circleci:circleci /home/circleci - tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / - - run: - name: 'Run Test' - command: | - gosu circleci .circleci/run_test.sh installcheck - - codecov/upload: - flags: 'test_11,installcheck' - - test-12_checkinstall: - docker: - - image: 'citus/exttester:12.4' - working_directory: /home/circleci/project - steps: - - checkout - - attach_workspace: - at: . - - run: - name: 'Prepare Container & Install Extension' - command: | - chown -R circleci:circleci /home/circleci - tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / - - run: - name: 'Run Test' - command: | - gosu circleci .circleci/run_test.sh installcheck - - codecov/upload: - flags: 'test_12,installcheck' - - test-13_checkinstall: - docker: - - image: 'citus/exttester:13.0' - working_directory: /home/circleci/project - steps: - - checkout - - attach_workspace: - at: . 
- - run: - name: 'Prepare Container & Install Extension' - command: | - chown -R circleci:circleci /home/circleci - tar xfv "${CIRCLE_WORKING_DIRECTORY}/install-${PG_MAJOR}.tar" --directory / - - run: - name: 'Run Test' - command: | - gosu circleci .circleci/run_test.sh installcheck - - codecov/upload: - flags: 'test_13,installcheck' - -workflows: - version: 2 - build_and_test: - jobs: - - - check-style - - - build-11 - - build-12 - - build-13 - - - test-11_checkinstall: - requires: [build-11] - - test-12_checkinstall: - requires: [build-12] - - test-13_checkinstall: - requires: [build-13] diff --git a/src/backend/columnar/.circleci/run_test.sh b/src/backend/columnar/.circleci/run_test.sh deleted file mode 100755 index f9e183b56..000000000 --- a/src/backend/columnar/.circleci/run_test.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -set -euxo pipefail -IFS=$'\n\t' - -status=0 - -export PGPORT=${PGPORT:-55432} - -function cleanup { - pg_ctl -D /tmp/postgres stop - rm -rf /tmp/postgres -} -trap cleanup EXIT - -rm -rf /tmp/postgres -initdb -E unicode /tmp/postgres -echo "shared_preload_libraries = 'cstore_fdw'" >> /tmp/postgres/postgresql.conf -pg_ctl -D /tmp/postgres -o "-p ${PGPORT}" -l /tmp/postgres_logfile start || status=$? -if [ -z $status ]; then cat /tmp/postgres_logfile; fi - -make "${@}" || status=$? -diffs="regression.diffs" - -if test -f "${diffs}"; then cat "${diffs}"; fi - -exit $status diff --git a/src/backend/columnar/LICENSE b/src/backend/columnar/LICENSE deleted file mode 100644 index ad410e113..000000000 --- a/src/backend/columnar/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ -Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. 
- - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
\ No newline at end of file diff --git a/src/backend/columnar/META.json b/src/backend/columnar/META.json deleted file mode 100644 index 19e819daa..000000000 --- a/src/backend/columnar/META.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "name": "cstore_fdw", - "abstract": "Columnar Store for PostgreSQL", - "description": "PostgreSQL extension which implements a Columnar Store.", - "version": "1.7.0", - "maintainer": "Murat Tuncer ", - "license": "apache_2_0", - "provides": { - "cstore_fdw": { - "abstract": "Foreign Data Wrapper for Columnar Store Tables", - "file": "cstore_fdw--1.7.sql", - "docfile": "README.md", - "version": "1.7.0" - } - }, - "prereqs": { - "runtime": { - "requires": { - "PostgreSQL": "9.3.0" - } - } - }, - "resources": { - "bugtracker": { - "web": "http://github.com/citusdata/cstore_fdw/issues/" - }, - "repository": { - "url": "git://github.com/citusdata/cstore_fdw.git", - "web": "https://github.com/citusdata/cstore_fdw/", - "type": "git" - } - }, - "generated_by": "Murat Tuncer", - "meta-spec": { - "version": "1.0.0", - "url": "http://pgxn.org/meta/spec.txt" - }, - "tags": [ - "orc", - "fdw", - "foreign data wrapper", - "cstore_fdw", - "columnar store" - ] -} diff --git a/src/backend/columnar/Makefile b/src/backend/columnar/Makefile deleted file mode 100644 index 6be7bbd45..000000000 --- a/src/backend/columnar/Makefile +++ /dev/null @@ -1,102 +0,0 @@ -# cstore_fdw/Makefile -# -# Copyright (c) 2016 Citus Data, Inc. 
-# - -MODULE_big = cstore_fdw - -VER := $(lastword $(shell pg_config --version)) -VER_WORDS = $(subst ., ,$(VER)) -MVER = $(firstword $(VER_WORDS)) - -# error for versions earlier than 10 so that lex comparison will work -ifneq ($(shell printf '%02d' $(MVER)),$(MVER)) -$(error version $(VER) not supported) -endif - -# lexicographic comparison of version number -ifeq ($(lastword $(sort 12 $(MVER))),$(MVER)) - USE_TABLEAM = yes - USE_FDW = yes -else ifeq ($(lastword $(sort 11 $(MVER))),$(MVER)) - USE_TABLEAM = no - USE_FDW = yes -else -$(error version $(VER) is not supported) -endif - -PG_CFLAGS = -std=c11 -Wshadow -Werror -OBJS = cstore.o cstore_writer.o cstore_reader.o \ - cstore_compression.o mod.o cstore_metadata_tables.o - -EXTENSION = cstore_fdw -DATA = cstore_fdw--1.7.sql cstore_fdw--1.6--1.7.sql cstore_fdw--1.5--1.6.sql cstore_fdw--1.4--1.5.sql \ - cstore_fdw--1.3--1.4.sql cstore_fdw--1.2--1.3.sql cstore_fdw--1.1--1.2.sql \ - cstore_fdw--1.0--1.1.sql cstore_fdw--1.7--1.8.sql - -REGRESS = extension_create -ISOLATION = create -EXTRA_CLEAN = sql/fdw_block_filtering.sql sql/fdw_create.sql sql/fdw_data_types.sql sql/fdw_load.sql \ - sql/fdw_copyto.sql expected/fdw_block_filtering.out expected/fdw_create.out \ - expected/fdw_data_types.out expected/fdw_load.out expected/fdw_copyto.out \ - sql/am_block_filtering.sql sql/am_create.sql sql/am_data_types.sql sql/am_load.sql \ - sql/am_copyto.sql expected/am_block_filtering.out \ - expected/am_data_types.out expected/am_load.out expected/am_copyto.out - -ifeq ($(USE_FDW),yes) - PG_CFLAGS += -DUSE_FDW - OBJS += cstore_fdw.o - REGRESS += fdw_create fdw_load fdw_query fdw_analyze fdw_data_types \ - fdw_functions fdw_block_filtering fdw_drop fdw_insert \ - fdw_copyto fdw_alter fdw_rollback fdw_truncate fdw_clean -endif - -ifeq ($(USE_TABLEAM),yes) - PG_CFLAGS += -DUSE_TABLEAM - OBJS += cstore_tableam.o cstore_customscan.o - REGRESS += am_create am_load am_query am_analyze am_data_types am_functions \ - am_drop am_insert 
am_copyto am_alter am_rollback am_truncate am_vacuum am_clean \ - am_block_filtering am_join am_trigger am_tableoptions - ISOLATION += am_write_concurrency am_vacuum_vs_insert -endif - -ifeq ($(enable_coverage),yes) - PG_CPPFLAGS += --coverage - SHLIB_LINK += --coverage - EXTRA_CLEAN += *.gcno -endif - -UNAME_S := $(shell uname -s) -ifeq ($(UNAME_S),Darwin) - PG_CPPFLAGS += -I/usr/local/include -endif - -# -# Users need to specify their Postgres installation path through pg_config. For -# example: /usr/local/pgsql/bin/pg_config or /usr/lib/postgresql/9.3/bin/pg_config -# - -PG_CONFIG = pg_config -PGXS := $(shell $(PG_CONFIG) --pgxs) -include $(PGXS) - -# command for getting postgres source directory is taken from citus/configure.in -POSTGRES_SRCDIR=$(shell grep ^abs_top_srcdir $(shell dirname $(shell $(PG_CONFIG) --pgxs))/../Makefile.global|cut -d ' ' -f3-) -PGXS_ISOLATION_TESTER=$(top_builddir)/src/test/isolation/pg_isolation_regress - -# If postgres installation doesn't include pg_isolation_regress, try using the -# one in postgres source directory. -ifeq (,$(wildcard $(PGXS_ISOLATION_TESTER))) - pg_isolation_regress_installcheck = \ - $(POSTGRES_SRCDIR)/src/test/isolation/pg_isolation_regress \ - --inputdir=$(srcdir) $(EXTRA_REGRESS_OPTS) -else - pg_isolation_regress_installcheck = \ - $(PGXS_ISOLATION_TESTER) \ - --inputdir=$(srcdir) $(EXTRA_REGRESS_OPTS) -endif - -installcheck: - -reindent: - citus_indent . diff --git a/src/backend/columnar/TODO.md b/src/backend/columnar/TODO.md deleted file mode 100644 index 179fbc8c7..000000000 --- a/src/backend/columnar/TODO.md +++ /dev/null @@ -1,41 +0,0 @@ -To see the list of features and bug-fixes planned for next releases, see our -[development roadmap][roadmap]. 
- -Requested Features ------------------- - -* Improve write performance -* Improve read performance -* Add checksum logic -* Add new compression methods -* Enable INSERT/DELETE/UPDATE -* Enable users other than superuser to safely create columnar tables (permissions) -* Transactional semantics -* Add config setting to make pg\_fsync() optional - - -Known Issues ------------- - -* Copy command ignores NOT NULL constraints. -* Planning functions don't take into account average column width. -* Planning functions don't correctly take into account block skipping benefits. -* On 32-bit platforms, when file size is outside the 32-bit signed range, EXPLAIN - command prints incorrect file size. -* If two different columnar tables are configured to point to the same file, - writes to the underlying file aren't protected from each other. -* When a data load is in progress, concurrent reads on the table overestimate the - page count. -* We have a minor memory leak in CStoreEndWrite. We need to also free the - comparisonFunctionArray. -* block\_filtering test fails on Ubuntu because the "da\_DK" locale is not enabled - by default. -* We don't yet incorporate the compression method's impact on disk I/O into cost - estimates. -* CitusDB integration errors: -* Concurrent staging cstore\_fdw tables doesn't work. -* Setting a default value for column with ALTER TABLE has limited support for - existing rows. 
- -[roadmap]: https://github.com/citusdata/cstore_fdw/wiki/Roadmap - diff --git a/src/backend/columnar/cstore_customscan.c b/src/backend/columnar/cstore_customscan.c index 075061caf..5e2d4c461 100644 --- a/src/backend/columnar/cstore_customscan.c +++ b/src/backend/columnar/cstore_customscan.c @@ -10,6 +10,9 @@ *------------------------------------------------------------------------- */ +#include "citus_version.h" +#if USE_TABLEAM + #include "postgres.h" #include "access/skey.h" @@ -431,3 +434,6 @@ CStoreScan_ReScanCustomScan(CustomScanState *node) table_rescan(node->ss.ss_currentScanDesc, NULL); } } + + +#endif /* USE_TABLEAM */ diff --git a/src/backend/columnar/cstore_fdw--1.0--1.1.sql b/src/backend/columnar/cstore_fdw--1.0--1.1.sql deleted file mode 100644 index 9e8029638..000000000 --- a/src/backend/columnar/cstore_fdw--1.0--1.1.sql +++ /dev/null @@ -1,26 +0,0 @@ -/* cstore_fdw/cstore_fdw--1.0--1.1.sql */ - --- complain if script is sourced in psql, rather than via ALTER EXTENSION UPDATE -\echo Use "ALTER EXTENSION cstore_fdw UPDATE TO '1.1'" to load this file. \quit - -CREATE FUNCTION cstore_ddl_event_end_trigger() -RETURNS event_trigger -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT; - -CREATE EVENT TRIGGER cstore_ddl_event_end -ON ddl_command_end -EXECUTE PROCEDURE cstore_ddl_event_end_trigger(); - -CREATE FUNCTION cstore_table_size(relation regclass) -RETURNS bigint -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT; - --- cstore_fdw creates directories to store files for tables with automatically --- determined filename during the CREATE SERVER statement. Since this feature --- was newly added in v1.1, servers created with v1.0 did not create them. So, --- we create a server with v1.1 to ensure that the required directories are --- created to allow users to create automatically managed tables with old servers. 
-CREATE SERVER cstore_server_for_updating_1_0_to_1_1 FOREIGN DATA WRAPPER cstore_fdw; -DROP SERVER cstore_server_for_updating_1_0_to_1_1; diff --git a/src/backend/columnar/cstore_fdw--1.1--1.2.sql b/src/backend/columnar/cstore_fdw--1.1--1.2.sql deleted file mode 100644 index 6cabb8c5e..000000000 --- a/src/backend/columnar/cstore_fdw--1.1--1.2.sql +++ /dev/null @@ -1,3 +0,0 @@ -/* cstore_fdw/cstore_fdw--1.1--1.2.sql */ - --- No new functions or definitions were added in 1.2 diff --git a/src/backend/columnar/cstore_fdw--1.2--1.3.sql b/src/backend/columnar/cstore_fdw--1.2--1.3.sql deleted file mode 100644 index 3ad187d09..000000000 --- a/src/backend/columnar/cstore_fdw--1.2--1.3.sql +++ /dev/null @@ -1,3 +0,0 @@ -/* cstore_fdw/cstore_fdw--1.2--1.3.sql */ - --- No new functions or definitions were added in 1.3 diff --git a/src/backend/columnar/cstore_fdw--1.3--1.4.sql b/src/backend/columnar/cstore_fdw--1.3--1.4.sql deleted file mode 100644 index 3b7b0f150..000000000 --- a/src/backend/columnar/cstore_fdw--1.3--1.4.sql +++ /dev/null @@ -1,3 +0,0 @@ -/* cstore_fdw/cstore_fdw--1.3--1.4.sql */ - --- No new functions or definitions were added in 1.4 diff --git a/src/backend/columnar/cstore_fdw--1.4--1.5.sql b/src/backend/columnar/cstore_fdw--1.4--1.5.sql deleted file mode 100644 index 55bbb0b2a..000000000 --- a/src/backend/columnar/cstore_fdw--1.4--1.5.sql +++ /dev/null @@ -1,28 +0,0 @@ -/* cstore_fdw/cstore_fdw--1.4--1.5.sql */ - -CREATE FUNCTION cstore_clean_table_resources(oid) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT; - -CREATE OR REPLACE FUNCTION cstore_drop_trigger() - RETURNS event_trigger - LANGUAGE plpgsql - AS $csdt$ -DECLARE v_obj record; -BEGIN - FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP - - IF v_obj.object_type NOT IN ('table', 'foreign table') THEN - CONTINUE; - END IF; - - PERFORM cstore_clean_table_resources(v_obj.objid); - - END LOOP; -END; -$csdt$; - -CREATE EVENT TRIGGER cstore_drop_event - ON SQL_DROP - EXECUTE 
PROCEDURE cstore_drop_trigger(); diff --git a/src/backend/columnar/cstore_fdw--1.5--1.6.sql b/src/backend/columnar/cstore_fdw--1.5--1.6.sql deleted file mode 100644 index c8f7e8097..000000000 --- a/src/backend/columnar/cstore_fdw--1.5--1.6.sql +++ /dev/null @@ -1,19 +0,0 @@ -/* cstore_fdw/cstore_fdw--1.5--1.6.sql */ - -CREATE OR REPLACE FUNCTION cstore_drop_trigger() - RETURNS event_trigger - LANGUAGE plpgsql - AS $csdt$ -DECLARE v_obj record; -BEGIN - FOR v_obj IN SELECT * FROM pg_event_trigger_dropped_objects() LOOP - - IF v_obj.object_type NOT IN ('table', 'foreign table') THEN - CONTINUE; - END IF; - - PERFORM public.cstore_clean_table_resources(v_obj.objid); - - END LOOP; -END; -$csdt$; diff --git a/src/backend/columnar/cstore_fdw--1.6--1.7.sql b/src/backend/columnar/cstore_fdw--1.6--1.7.sql deleted file mode 100644 index c7f56f059..000000000 --- a/src/backend/columnar/cstore_fdw--1.6--1.7.sql +++ /dev/null @@ -1,3 +0,0 @@ -/* cstore_fdw/cstore_fdw--1.6--1.6.sql */ - --- No new functions or definitions were added in 1.7 diff --git a/src/backend/columnar/cstore_fdw--1.7--1.8.sql b/src/backend/columnar/cstore_fdw--1.7--1.8.sql deleted file mode 100644 index 81cbadfb4..000000000 --- a/src/backend/columnar/cstore_fdw--1.7--1.8.sql +++ /dev/null @@ -1,35 +0,0 @@ -/* cstore_fdw/cstore_fdw--1.7--1.8.sql */ - -DO $proc$ -BEGIN - -IF version() ~ '12' or version() ~ '13' THEN - EXECUTE $$ - CREATE FUNCTION cstore_tableam_handler(internal) - RETURNS table_am_handler - LANGUAGE C - AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; - - CREATE ACCESS METHOD cstore_tableam - TYPE TABLE HANDLER cstore_tableam_handler; - - CREATE FUNCTION pg_catalog.alter_cstore_table_set( - table_name regclass, - block_row_count int DEFAULT NULL, - stripe_row_count int DEFAULT NULL, - compression name DEFAULT null) - RETURNS void - LANGUAGE C - AS 'MODULE_PATHNAME', 'alter_cstore_table_set'; - - CREATE FUNCTION pg_catalog.alter_cstore_table_reset( - table_name regclass, - block_row_count bool 
DEFAULT false, - stripe_row_count bool DEFAULT false, - compression bool DEFAULT false) - RETURNS void - LANGUAGE C - AS 'MODULE_PATHNAME', 'alter_cstore_table_reset'; - $$; -END IF; -END$proc$; diff --git a/src/backend/columnar/cstore_fdw.control b/src/backend/columnar/cstore_fdw.control deleted file mode 100644 index 57fd0808a..000000000 --- a/src/backend/columnar/cstore_fdw.control +++ /dev/null @@ -1,6 +0,0 @@ -# cstore_fdw extension -comment = 'foreign-data wrapper for flat cstore access' -default_version = '1.8' -module_pathname = '$libdir/cstore_fdw' -relocatable = false -schema = cstore diff --git a/src/backend/columnar/cstore_metadata_tables.c b/src/backend/columnar/cstore_metadata_tables.c index 1d6f36c02..4e40aee68 100644 --- a/src/backend/columnar/cstore_metadata_tables.c +++ b/src/backend/columnar/cstore_metadata_tables.c @@ -9,6 +9,7 @@ #include "postgres.h" + #include "columnar/cstore.h" #include "columnar/cstore_version_compat.h" diff --git a/src/backend/columnar/cstore_tableam.c b/src/backend/columnar/cstore_tableam.c index a8a87425f..f0dd44de2 100644 --- a/src/backend/columnar/cstore_tableam.c +++ b/src/backend/columnar/cstore_tableam.c @@ -1,3 +1,6 @@ +#include "citus_version.h" +#if USE_TABLEAM + #include "postgres.h" #include @@ -1417,3 +1420,6 @@ alter_cstore_table_reset(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } + + +#endif diff --git a/src/backend/columnar/mod.c b/src/backend/columnar/mod.c index 2344a5d68..ee3ba6d98 100644 --- a/src/backend/columnar/mod.c +++ b/src/backend/columnar/mod.c @@ -15,6 +15,8 @@ #include "fmgr.h" +#include "citus_version.h" + #include "columnar/cstore.h" #include "columnar/mod.h" @@ -26,10 +28,8 @@ #include "columnar/cstore_fdw.h" #endif -PG_MODULE_MAGIC; - void -_PG_init(void) +columnar_init(void) { cstore_init(); @@ -44,7 +44,7 @@ _PG_init(void) void -_PG_fini(void) +columnar_fini(void) { #if USE_TABLEAM cstore_tableam_finish(); diff --git a/src/backend/columnar/cstore_fdw--1.7.sql 
b/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql similarity index 66% rename from src/backend/columnar/cstore_fdw--1.7.sql rename to src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql index 1f874ce60..b80239f6a 100644 --- a/src/backend/columnar/cstore_fdw--1.7.sql +++ b/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql @@ -1,7 +1,7 @@ -/* cstore_fdw/cstore_fdw--1.7.sql */ +/* columnar--9.5-1--10.0-1.sql */ --- complain if script is sourced in psql, rather than via CREATE EXTENSION -\echo Use "CREATE EXTENSION cstore_fdw" to load this file. \quit +CREATE SCHEMA cstore; +SET search_path TO cstore; CREATE FUNCTION cstore_fdw_handler() RETURNS fdw_handler @@ -86,3 +86,42 @@ FROM pg_class c JOIN cstore.cstore_data_files d USING(relfilenode); COMMENT ON VIEW cstore_options IS 'CStore per table settings'; + +DO $proc$ +BEGIN + +-- from version 12 and up we have support for tableam's if installed on pg11 we can't +-- create the objects here. Instead we rely on citus_finish_pg_upgrade to be called by the +-- user instead to add the missing objects +IF substring(current_Setting('server_version'), '\d+')::int >= 12 THEN + EXECUTE $$ + CREATE FUNCTION cstore_tableam_handler(internal) + RETURNS table_am_handler + LANGUAGE C + AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; + + CREATE ACCESS METHOD cstore_tableam + TYPE TABLE HANDLER cstore_tableam_handler; + + CREATE FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int DEFAULT NULL, + stripe_row_count int DEFAULT NULL, + compression name DEFAULT null) + RETURNS void + LANGUAGE C + AS 'MODULE_PATHNAME', 'alter_cstore_table_set'; + + CREATE FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool DEFAULT false, + stripe_row_count bool DEFAULT false, + compression bool DEFAULT false) + RETURNS void + LANGUAGE C + AS 'MODULE_PATHNAME', 'alter_cstore_table_reset'; + $$; +END IF; +END$proc$; + +RESET search_path; diff --git 
a/src/backend/distributed/Makefile b/src/backend/distributed/Makefile index 507c72d9d..54aa9c3be 100644 --- a/src/backend/distributed/Makefile +++ b/src/backend/distributed/Makefile @@ -19,6 +19,8 @@ DATA_built = $(generated_sql_files) # directories with source files SUBDIRS = . commands connection ddl deparser executor metadata operations planner progress relay safeclib test transaction utils worker +# columnar modules +SUBDIRS += ../columnar # enterprise modules SUBDIRS += diff --git a/src/backend/distributed/shared_library_init.c b/src/backend/distributed/shared_library_init.c index 7973aaaf9..bd25d14ef 100644 --- a/src/backend/distributed/shared_library_init.c +++ b/src/backend/distributed/shared_library_init.c @@ -84,6 +84,8 @@ #include "utils/guc_tables.h" #include "utils/varlena.h" +#include "columnar/mod.h" + /* marks shared object as one loadable by the postgres version compiled against */ PG_MODULE_MAGIC; @@ -92,6 +94,7 @@ static char *CitusVersion = CITUS_VERSION; void _PG_init(void); +void _PG_fini(void); static void DoInitialCleanup(void); static void ResizeStackToMaximumDepth(void); @@ -311,6 +314,15 @@ _PG_init(void) { DoInitialCleanup(); } + columnar_init(); +} + + +/* shared library deconstruction function */ +void +_PG_fini(void) +{ + columnar_fini(); } diff --git a/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql b/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql index fb96a78fe..e27645ff8 100644 --- a/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql +++ b/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql @@ -1,3 +1,5 @@ -- citus--9.5-1--10.0-1 -- bump version to 10.0-1 + +#include "../../columnar/sql/columnar--9.5-1--10.0-1.sql" diff --git a/src/include/citus_config.h.in b/src/include/citus_config.h.in index 5943116c3..679f91c09 100644 --- a/src/include/citus_config.h.in +++ b/src/include/citus_config.h.in @@ -87,3 +87,11 @@ /* Define to 1 if you have the ANSI C header files. 
*/ #undef STDC_HEADERS + +/* Define to 1 to build with foreign datawrapper support. + (--with-columnar-fdw) */ +#undef USE_FDW + +/* Define to 1 to build with table access method support. + (--with-columnar-tableam) */ +#undef USE_TABLEAM diff --git a/src/include/citus_version.h.in b/src/include/citus_version.h.in index e2594767e..778d59233 100644 --- a/src/include/citus_version.h.in +++ b/src/include/citus_version.h.in @@ -26,3 +26,9 @@ /* Base URL for statistics collection and update checks */ #undef REPORTS_BASE_URL + +/* columnar foreign data wrapper capability */ +#undef USE_FDW + +/* columnar table access method capability */ +#undef USE_TABLEAM diff --git a/src/include/columnar/cstore.h b/src/include/columnar/cstore.h index 35598cd41..ff0051b95 100644 --- a/src/include/columnar/cstore.h +++ b/src/include/columnar/cstore.h @@ -13,6 +13,7 @@ #ifndef CSTORE_H #define CSTORE_H +#include "postgres.h" #include "fmgr.h" #include "lib/stringinfo.h" diff --git a/src/include/columnar/cstore_tableam.h b/src/include/columnar/cstore_tableam.h index 557506b9f..7bd879068 100644 --- a/src/include/columnar/cstore_tableam.h +++ b/src/include/columnar/cstore_tableam.h @@ -1,3 +1,6 @@ +#include "citus_version.h" +#if USE_TABLEAM + #include "postgres.h" #include "fmgr.h" #include "access/tableam.h" @@ -13,3 +16,4 @@ extern TableScanDesc cstore_beginscan_extended(Relation relation, Snapshot snaps ParallelTableScanDesc parallel_scan, uint32 flags, Bitmapset *attr_needed, List *scanQual); +#endif diff --git a/src/include/columnar/mod.h b/src/include/columnar/mod.h index 3196bc809..8bf8db612 100644 --- a/src/include/columnar/mod.h +++ b/src/include/columnar/mod.h @@ -15,7 +15,7 @@ #define MOD_H /* Function declarations for extension loading and unloading */ -extern void _PG_init(void); -extern void _PG_fini(void); +extern void columnar_init(void); +extern void columnar_fini(void); #endif /* MOD_H */ From 3bb65549761d90e10352a085cfe56b3ffa404be4 Mon Sep 17 00:00:00 2001 From: Nils 
Dijk Date: Thu, 5 Nov 2020 16:35:01 +0100 Subject: [PATCH 112/124] make tests run --- .circleci/config.yml | 103 +++++++++++++++++ src/test/regress/Makefile | 19 +++ src/test/regress/bin/normalize.sed | 3 + .../regress/columnar_am_isolation_schedule | 2 + src/test/regress/columnar_am_schedule | 18 +++ .../regress/columnar_fdw_isolation_schedule | 1 + src/test/regress/columnar_fdw_schedule | 14 +++ src/test/regress/expected/.gitignore | 10 ++ src/test/regress/expected/am_alter.out | 82 +++++++------ src/test/regress/expected/am_analyze.out | 8 +- src/test/regress/expected/am_drop.out | 17 +-- src/test/regress/expected/am_functions.out | 4 +- src/test/regress/expected/am_insert.out | 28 ++--- src/test/regress/expected/am_join.out | 8 +- src/test/regress/expected/am_query.out | 40 +++---- src/test/regress/expected/am_rollback.out | 36 +++--- src/test/regress/expected/am_tableoptions.out | 96 ++++++++-------- src/test/regress/expected/am_trigger.out | 12 ++ src/test/regress/expected/am_truncate.out | 108 +++++++++--------- src/test/regress/expected/am_truncate_0.out | 108 +++++++++--------- src/test/regress/expected/am_vacuum.out | 108 +++++++++--------- .../regress/expected/extension_create.out | 2 - src/test/regress/expected/fdw_alter.out | 82 +++++++------ src/test/regress/expected/fdw_analyze.out | 8 +- src/test/regress/expected/fdw_drop.out | 17 +-- src/test/regress/expected/fdw_functions.out | 4 +- src/test/regress/expected/fdw_insert.out | 28 ++--- src/test/regress/expected/fdw_query.out | 40 +++---- src/test/regress/expected/fdw_rollback.out | 36 +++--- src/test/regress/expected/fdw_truncate.out | 108 +++++++++--------- src/test/regress/expected/fdw_truncate_0.out | 108 +++++++++--------- src/test/regress/sql/.gitignore | 10 ++ src/test/regress/sql/am_drop.sql | 6 +- src/test/regress/sql/extension_create.sql | 4 - src/test/regress/sql/fdw_drop.sql | 6 +- 35 files changed, 736 insertions(+), 548 deletions(-) create mode 100644 
src/test/regress/columnar_am_isolation_schedule create mode 100644 src/test/regress/columnar_am_schedule create mode 100644 src/test/regress/columnar_fdw_isolation_schedule create mode 100644 src/test/regress/columnar_fdw_schedule delete mode 100644 src/test/regress/expected/extension_create.out delete mode 100644 src/test/regress/sql/extension_create.sql diff --git a/.circleci/config.yml b/.circleci/config.yml index 3c2018523..ba9d6a4ac 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -199,6 +199,37 @@ jobs: flags: 'test_11,follower-cluster' - store_artifacts: path: '/tmp/core_dumps' + + test-11_check-columnar: + docker: + - image: 'citus/exttester:11.9' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - run: + name: 'Install and Test (check-columnar)' + command: 'chown -R circleci:circleci /home/circleci && install-and-test-ext check-columnar' + no_output_timeout: 2m + - codecov/upload: + flags: 'test_11,columnar' + + test-11_check-columnar-isolation: + docker: + - image: 'citus/exttester:11.9' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - run: + name: 'Install and Test (check-columnar-isolation)' + command: 'chown -R circleci:circleci /home/circleci && install-and-test-ext check-columnar-isolation' + no_output_timeout: 2m + - codecov/upload: + flags: 'test_11,columnar,isolation' + test-11_check-failure: docker: - image: 'citus/failtester:11.9' @@ -337,6 +368,36 @@ jobs: - store_artifacts: path: '/tmp/core_dumps' + test-12_check-columnar: + docker: + - image: 'citus/exttester:12.4' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . 
+ - run: + name: 'Install and Test (check-columnar)' + command: 'chown -R circleci:circleci /home/circleci && install-and-test-ext check-columnar' + no_output_timeout: 2m + - codecov/upload: + flags: 'test_12,columnar' + + test-12_check-columnar-isolation: + docker: + - image: 'citus/exttester:12.4' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - run: + name: 'Install and Test (check-columnar-isolation)' + command: 'chown -R circleci:circleci /home/circleci && install-and-test-ext check-columnar-isolation' + no_output_timeout: 2m + - codecov/upload: + flags: 'test_12,columnar,isolation' + test-12_check-failure: docker: - image: 'citus/failtester:12.4' @@ -473,6 +534,36 @@ jobs: - store_artifacts: path: '/tmp/core_dumps' + test-13_check-columnar: + docker: + - image: 'citus/exttester:13.0' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . + - run: + name: 'Install and Test (check-columnar)' + command: 'chown -R circleci:circleci /home/circleci && install-and-test-ext check-columnar' + no_output_timeout: 2m + - codecov/upload: + flags: 'test_13,columnar' + + test-13_check-columnar-isolation: + docker: + - image: 'citus/exttester:13.0' + working_directory: /home/circleci/project + steps: + - checkout + - attach_workspace: + at: . 
+ - run: + name: 'Install and Test (check-columnar-isolation)' + command: 'chown -R circleci:circleci /home/circleci && install-and-test-ext check-columnar-isolation' + no_output_timeout: 2m + - codecov/upload: + flags: 'test_13,columnar,isolation' + test-13_check-failure: docker: - image: 'citus/failtester:13.0' @@ -556,6 +647,10 @@ workflows: requires: [build-11] - test-11_check-follower-cluster: requires: [build-11] + - test-11_check-columnar: + requires: [build-11] + - test-11_check-columnar-isolation: + requires: [build-11] - test-11_check-failure: requires: [build-11] @@ -571,6 +666,10 @@ workflows: requires: [build-12] - test-12_check-follower-cluster: requires: [build-12] + - test-12_check-columnar: + requires: [build-12] + - test-12_check-columnar-isolation: + requires: [build-12] - test-12_check-failure: requires: [build-12] @@ -586,6 +685,10 @@ workflows: requires: [build-13] - test-13_check-follower-cluster: requires: [build-13] + - test-13_check-columnar: + requires: [build-13] + - test-13_check-columnar-isolation: + requires: [build-13] - test-13_check-failure: requires: [build-13] diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index 8be9fae39..cf79f7b06 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -161,6 +161,25 @@ check-follower-cluster: all $(pg_regress_multi_check) --load-extension=citus --follower-cluster \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/multi_follower_schedule $(EXTRA_TESTS) +COLUMNAR_SCHEDULES = +COLUMNAR_ISOLATION_SCHEDULES = +ifeq ($(USE_FDW),yes) + COLUMNAR_SCHEDULES += columnar_fdw_schedule + COLUMNAR_ISOLATION_SCHEDULES += columnar_fdw_isolation_schedule +endif +ifeq ($(USE_TABLEAM),yes) + COLUMNAR_SCHEDULES += columnar_am_schedule + COLUMNAR_ISOLATION_SCHEDULES += columnar_am_isolation_schedule +endif + +check-columnar: + $(pg_regress_multi_check) --load-extension=citus \ + -- $(MULTI_REGRESS_OPTS) $(addprefix --schedule=$(citus_abs_srcdir)/,$(COLUMNAR_SCHEDULES)) 
$(EXTRA_TESTS) + +check-columnar-isolation: all $(isolation_test_files) + $(pg_regress_multi_check) --load-extension=citus --isolationtester \ + -- $(MULTI_REGRESS_OPTS) --inputdir=$(citus_abs_srcdir)/build $(addprefix --schedule=$(citus_abs_srcdir)/,$(COLUMNAR_ISOLATION_SCHEDULES)) $(EXTRA_TESTS) + check-failure: all $(pg_regress_multi_check) --load-extension=citus --mitmproxy \ -- $(MULTI_REGRESS_OPTS) --schedule=$(citus_abs_srcdir)/failure_schedule $(EXTRA_TESTS) diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index c2a1fa58d..5739d67fb 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -181,3 +181,6 @@ s/wrong data type: [0-9]+, expected [0-9]+/wrong data type: XXXX, expected XXXX/ # Errors with relation OID does not exist s/relation with OID [0-9]+ does not exist/relation with OID XXXX does not exist/g + +# ignore event triggers, mainly due to the event trigger for columnar +/^DEBUG: EventTriggerInvoke [0-9]+$/d diff --git a/src/test/regress/columnar_am_isolation_schedule b/src/test/regress/columnar_am_isolation_schedule new file mode 100644 index 000000000..dab6e0acc --- /dev/null +++ b/src/test/regress/columnar_am_isolation_schedule @@ -0,0 +1,2 @@ +test: am_write_concurrency +test: am_vacuum_vs_insert diff --git a/src/test/regress/columnar_am_schedule b/src/test/regress/columnar_am_schedule new file mode 100644 index 000000000..70ae97db3 --- /dev/null +++ b/src/test/regress/columnar_am_schedule @@ -0,0 +1,18 @@ +test: am_create +test: am_load +test: am_query +test: am_analyze +test: am_data_types +test: am_functions +test: am_drop +test: am_insert +test: am_copyto +test: am_alter +test: am_rollback +test: am_truncate +test: am_vacuum +test: am_clean +test: am_block_filtering +test: am_join +test: am_trigger +test: am_tableoptions diff --git a/src/test/regress/columnar_fdw_isolation_schedule b/src/test/regress/columnar_fdw_isolation_schedule new file mode 100644 index 
000000000..4b9bae8a3 --- /dev/null +++ b/src/test/regress/columnar_fdw_isolation_schedule @@ -0,0 +1 @@ +# just an empty file now, please remove when we have a test diff --git a/src/test/regress/columnar_fdw_schedule b/src/test/regress/columnar_fdw_schedule new file mode 100644 index 000000000..6998f3bf5 --- /dev/null +++ b/src/test/regress/columnar_fdw_schedule @@ -0,0 +1,14 @@ +test: fdw_create +test: fdw_load +test: fdw_query +test: fdw_analyze +test: fdw_data_types +test: fdw_functions +test: fdw_block_filtering +test: fdw_drop +test: fdw_insert +test: fdw_copyto +test: fdw_alter +test: fdw_rollback +test: fdw_truncate +test: fdw_clean diff --git a/src/test/regress/expected/.gitignore b/src/test/regress/expected/.gitignore index fb4e05d04..de611255e 100644 --- a/src/test/regress/expected/.gitignore +++ b/src/test/regress/expected/.gitignore @@ -21,3 +21,13 @@ /multi_behavioral_analytics_create_table.out /multi_insert_select_behavioral_analytics_create_table.out /hyperscale_tutorial.out +/am_block_filtering.out +/am_copyto.out +/am_create.out +/am_data_types.out +/am_load.out +/fdw_block_filtering.out +/fdw_copyto.out +/fdw_create.out +/fdw_data_types.out +/fdw_load.out diff --git a/src/test/regress/expected/am_alter.out b/src/test/regress/expected/am_alter.out index bd0737b4b..bfe4c281f 100644 --- a/src/test/regress/expected/am_alter.out +++ b/src/test/regress/expected/am_alter.out @@ -14,8 +14,8 @@ ALTER TABLE test_alter_table DROP COLUMN a; ANALYZE test_alter_table; -- verify select queries run as expected SELECT * FROM test_alter_table; - b | c ----+--- + b | c +--------------------------------------------------------------------- 2 | 3 5 | 6 8 | 9 @@ -23,11 +23,9 @@ SELECT * FROM test_alter_table; SELECT a FROM test_alter_table; ERROR: column "a" does not exist -LINE 1: SELECT a FROM test_alter_table; - ^ SELECT b FROM test_alter_table; - b ---- + b +--------------------------------------------------------------------- 2 5 8 @@ -36,36 +34,34 @@ SELECT b 
FROM test_alter_table; -- verify insert runs as expected INSERT INTO test_alter_table (SELECT 3, 5, 8); ERROR: INSERT has more expressions than target columns -LINE 1: INSERT INTO test_alter_table (SELECT 3, 5, 8); - ^ INSERT INTO test_alter_table (SELECT 5, 8); -- add a column with no defaults ALTER TABLE test_alter_table ADD COLUMN d int; SELECT * FROM test_alter_table; - b | c | d ----+---+--- - 2 | 3 | - 5 | 6 | - 8 | 9 | - 5 | 8 | + b | c | d +--------------------------------------------------------------------- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | (4 rows) INSERT INTO test_alter_table (SELECT 3, 5, 8); SELECT * FROM test_alter_table; - b | c | d ----+---+--- - 2 | 3 | - 5 | 6 | - 8 | 9 | - 5 | 8 | + b | c | d +--------------------------------------------------------------------- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | 3 | 5 | 8 (5 rows) -- add a fixed-length column with default value ALTER TABLE test_alter_table ADD COLUMN e int default 3; SELECT * from test_alter_table; - b | c | d | e ----+---+---+--- + b | c | d | e +--------------------------------------------------------------------- 2 | 3 | | 3 5 | 6 | | 3 8 | 9 | | 3 @@ -75,8 +71,8 @@ SELECT * from test_alter_table; INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); SELECT * from test_alter_table; - b | c | d | e ----+---+---+--- + b | c | d | e +--------------------------------------------------------------------- 2 | 3 | | 3 5 | 6 | | 3 8 | 9 | | 3 @@ -88,8 +84,8 @@ SELECT * from test_alter_table; -- add a variable-length column with default value ALTER TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; SELECT * from test_alter_table; - b | c | d | e | f ----+---+---+---+--------- + b | c | d | e | f +--------------------------------------------------------------------- 2 | 3 | | 3 | TEXT ME 5 | 6 | | 3 | TEXT ME 8 | 9 | | 3 | TEXT ME @@ -100,8 +96,8 @@ SELECT * from test_alter_table; INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); SELECT * from test_alter_table; - b | c | d | 
e | f ----+---+---+---+--------- + b | c | d | e | f +--------------------------------------------------------------------- 2 | 3 | | 3 | TEXT ME 5 | 6 | | 3 | TEXT ME 8 | 9 | | 3 | TEXT ME @@ -116,8 +112,8 @@ ALTER TABLE test_alter_table DROP COLUMN c; ALTER TABLE test_alter_table DROP COLUMN e; ANALYZE test_alter_table; SELECT * from test_alter_table; - b | d | f ----+---+--------- + b | d | f +--------------------------------------------------------------------- 2 | | TEXT ME 5 | | TEXT ME 8 | | TEXT ME @@ -128,14 +124,14 @@ SELECT * from test_alter_table; (7 rows) SELECT count(*) from test_alter_table; - count -------- + count +--------------------------------------------------------------------- 7 (1 row) SELECT count(t.*) from test_alter_table t; - count -------- + count +--------------------------------------------------------------------- 7 (1 row) @@ -152,15 +148,15 @@ HINT: Expression is either mutable or does not evaluate to constant value ALTER TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; ANALYZE test_alter_table; SELECT * FROM test_alter_table; - b | d | f | g | h ----+---+---------+---+--- - 2 | | TEXT ME | | - 5 | | TEXT ME | | - 8 | | TEXT ME | | - 5 | | TEXT ME | | - 3 | 8 | TEXT ME | | - 1 | 4 | TEXT ME | | - 1 | 4 | ABCDEF | | + b | d | f | g | h +--------------------------------------------------------------------- + 2 | | TEXT ME | | + 5 | | TEXT ME | | + 8 | | TEXT ME | | + 5 | | TEXT ME | | + 3 | 8 | TEXT ME | | + 1 | 4 | TEXT ME | | + 1 | 4 | ABCDEF | | (7 rows) -- unsupported type change diff --git a/src/test/regress/expected/am_analyze.out b/src/test/regress/expected/am_analyze.out index f8c4d974a..654391597 100644 --- a/src/test/regress/expected/am_analyze.out +++ b/src/test/regress/expected/am_analyze.out @@ -4,16 +4,16 @@ -- ANALYZE uncompressed table ANALYZE contestant; SELECT count(*) FROM pg_stats WHERE tablename='contestant'; - count -------- + count +--------------------------------------------------------------------- 6 (1 
row) -- ANALYZE compressed table ANALYZE contestant_compressed; SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; - count -------- + count +--------------------------------------------------------------------- 6 (1 row) diff --git a/src/test/regress/expected/am_drop.out b/src/test/regress/expected/am_drop.out index 26de328f6..255a97693 100644 --- a/src/test/regress/expected/am_drop.out +++ b/src/test/regress/expected/am_drop.out @@ -18,8 +18,8 @@ DROP TABLE contestant; DROP TABLE contestant_compressed; -- make sure DROP deletes metadata SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; - ?column? ----------- + ?column? +--------------------------------------------------------------------- 2 (1 row) @@ -30,21 +30,24 @@ SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \ DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to table test_schema.test_table SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; - ?column? ----------- + ?column? +--------------------------------------------------------------------- 1 (1 row) SELECT current_database() datname \gset CREATE DATABASE db_to_drop; +NOTICE: Citus partially supports CREATE DATABASE for distributed databases +DETAIL: Citus does not propagate CREATE DATABASE command to workers +HINT: You can manually create a database and its extensions on workers. 
\c db_to_drop -CREATE EXTENSION cstore_fdw; +CREATE EXTENSION citus; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE TABLE test_table(data int) USING cstore_tableam; -DROP EXTENSION cstore_fdw CASCADE; +DROP EXTENSION citus CASCADE; NOTICE: drop cascades to table test_table -- test database drop -CREATE EXTENSION cstore_fdw; +CREATE EXTENSION citus; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE TABLE test_table(data int) USING cstore_tableam; \c :datname diff --git a/src/test/regress/expected/am_functions.out b/src/test/regress/expected/am_functions.out index 6351ba0bf..288b80b6e 100644 --- a/src/test/regress/expected/am_functions.out +++ b/src/test/regress/expected/am_functions.out @@ -6,8 +6,8 @@ CREATE TABLE table_with_data (a int) USING cstore_tableam; CREATE TABLE non_cstore_table (a int); COPY table_with_data FROM STDIN; SELECT pg_relation_size('empty_table') < pg_relation_size('table_with_data'); - ?column? ----------- + ?column? 
+--------------------------------------------------------------------- t (1 row) diff --git a/src/test/regress/expected/am_insert.out b/src/test/regress/expected/am_insert.out index 8d06d4323..460c42a24 100644 --- a/src/test/regress/expected/am_insert.out +++ b/src/test/regress/expected/am_insert.out @@ -4,44 +4,44 @@ CREATE TABLE test_insert_command (a int) USING cstore_tableam; -- test single row inserts fail select count(*) from test_insert_command; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) insert into test_insert_command values(1); select count(*) from test_insert_command; - count -------- + count +--------------------------------------------------------------------- 1 (1 row) insert into test_insert_command default values; select count(*) from test_insert_command; - count -------- + count +--------------------------------------------------------------------- 2 (1 row) -- test inserting from another table succeed CREATE TABLE test_insert_command_data (a int); select count(*) from test_insert_command_data; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) insert into test_insert_command_data values(1); select count(*) from test_insert_command_data; - count -------- + count +--------------------------------------------------------------------- 1 (1 row) insert into test_insert_command select * from test_insert_command_data; select count(*) from test_insert_command; - count -------- + count +--------------------------------------------------------------------- 3 (1 row) @@ -68,8 +68,8 @@ DROP TABLE test_long_text; SELECT a.int_val FROM test_long_text_hash a, test_cstore_long_text c WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); - int_val ---------- + int_val +--------------------------------------------------------------------- 1 2 3 diff --git a/src/test/regress/expected/am_join.out b/src/test/regress/expected/am_join.out index 
fbb628187..b55a9fd3f 100644 --- a/src/test/regress/expected/am_join.out +++ b/src/test/regress/expected/am_join.out @@ -12,8 +12,8 @@ SELECT count(*) FROM users JOIN things ON (users.id = things.user_id) WHERE things.id > 290; - count -------- + count +--------------------------------------------------------------------- 10 (1 row) @@ -23,8 +23,8 @@ SELECT count(*) FROM users JOIN things ON (users.id = things.user_id) WHERE things.id > 299990; - QUERY PLAN --------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------- Aggregate -> Nested Loop Join Filter: (users.id = things.user_id) diff --git a/src/test/regress/expected/am_query.out b/src/test/regress/expected/am_query.out index 2f0ff6cc7..a72681458 100644 --- a/src/test/regress/expected/am_query.out +++ b/src/test/regress/expected/am_query.out @@ -5,21 +5,21 @@ SET datestyle = "ISO, YMD"; -- Query uncompressed data SELECT count(*) FROM contestant; - count -------- + count +--------------------------------------------------------------------- 8 (1 row) SELECT avg(rating), stddev_samp(rating) FROM contestant; - avg | stddev_samp ------------------------+------------------ + avg | stddev_samp +--------------------------------------------------------------------- 2344.3750000000000000 | 433.746119785032 (1 row) SELECT country, avg(rating) FROM contestant WHERE rating > 2200 GROUP BY country ORDER BY country; - country | avg ----------+----------------------- + country | avg +--------------------------------------------------------------------- XA | 2203.0000000000000000 XB | 2610.5000000000000000 XC | 2236.0000000000000000 @@ -27,8 +27,8 @@ SELECT country, avg(rating) FROM contestant WHERE rating > 2200 (4 rows) SELECT * FROM contestant ORDER BY handle; - handle | birthdate | rating | percentile | country | achievements ---------+------------+--------+------------+---------+-------------- + handle | birthdate | rating | percentile | country | 
achievements +--------------------------------------------------------------------- a | 1990-01-10 | 2090 | 97.1 | XA | {a} b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} @@ -41,21 +41,21 @@ SELECT * FROM contestant ORDER BY handle; -- Query compressed data SELECT count(*) FROM contestant_compressed; - count -------- + count +--------------------------------------------------------------------- 8 (1 row) SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; - avg | stddev_samp ------------------------+------------------ + avg | stddev_samp +--------------------------------------------------------------------- 2344.3750000000000000 | 433.746119785032 (1 row) SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 GROUP BY country ORDER BY country; - country | avg ----------+----------------------- + country | avg +--------------------------------------------------------------------- XA | 2203.0000000000000000 XB | 2610.5000000000000000 XC | 2236.0000000000000000 @@ -63,8 +63,8 @@ SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 (4 rows) SELECT * FROM contestant_compressed ORDER BY handle; - handle | birthdate | rating | percentile | country | achievements ---------+------------+--------+------------+---------+-------------- + handle | birthdate | rating | percentile | country | achievements +--------------------------------------------------------------------- a | 1990-01-10 | 2090 | 97.1 | XA | {a} b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} @@ -77,8 +77,8 @@ SELECT * FROM contestant_compressed ORDER BY handle; -- Verify that we handle whole-row references correctly SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; - to_json ------------------------------------------------------------------------------------------------------------------- + to_json +--------------------------------------------------------------------- 
{"handle":"g","birthdate":"1991-12-13","rating":1803,"percentile":85.1,"country":"XD ","achievements":["a","c"]} (1 row) @@ -88,8 +88,8 @@ CREATE TABLE union_second (a int, b int) USING cstore_tableam; INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; (SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); - ?column? | b -----------+---- + ?column? | b +--------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 diff --git a/src/test/regress/expected/am_rollback.out b/src/test/regress/expected/am_rollback.out index 130baaa3a..e58ef5135 100644 --- a/src/test/regress/expected/am_rollback.out +++ b/src/test/regress/expected/am_rollback.out @@ -6,30 +6,30 @@ BEGIN; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; ROLLBACK; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) -- check stripe metadata also have been rolled-back SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 10 (1 row) SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; - count -------- + count +--------------------------------------------------------------------- 1 (1 row) @@ -40,37 +40,37 @@ INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; SAVEPOINT s1; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 30 (1 row) ROLLBACK TO SAVEPOINT s1; 
SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 20 (1 row) ROLLBACK TO SAVEPOINT s0; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 10 (1 row) INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; COMMIT; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 20 (1 row) SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; - count -------- + count +--------------------------------------------------------------------- 2 (1 row) diff --git a/src/test/regress/expected/am_tableoptions.out b/src/test/regress/expected/am_tableoptions.out index e5e0f9a4f..3b43dd53f 100644 --- a/src/test/regress/expected/am_tableoptions.out +++ b/src/test/regress/expected/am_tableoptions.out @@ -5,53 +5,53 @@ INSERT INTO table_options SELECT generate_series(1,100); -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 10000 | 150000 | none (1 row) -- test changing the compression SELECT alter_cstore_table_set('table_options', compression => 'pglz'); - alter_cstore_table_set ------------------------- - + alter_cstore_table_set +--------------------------------------------------------------------- + (1 row) -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | 
compression +--------------------------------------------------------------------- table_options | 10000 | 150000 | pglz (1 row) -- test changing the block_row_count SELECT alter_cstore_table_set('table_options', block_row_count => 10); - alter_cstore_table_set ------------------------- - + alter_cstore_table_set +--------------------------------------------------------------------- + (1 row) -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 10 | 150000 | pglz (1 row) -- test changing the block_row_count SELECT alter_cstore_table_set('table_options', stripe_row_count => 100); - alter_cstore_table_set ------------------------- - + alter_cstore_table_set +--------------------------------------------------------------------- + (1 row) -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 10 | 100 | pglz (1 row) @@ -60,23 +60,23 @@ VACUUM FULL table_options; -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 10 | 100 | pglz (1 row) -- set all settings at the same time 
SELECT alter_cstore_table_set('table_options', stripe_row_count => 1000, block_row_count => 100, compression => 'none'); - alter_cstore_table_set ------------------------- - + alter_cstore_table_set +--------------------------------------------------------------------- + (1 row) -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 100 | 1000 | none (1 row) @@ -88,50 +88,50 @@ SET cstore.compression TO 'pglz'; -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 100 | 1000 | none (1 row) SELECT alter_cstore_table_reset('table_options', block_row_count => true); - alter_cstore_table_reset --------------------------- - + alter_cstore_table_reset +--------------------------------------------------------------------- + (1 row) -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 1000 | 1000 | none (1 row) SELECT alter_cstore_table_reset('table_options', stripe_row_count => true); - alter_cstore_table_reset --------------------------- - + alter_cstore_table_reset 
+--------------------------------------------------------------------- + (1 row) -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 1000 | 10000 | none (1 row) SELECT alter_cstore_table_reset('table_options', compression => true); - alter_cstore_table_reset --------------------------- - + alter_cstore_table_reset +--------------------------------------------------------------------- + (1 row) -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 1000 | 10000 | pglz (1 row) @@ -142,8 +142,8 @@ SET cstore.compression TO 'none'; -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass = 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 1000 | 10000 | pglz (1 row) @@ -152,16 +152,16 @@ SELECT alter_cstore_table_reset( block_row_count => true, stripe_row_count => true, compression => true); - alter_cstore_table_reset --------------------------- - + alter_cstore_table_reset +--------------------------------------------------------------------- + (1 row) -- show table_options settings SELECT * FROM cstore.cstore_options WHERE regclass 
= 'table_options'::regclass; - regclass | block_row_count | stripe_row_count | compression ----------------+-----------------+------------------+------------- + regclass | block_row_count | stripe_row_count | compression +--------------------------------------------------------------------- table_options | 10000 | 100000 | none (1 row) diff --git a/src/test/regress/expected/am_trigger.out b/src/test/regress/expected/am_trigger.out index 53b2c9d9e..f289b7dad 100644 --- a/src/test/regress/expected/am_trigger.out +++ b/src/test/regress/expected/am_trigger.out @@ -50,16 +50,28 @@ ERROR: AFTER ROW triggers are not supported for columnstore access method HINT: Consider an AFTER STATEMENT trigger instead. insert into test_tr values(1); NOTICE: BEFORE STATEMENT INSERT +CONTEXT: PL/pgSQL function trs_before() line 3 at RAISE NOTICE: BEFORE ROW INSERT: (1) +CONTEXT: PL/pgSQL function trr_before() line 3 at RAISE NOTICE: AFTER STATEMENT INSERT +CONTEXT: PL/pgSQL function trs_after() line 5 at RAISE NOTICE: (1) +CONTEXT: PL/pgSQL function trs_after() line 14 at RAISE insert into test_tr values(2),(3),(4); NOTICE: BEFORE STATEMENT INSERT +CONTEXT: PL/pgSQL function trs_before() line 3 at RAISE NOTICE: BEFORE ROW INSERT: (2) +CONTEXT: PL/pgSQL function trr_before() line 3 at RAISE NOTICE: BEFORE ROW INSERT: (3) +CONTEXT: PL/pgSQL function trr_before() line 3 at RAISE NOTICE: BEFORE ROW INSERT: (4) +CONTEXT: PL/pgSQL function trr_before() line 3 at RAISE NOTICE: AFTER STATEMENT INSERT +CONTEXT: PL/pgSQL function trs_after() line 5 at RAISE NOTICE: (2) +CONTEXT: PL/pgSQL function trs_after() line 14 at RAISE NOTICE: (3) +CONTEXT: PL/pgSQL function trs_after() line 14 at RAISE NOTICE: (4) +CONTEXT: PL/pgSQL function trs_after() line 14 at RAISE drop table test_tr; diff --git a/src/test/regress/expected/am_truncate.out b/src/test/regress/expected/am_truncate.out index 245c72062..cfa13bef5 100644 --- a/src/test/regress/expected/am_truncate.out +++ 
b/src/test/regress/expected/am_truncate.out @@ -4,8 +4,8 @@ -- print whether we're using version > 10 to make version-specific tests clear SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; - version_above_ten -------------------- + version_above_ten +--------------------------------------------------------------------- t (1 row) @@ -23,8 +23,8 @@ INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, set cstore.compression to default; -- query rows SELECT * FROM cstore_truncate_test; - a | b -----+---- + a | b +--------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 @@ -39,32 +39,32 @@ SELECT * FROM cstore_truncate_test; TRUNCATE TABLE cstore_truncate_test; SELECT * FROM cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT COUNT(*) from cstore_truncate_test; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) SELECT count(*) FROM cstore_truncate_test_compressed; - count -------- + count +--------------------------------------------------------------------- 20 (1 row) TRUNCATE TABLE cstore_truncate_test_compressed; SELECT count(*) FROM cstore_truncate_test_compressed; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) SELECT pg_relation_size('cstore_truncate_test_compressed'); - pg_relation_size ------------------- + pg_relation_size +--------------------------------------------------------------------- 0 (1 row) @@ -72,8 +72,8 @@ INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; SELECT * from cstore_truncate_test; - a | b -----+---- + a | b 
+--------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 @@ -87,8 +87,8 @@ SELECT * from cstore_truncate_test; (10 rows) SELECT * from cstore_truncate_test_second; - a | b -----+---- + a | b +--------------------------------------------------------------------- 20 | 20 21 | 21 22 | 22 @@ -103,8 +103,8 @@ SELECT * from cstore_truncate_test_second; (11 rows) SELECT * from cstore_truncate_test_regular; - a | b -----+---- + a | b +--------------------------------------------------------------------- 10 | 10 11 | 11 12 | 12 @@ -125,31 +125,31 @@ TRUNCATE TABLE cstore_truncate_test, cstore_truncate_test_second, cstore_truncate_test; SELECT * from cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT * from cstore_truncate_test_second; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT * from cstore_truncate_test_regular; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) -- test if truncate on empty table works TRUNCATE TABLE cstore_truncate_test; SELECT * from cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) -- make sure TRUNATE deletes metadata for old relfilenode SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; - ?column? ----------- + ?column? +--------------------------------------------------------------------- 0 (1 row) @@ -162,14 +162,14 @@ INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 2 COMMIT; -- should output "1" for the newly created relation SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; - ?column? ----------- + ?column? 
+--------------------------------------------------------------------- 1 (1 row) SELECT * FROM cstore_same_transaction_truncate; - a ----- + a +--------------------------------------------------------------------- 20 21 22 @@ -185,16 +185,16 @@ BEGIN END;$$ LANGUAGE plpgsql; SELECT cstore_truncate_test_regular_func(); - cstore_truncate_test_regular_func ------------------------------------ - + cstore_truncate_test_regular_func +--------------------------------------------------------------------- + (1 row) -- the cached plans are used stating from the second call SELECT cstore_truncate_test_regular_func(); - cstore_truncate_test_regular_func ------------------------------------ - + cstore_truncate_test_regular_func +--------------------------------------------------------------------- + (1 row) DROP FUNCTION cstore_truncate_test_regular_func(); @@ -209,15 +209,15 @@ set cstore.compression = 'pglz'; INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); set cstore.compression to default; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) @@ -226,6 +226,8 @@ INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); set cstore.compression to default; -- create a user that can not truncate CREATE USER truncate_user; +NOTICE: not propagating CREATE ROLE/USER commands to worker nodes +HINT: Connect to worker nodes directly to manually create all necessary users and roles. 
GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; @@ -233,34 +235,34 @@ SELECT current_user \gset \c - truncate_user -- verify truncate command fails and check number of rows SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; ERROR: permission denied for table truncate_tbl SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) --- switch to super user, grant truncate to truncate_user +-- switch to super user, grant truncate to truncate_user \c - :current_user GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; -- verify truncate_user can truncate now \c - truncate_user SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) diff --git a/src/test/regress/expected/am_truncate_0.out b/src/test/regress/expected/am_truncate_0.out index c8cc4ad98..073e8f042 100644 --- a/src/test/regress/expected/am_truncate_0.out +++ b/src/test/regress/expected/am_truncate_0.out @@ -4,8 +4,8 @@ -- print whether we're using version > 10 to make version-specific tests clear SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; - version_above_ten -------------------- + version_above_ten +--------------------------------------------------------------------- f (1 row) @@ -15,8 +15,8 @@ SELECT count(*) FROM ( SELECT 
pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() ) AS q1) AS q2; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) @@ -30,8 +30,8 @@ INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; -- query rows SELECT * FROM cstore_truncate_test; - a | b -----+---- + a | b +--------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 @@ -46,42 +46,42 @@ SELECT * FROM cstore_truncate_test; TRUNCATE TABLE cstore_truncate_test; SELECT * FROM cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT COUNT(*) from cstore_truncate_test; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) SELECT count(*) FROM cstore_truncate_test_compressed; - count -------- + count +--------------------------------------------------------------------- 20 (1 row) TRUNCATE TABLE cstore_truncate_test_compressed; SELECT count(*) FROM cstore_truncate_test_compressed; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) SELECT cstore_table_size('cstore_truncate_test_compressed'); - cstore_table_size -------------------- + cstore_table_size +--------------------------------------------------------------------- 26 (1 row) --- make sure data files still present +-- make sure data files still present SELECT count(*) FROM ( SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() ) AS q1) AS q2; - count -------- + count +--------------------------------------------------------------------- 6 (1 row) @@ -89,8 +89,8 @@ INSERT INTO cstore_truncate_test select a, a from 
generate_series(1, 10) a; INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; SELECT * from cstore_truncate_test; - a | b -----+---- + a | b +--------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 @@ -104,8 +104,8 @@ SELECT * from cstore_truncate_test; (10 rows) SELECT * from cstore_truncate_test_second; - a | b -----+---- + a | b +--------------------------------------------------------------------- 20 | 20 21 | 21 22 | 22 @@ -120,8 +120,8 @@ SELECT * from cstore_truncate_test_second; (11 rows) SELECT * from cstore_truncate_test_regular; - a | b -----+---- + a | b +--------------------------------------------------------------------- 10 | 10 11 | 11 12 | 12 @@ -142,25 +142,25 @@ TRUNCATE TABLE cstore_truncate_test, cstore_truncate_test_second, cstore_truncate_test; SELECT * from cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT * from cstore_truncate_test_second; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT * from cstore_truncate_test_regular; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) -- test if truncate on empty table works TRUNCATE TABLE cstore_truncate_test; SELECT * from cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) -- test if a cached truncate from a pl/pgsql function works @@ -171,16 +171,16 @@ BEGIN END;$$ LANGUAGE plpgsql; SELECT cstore_truncate_test_regular_func(); - cstore_truncate_test_regular_func ------------------------------------ - + cstore_truncate_test_regular_func +--------------------------------------------------------------------- + (1 row) -- the cached plans are used stating from the second call SELECT 
cstore_truncate_test_regular_func(); - cstore_truncate_test_regular_func ------------------------------------ - + cstore_truncate_test_regular_func +--------------------------------------------------------------------- + (1 row) DROP FUNCTION cstore_truncate_test_regular_func(); @@ -192,15 +192,15 @@ CREATE SCHEMA truncate_schema; CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); SELECT COUNT(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) @@ -214,34 +214,34 @@ SELECT current_user \gset \c - truncate_user -- verify truncate command fails and check number of rows SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; ERROR: permission denied for relation truncate_tbl SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) --- switch to super user, grant truncate to truncate_user +-- switch to super user, grant truncate to truncate_user \c - :current_user GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; -- verify truncate_user can truncate now \c - truncate_user SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count 
+--------------------------------------------------------------------- 0 (1 row) @@ -255,8 +255,8 @@ SELECT count(*) FROM ( SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() ) AS q1) AS q2; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) diff --git a/src/test/regress/expected/am_vacuum.out b/src/test/regress/expected/am_vacuum.out index 3975be12b..15da28fe0 100644 --- a/src/test/regress/expected/am_vacuum.out +++ b/src/test/regress/expected/am_vacuum.out @@ -1,8 +1,8 @@ SELECT count(*) AS columnar_table_count FROM cstore.cstore_data_files \gset CREATE TABLE t(a int, b int) USING cstore_tableam; SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) @@ -10,69 +10,69 @@ INSERT INTO t SELECT i, i * i FROM generate_series(1, 10) i; INSERT INTO t SELECT i, i * i FROM generate_series(11, 20) i; INSERT INTO t SELECT i, i * i FROM generate_series(21, 30) i; SELECT sum(a), sum(b) FROM t; - sum | sum ------+------ + sum | sum +--------------------------------------------------------------------- 465 | 9455 (1 row) SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; - count -------- + count +--------------------------------------------------------------------- 3 (1 row) -- vacuum full should merge stripes together VACUUM FULL t; SELECT sum(a), sum(b) FROM t; - sum | sum ------+------ + sum | sum +--------------------------------------------------------------------- 465 | 9455 (1 row) SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; - count -------- + count +--------------------------------------------------------------------- 1 (1 row) -- test the case when 
all data cannot fit into a single stripe SELECT alter_cstore_table_set('t', stripe_row_count => 1000); - alter_cstore_table_set ------------------------- - + alter_cstore_table_set +--------------------------------------------------------------------- + (1 row) INSERT INTO t SELECT i, 2 * i FROM generate_series(1,2500) i; SELECT sum(a), sum(b) FROM t; - sum | sum ----------+--------- + sum | sum +--------------------------------------------------------------------- 3126715 | 6261955 (1 row) SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; - count -------- + count +--------------------------------------------------------------------- 4 (1 row) VACUUM FULL t; SELECT sum(a), sum(b) FROM t; - sum | sum ----------+--------- + sum | sum +--------------------------------------------------------------------- 3126715 | 6261955 (1 row) SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t'; - count -------- + count +--------------------------------------------------------------------- 3 (1 row) -- VACUUM FULL doesn't reclaim dropped columns, but converts them to NULLs ALTER TABLE t DROP COLUMN a; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; - stripe | attr | block | ?column? | ?column? ---------+------+-------+----------+---------- + stripe | attr | block | ?column? | ?column? 
+--------------------------------------------------------------------- 1 | 1 | 0 | f | f 1 | 2 | 0 | f | f 2 | 1 | 0 | f | f @@ -83,8 +83,8 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs VACUUM FULL t; SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cstore.cstore_skipnodes a, pg_class b WHERE a.relfilenode=b.relfilenode AND b.relname='t' ORDER BY 1, 2, 3; - stripe | attr | block | ?column? | ?column? ---------+------+-------+----------+---------- + stripe | attr | block | ?column? | ?column? +--------------------------------------------------------------------- 1 | 1 | 0 | t | t 1 | 2 | 0 | f | f 2 | 1 | 0 | t | t @@ -95,8 +95,8 @@ SELECT stripe, attr, block, minimum_value IS NULL, maximum_value IS NULL FROM cs -- Make sure we cleaned-up the transient table metadata after VACUUM FULL commands SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; - ?column? ----------- + ?column? +--------------------------------------------------------------------- 1 (1 row) @@ -104,27 +104,27 @@ SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; BEGIN; SAVEPOINT s1; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 2530 (1 row) SELECT pg_size_pretty(pg_relation_size('t')); - pg_size_pretty ----------------- + pg_size_pretty +--------------------------------------------------------------------- 32 kB (1 row) INSERT INTO t SELECT i FROM generate_series(1, 10000) i; SELECT pg_size_pretty(pg_relation_size('t')); - pg_size_pretty ----------------- + pg_size_pretty +--------------------------------------------------------------------- 112 kB (1 row) SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 12530 (1 row) @@ -132,8 +132,8 @@ ROLLBACK TO SAVEPOINT s1; -- not truncated by VACUUM or autovacuum yet (being in transaction ensures this), -- so 
relation size should be same as before. SELECT pg_size_pretty(pg_relation_size('t')); - pg_size_pretty ----------------- + pg_size_pretty +--------------------------------------------------------------------- 112 kB (1 row) @@ -148,14 +148,14 @@ block count: 3, containing data for dropped columns: 0, none compressed: 3, pglz INFO: "t": truncated 14 to 4 pages DETAIL: CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s SELECT pg_size_pretty(pg_relation_size('t')); - pg_size_pretty ----------------- + pg_size_pretty +--------------------------------------------------------------------- 32 kB (1 row) SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 2530 (1 row) @@ -166,9 +166,9 @@ SELECT alter_cstore_table_set('t', block_row_count => 1000, stripe_row_count => 2000, compression => 'pglz'); - alter_cstore_table_set ------------------------- - + alter_cstore_table_set +--------------------------------------------------------------------- + (1 row) SAVEPOINT s1; @@ -176,9 +176,9 @@ INSERT INTO t SELECT i FROM generate_series(1, 1500) i; ROLLBACK TO SAVEPOINT s1; INSERT INTO t SELECT i / 5 FROM generate_series(1, 1500) i; SELECT alter_cstore_table_set('t', compression => 'none'); - alter_cstore_table_set ------------------------- - + alter_cstore_table_set +--------------------------------------------------------------------- + (1 row) SAVEPOINT s2; @@ -193,8 +193,8 @@ total row count: 5530, stripe count: 5, average rows per stripe: 1106 block count: 7, containing data for dropped columns: 0, none compressed: 5, pglz compressed: 2 SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 5530 (1 row) @@ -212,9 +212,9 @@ block count: 11, containing data for dropped columns: 2, none compressed: 9, pgl -- note that, a block will be stored in non-compressed for if compression -- doesn't reduce its size. 
SELECT alter_cstore_table_set('t', compression => 'pglz'); - alter_cstore_table_set ------------------------- - + alter_cstore_table_set +--------------------------------------------------------------------- + (1 row) VACUUM FULL t; @@ -227,8 +227,8 @@ block count: 8, containing data for dropped columns: 0, none compressed: 2, pglz DROP TABLE t; -- Make sure we cleaned the metadata for t too SELECT count(*) - :columnar_table_count FROM cstore.cstore_data_files; - ?column? ----------- + ?column? +--------------------------------------------------------------------- 0 (1 row) diff --git a/src/test/regress/expected/extension_create.out b/src/test/regress/expected/extension_create.out deleted file mode 100644 index c4d94e1e5..000000000 --- a/src/test/regress/expected/extension_create.out +++ /dev/null @@ -1,2 +0,0 @@ --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; diff --git a/src/test/regress/expected/fdw_alter.out b/src/test/regress/expected/fdw_alter.out index 659e2723e..7e69a4178 100644 --- a/src/test/regress/expected/fdw_alter.out +++ b/src/test/regress/expected/fdw_alter.out @@ -14,8 +14,8 @@ ALTER FOREIGN TABLE test_alter_table DROP COLUMN a; ANALYZE test_alter_table; -- verify select queries run as expected SELECT * FROM test_alter_table; - b | c ----+--- + b | c +--------------------------------------------------------------------- 2 | 3 5 | 6 8 | 9 @@ -23,11 +23,9 @@ SELECT * FROM test_alter_table; SELECT a FROM test_alter_table; ERROR: column "a" does not exist -LINE 1: SELECT a FROM test_alter_table; - ^ SELECT b FROM test_alter_table; - b ---- + b +--------------------------------------------------------------------- 2 5 8 @@ -36,36 +34,34 @@ SELECT b FROM test_alter_table; -- verify insert runs as expected INSERT INTO test_alter_table (SELECT 3, 5, 8); ERROR: INSERT has more expressions than target columns -LINE 1: INSERT INTO test_alter_table (SELECT 3, 5, 8); - ^ INSERT INTO test_alter_table (SELECT 5, 8); -- add a column with no defaults ALTER 
FOREIGN TABLE test_alter_table ADD COLUMN d int; SELECT * FROM test_alter_table; - b | c | d ----+---+--- - 2 | 3 | - 5 | 6 | - 8 | 9 | - 5 | 8 | + b | c | d +--------------------------------------------------------------------- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | (4 rows) INSERT INTO test_alter_table (SELECT 3, 5, 8); SELECT * FROM test_alter_table; - b | c | d ----+---+--- - 2 | 3 | - 5 | 6 | - 8 | 9 | - 5 | 8 | + b | c | d +--------------------------------------------------------------------- + 2 | 3 | + 5 | 6 | + 8 | 9 | + 5 | 8 | 3 | 5 | 8 (5 rows) -- add a fixed-length column with default value ALTER FOREIGN TABLE test_alter_table ADD COLUMN e int default 3; SELECT * from test_alter_table; - b | c | d | e ----+---+---+--- + b | c | d | e +--------------------------------------------------------------------- 2 | 3 | | 3 5 | 6 | | 3 8 | 9 | | 3 @@ -75,8 +71,8 @@ SELECT * from test_alter_table; INSERT INTO test_alter_table (SELECT 1, 2, 4, 8); SELECT * from test_alter_table; - b | c | d | e ----+---+---+--- + b | c | d | e +--------------------------------------------------------------------- 2 | 3 | | 3 5 | 6 | | 3 8 | 9 | | 3 @@ -88,8 +84,8 @@ SELECT * from test_alter_table; -- add a variable-length column with default value ALTER FOREIGN TABLE test_alter_table ADD COLUMN f text DEFAULT 'TEXT ME'; SELECT * from test_alter_table; - b | c | d | e | f ----+---+---+---+--------- + b | c | d | e | f +--------------------------------------------------------------------- 2 | 3 | | 3 | TEXT ME 5 | 6 | | 3 | TEXT ME 8 | 9 | | 3 | TEXT ME @@ -100,8 +96,8 @@ SELECT * from test_alter_table; INSERT INTO test_alter_table (SELECT 1, 2, 4, 8, 'ABCDEF'); SELECT * from test_alter_table; - b | c | d | e | f ----+---+---+---+--------- + b | c | d | e | f +--------------------------------------------------------------------- 2 | 3 | | 3 | TEXT ME 5 | 6 | | 3 | TEXT ME 8 | 9 | | 3 | TEXT ME @@ -116,8 +112,8 @@ ALTER FOREIGN TABLE test_alter_table DROP COLUMN c; ALTER FOREIGN 
TABLE test_alter_table DROP COLUMN e; ANALYZE test_alter_table; SELECT * from test_alter_table; - b | d | f ----+---+--------- + b | d | f +--------------------------------------------------------------------- 2 | | TEXT ME 5 | | TEXT ME 8 | | TEXT ME @@ -128,14 +124,14 @@ SELECT * from test_alter_table; (7 rows) SELECT count(*) from test_alter_table; - count -------- + count +--------------------------------------------------------------------- 7 (1 row) SELECT count(t.*) from test_alter_table t; - count -------- + count +--------------------------------------------------------------------- 7 (1 row) @@ -152,15 +148,15 @@ HINT: Expression is either mutable or does not evaluate to constant value ALTER FOREIGN TABLE test_alter_table ALTER COLUMN h DROP DEFAULT; ANALYZE test_alter_table; SELECT * FROM test_alter_table; - b | d | f | g | h ----+---+---------+---+--- - 2 | | TEXT ME | | - 5 | | TEXT ME | | - 8 | | TEXT ME | | - 5 | | TEXT ME | | - 3 | 8 | TEXT ME | | - 1 | 4 | TEXT ME | | - 1 | 4 | ABCDEF | | + b | d | f | g | h +--------------------------------------------------------------------- + 2 | | TEXT ME | | + 5 | | TEXT ME | | + 8 | | TEXT ME | | + 5 | | TEXT ME | | + 3 | 8 | TEXT ME | | + 1 | 4 | TEXT ME | | + 1 | 4 | ABCDEF | | (7 rows) -- unsupported type change diff --git a/src/test/regress/expected/fdw_analyze.out b/src/test/regress/expected/fdw_analyze.out index f8c4d974a..654391597 100644 --- a/src/test/regress/expected/fdw_analyze.out +++ b/src/test/regress/expected/fdw_analyze.out @@ -4,16 +4,16 @@ -- ANALYZE uncompressed table ANALYZE contestant; SELECT count(*) FROM pg_stats WHERE tablename='contestant'; - count -------- + count +--------------------------------------------------------------------- 6 (1 row) -- ANALYZE compressed table ANALYZE contestant_compressed; SELECT count(*) FROM pg_stats WHERE tablename='contestant_compressed'; - count -------- + count +--------------------------------------------------------------------- 6 (1 row) diff 
--git a/src/test/regress/expected/fdw_drop.out b/src/test/regress/expected/fdw_drop.out index e1ddf0fd0..def073b20 100644 --- a/src/test/regress/expected/fdw_drop.out +++ b/src/test/regress/expected/fdw_drop.out @@ -18,8 +18,8 @@ DROP FOREIGN TABLE contestant; DROP FOREIGN TABLE contestant_compressed; -- make sure DROP deletes metadata SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; - ?column? ----------- + ?column? +--------------------------------------------------------------------- 2 (1 row) @@ -30,24 +30,27 @@ SELECT count(*) AS cstore_data_files_before_drop FROM cstore.cstore_data_files \ DROP SCHEMA test_schema CASCADE; NOTICE: drop cascades to foreign table test_schema.test_table SELECT :cstore_data_files_before_drop - count(*) FROM cstore.cstore_data_files; - ?column? ----------- + ?column? +--------------------------------------------------------------------- 1 (1 row) SELECT current_database() datname \gset CREATE DATABASE db_to_drop; +NOTICE: Citus partially supports CREATE DATABASE for distributed databases +DETAIL: Citus does not propagate CREATE DATABASE command to workers +HINT: You can manually create a database and its extensions on workers. 
\c db_to_drop -CREATE EXTENSION cstore_fdw; +CREATE EXTENSION citus; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; -DROP EXTENSION cstore_fdw CASCADE; +DROP EXTENSION citus CASCADE; NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to server cstore_server drop cascades to foreign table test_table -- test database drop -CREATE EXTENSION cstore_fdw; +CREATE EXTENSION citus; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; diff --git a/src/test/regress/expected/fdw_functions.out b/src/test/regress/expected/fdw_functions.out index 117fc15f9..cf047f578 100644 --- a/src/test/regress/expected/fdw_functions.out +++ b/src/test/regress/expected/fdw_functions.out @@ -6,8 +6,8 @@ CREATE FOREIGN TABLE table_with_data (a int) SERVER cstore_server; CREATE TABLE non_cstore_table (a int); COPY table_with_data FROM STDIN; SELECT cstore_table_size('empty_table') < cstore_table_size('table_with_data'); - ?column? ----------- + ?column? 
+--------------------------------------------------------------------- t (1 row) diff --git a/src/test/regress/expected/fdw_insert.out b/src/test/regress/expected/fdw_insert.out index 49d9ed132..1cc94e678 100644 --- a/src/test/regress/expected/fdw_insert.out +++ b/src/test/regress/expected/fdw_insert.out @@ -4,46 +4,46 @@ CREATE FOREIGN TABLE test_insert_command (a int) SERVER cstore_server; -- test single row inserts fail select count(*) from test_insert_command; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) insert into test_insert_command values(1); ERROR: operation is not supported select count(*) from test_insert_command; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) insert into test_insert_command default values; ERROR: operation is not supported select count(*) from test_insert_command; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) -- test inserting from another table succeed CREATE TABLE test_insert_command_data (a int); select count(*) from test_insert_command_data; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) insert into test_insert_command_data values(1); select count(*) from test_insert_command_data; - count -------- + count +--------------------------------------------------------------------- 1 (1 row) insert into test_insert_command select * from test_insert_command_data; select count(*) from test_insert_command; - count -------- + count +--------------------------------------------------------------------- 1 (1 row) @@ -70,8 +70,8 @@ DROP TABLE test_long_text; SELECT a.int_val FROM test_long_text_hash a, test_cstore_long_text c WHERE a.int_val = c.int_val AND a.hash = md5(c.text_val); - int_val ---------- + int_val +--------------------------------------------------------------------- 1 2 3 diff --git 
a/src/test/regress/expected/fdw_query.out b/src/test/regress/expected/fdw_query.out index 7ac3508a4..a83b18451 100644 --- a/src/test/regress/expected/fdw_query.out +++ b/src/test/regress/expected/fdw_query.out @@ -5,21 +5,21 @@ SET datestyle = "ISO, YMD"; -- Query uncompressed data SELECT count(*) FROM contestant; - count -------- + count +--------------------------------------------------------------------- 8 (1 row) SELECT avg(rating), stddev_samp(rating) FROM contestant; - avg | stddev_samp ------------------------+------------------ + avg | stddev_samp +--------------------------------------------------------------------- 2344.3750000000000000 | 433.746119785032 (1 row) SELECT country, avg(rating) FROM contestant WHERE rating > 2200 GROUP BY country ORDER BY country; - country | avg ----------+----------------------- + country | avg +--------------------------------------------------------------------- XA | 2203.0000000000000000 XB | 2610.5000000000000000 XC | 2236.0000000000000000 @@ -27,8 +27,8 @@ SELECT country, avg(rating) FROM contestant WHERE rating > 2200 (4 rows) SELECT * FROM contestant ORDER BY handle; - handle | birthdate | rating | percentile | country | achievements ---------+------------+--------+------------+---------+-------------- + handle | birthdate | rating | percentile | country | achievements +--------------------------------------------------------------------- a | 1990-01-10 | 2090 | 97.1 | XA | {a} b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} @@ -41,21 +41,21 @@ SELECT * FROM contestant ORDER BY handle; -- Query compressed data SELECT count(*) FROM contestant_compressed; - count -------- + count +--------------------------------------------------------------------- 8 (1 row) SELECT avg(rating), stddev_samp(rating) FROM contestant_compressed; - avg | stddev_samp ------------------------+------------------ + avg | stddev_samp +--------------------------------------------------------------------- 
2344.3750000000000000 | 433.746119785032 (1 row) SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 GROUP BY country ORDER BY country; - country | avg ----------+----------------------- + country | avg +--------------------------------------------------------------------- XA | 2203.0000000000000000 XB | 2610.5000000000000000 XC | 2236.0000000000000000 @@ -63,8 +63,8 @@ SELECT country, avg(rating) FROM contestant_compressed WHERE rating > 2200 (4 rows) SELECT * FROM contestant_compressed ORDER BY handle; - handle | birthdate | rating | percentile | country | achievements ---------+------------+--------+------------+---------+-------------- + handle | birthdate | rating | percentile | country | achievements +--------------------------------------------------------------------- a | 1990-01-10 | 2090 | 97.1 | XA | {a} b | 1990-11-01 | 2203 | 98.1 | XA | {a,b} c | 1988-11-01 | 2907 | 99.4 | XB | {w,y} @@ -77,8 +77,8 @@ SELECT * FROM contestant_compressed ORDER BY handle; -- Verify that we handle whole-row references correctly SELECT to_json(v) FROM contestant v ORDER BY rating LIMIT 1; - to_json ------------------------------------------------------------------------------------------------------------------- + to_json +--------------------------------------------------------------------- {"handle":"g","birthdate":"1991-12-13","rating":1803,"percentile":85.1,"country":"XD ","achievements":["a","c"]} (1 row) @@ -88,8 +88,8 @@ CREATE FOREIGN TABLE union_second (a int, b int) SERVER cstore_server; INSERT INTO union_first SELECT a, a FROM generate_series(1, 5) a; INSERT INTO union_second SELECT a, a FROM generate_series(11, 15) a; (SELECT a*1, b FROM union_first) union all (SELECT a*1, b FROM union_second); - ?column? | b -----------+---- + ?column? 
| b +--------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 diff --git a/src/test/regress/expected/fdw_rollback.out b/src/test/regress/expected/fdw_rollback.out index f50f9fd19..c70b2a49e 100644 --- a/src/test/regress/expected/fdw_rollback.out +++ b/src/test/regress/expected/fdw_rollback.out @@ -6,30 +6,30 @@ BEGIN; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; ROLLBACK; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) -- check stripe metadata also have been rolled-back SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 10 (1 row) SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; - count -------- + count +--------------------------------------------------------------------- 1 (1 row) @@ -40,37 +40,37 @@ INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; SAVEPOINT s1; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 30 (1 row) ROLLBACK TO SAVEPOINT s1; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 20 (1 row) ROLLBACK TO SAVEPOINT s0; SELECT count(*) FROM t; - count -------- + count +--------------------------------------------------------------------- 10 (1 row) INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; COMMIT; SELECT count(*) FROM t; - count -------- + count 
+--------------------------------------------------------------------- 20 (1 row) SELECT count(*) FROM cstore.cstore_stripes a, pg_class b WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; - count -------- + count +--------------------------------------------------------------------- 2 (1 row) diff --git a/src/test/regress/expected/fdw_truncate.out b/src/test/regress/expected/fdw_truncate.out index 6192c704c..a2c95da01 100644 --- a/src/test/regress/expected/fdw_truncate.out +++ b/src/test/regress/expected/fdw_truncate.out @@ -4,8 +4,8 @@ -- print whether we're using version > 10 to make version-specific tests clear SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; - version_above_ten -------------------- + version_above_ten +--------------------------------------------------------------------- t (1 row) @@ -20,8 +20,8 @@ INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; -- query rows SELECT * FROM cstore_truncate_test; - a | b -----+---- + a | b +--------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 @@ -36,32 +36,32 @@ SELECT * FROM cstore_truncate_test; TRUNCATE TABLE cstore_truncate_test; SELECT * FROM cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT COUNT(*) from cstore_truncate_test; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) SELECT count(*) FROM cstore_truncate_test_compressed; - count -------- + count +--------------------------------------------------------------------- 20 (1 row) TRUNCATE TABLE cstore_truncate_test_compressed; SELECT count(*) FROM cstore_truncate_test_compressed; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) SELECT 
cstore_table_size('cstore_truncate_test_compressed'); - cstore_table_size -------------------- + cstore_table_size +--------------------------------------------------------------------- 0 (1 row) @@ -69,8 +69,8 @@ INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; SELECT * from cstore_truncate_test; - a | b -----+---- + a | b +--------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 @@ -84,8 +84,8 @@ SELECT * from cstore_truncate_test; (10 rows) SELECT * from cstore_truncate_test_second; - a | b -----+---- + a | b +--------------------------------------------------------------------- 20 | 20 21 | 21 22 | 22 @@ -100,8 +100,8 @@ SELECT * from cstore_truncate_test_second; (11 rows) SELECT * from cstore_truncate_test_regular; - a | b -----+---- + a | b +--------------------------------------------------------------------- 10 | 10 11 | 11 12 | 12 @@ -122,31 +122,31 @@ TRUNCATE TABLE cstore_truncate_test, cstore_truncate_test_second, cstore_truncate_test; SELECT * from cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT * from cstore_truncate_test_second; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT * from cstore_truncate_test_regular; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) -- test if truncate on empty table works TRUNCATE TABLE cstore_truncate_test; SELECT * from cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) -- make sure TRUNATE deletes metadata for old relfilenode SELECT :cstore_data_files_before_truncate - count(*) FROM cstore.cstore_data_files; - ?column? 
----------- + ?column? +--------------------------------------------------------------------- 0 (1 row) @@ -159,14 +159,14 @@ INSERT INTO cstore_same_transaction_truncate SELECT * FROM generate_series(20, 2 COMMIT; -- should output "1" for the newly created relation SELECT count(*) - :cstore_data_files_before_truncate FROM cstore.cstore_data_files; - ?column? ----------- + ?column? +--------------------------------------------------------------------- 1 (1 row) SELECT * FROM cstore_same_transaction_truncate; - a ----- + a +--------------------------------------------------------------------- 20 21 22 @@ -182,16 +182,16 @@ BEGIN END;$$ LANGUAGE plpgsql; SELECT cstore_truncate_test_regular_func(); - cstore_truncate_test_regular_func ------------------------------------ - + cstore_truncate_test_regular_func +--------------------------------------------------------------------- + (1 row) -- the cached plans are used stating from the second call SELECT cstore_truncate_test_regular_func(); - cstore_truncate_test_regular_func ------------------------------------ - + cstore_truncate_test_regular_func +--------------------------------------------------------------------- + (1 row) DROP FUNCTION cstore_truncate_test_regular_func(); @@ -203,21 +203,23 @@ CREATE SCHEMA truncate_schema; CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); SELECT COUNT(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); -- create a user that can not truncate CREATE USER truncate_user; +NOTICE: not 
propagating CREATE ROLE/USER commands to worker nodes +HINT: Connect to worker nodes directly to manually create all necessary users and roles. GRANT USAGE ON SCHEMA truncate_schema TO truncate_user; GRANT SELECT ON TABLE truncate_schema.truncate_tbl TO truncate_user; REVOKE TRUNCATE ON TABLE truncate_schema.truncate_tbl FROM truncate_user; @@ -225,34 +227,34 @@ SELECT current_user \gset \c - truncate_user -- verify truncate command fails and check number of rows SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; ERROR: permission denied for table truncate_tbl SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) --- switch to super user, grant truncate to truncate_user +-- switch to super user, grant truncate to truncate_user \c - :current_user GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; -- verify truncate_user can truncate now \c - truncate_user SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) diff --git a/src/test/regress/expected/fdw_truncate_0.out b/src/test/regress/expected/fdw_truncate_0.out index c8cc4ad98..073e8f042 100644 --- a/src/test/regress/expected/fdw_truncate_0.out +++ b/src/test/regress/expected/fdw_truncate_0.out @@ -4,8 +4,8 @@ -- print whether we're using version > 10 to make version-specific tests clear SHOW server_version \gset SELECT substring(:'server_version', '\d+')::int > 10 AS version_above_ten; - version_above_ten -------------------- + 
version_above_ten +--------------------------------------------------------------------- f (1 row) @@ -15,8 +15,8 @@ SELECT count(*) FROM ( SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() ) AS q1) AS q2; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) @@ -30,8 +30,8 @@ INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, INSERT INTO cstore_truncate_test_compressed select a, a from generate_series(1, 10) a; -- query rows SELECT * FROM cstore_truncate_test; - a | b -----+---- + a | b +--------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 @@ -46,42 +46,42 @@ SELECT * FROM cstore_truncate_test; TRUNCATE TABLE cstore_truncate_test; SELECT * FROM cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT COUNT(*) from cstore_truncate_test; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) SELECT count(*) FROM cstore_truncate_test_compressed; - count -------- + count +--------------------------------------------------------------------- 20 (1 row) TRUNCATE TABLE cstore_truncate_test_compressed; SELECT count(*) FROM cstore_truncate_test_compressed; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) SELECT cstore_table_size('cstore_truncate_test_compressed'); - cstore_table_size -------------------- + cstore_table_size +--------------------------------------------------------------------- 26 (1 row) --- make sure data files still present +-- make sure data files still present SELECT count(*) FROM ( SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() ) AS q1) AS q2; - count -------- + count 
+--------------------------------------------------------------------- 6 (1 row) @@ -89,8 +89,8 @@ INSERT INTO cstore_truncate_test select a, a from generate_series(1, 10) a; INSERT INTO cstore_truncate_test_regular select a, a from generate_series(10, 20) a; INSERT INTO cstore_truncate_test_second select a, a from generate_series(20, 30) a; SELECT * from cstore_truncate_test; - a | b -----+---- + a | b +--------------------------------------------------------------------- 1 | 1 2 | 2 3 | 3 @@ -104,8 +104,8 @@ SELECT * from cstore_truncate_test; (10 rows) SELECT * from cstore_truncate_test_second; - a | b -----+---- + a | b +--------------------------------------------------------------------- 20 | 20 21 | 21 22 | 22 @@ -120,8 +120,8 @@ SELECT * from cstore_truncate_test_second; (11 rows) SELECT * from cstore_truncate_test_regular; - a | b -----+---- + a | b +--------------------------------------------------------------------- 10 | 10 11 | 11 12 | 12 @@ -142,25 +142,25 @@ TRUNCATE TABLE cstore_truncate_test, cstore_truncate_test_second, cstore_truncate_test; SELECT * from cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT * from cstore_truncate_test_second; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) SELECT * from cstore_truncate_test_regular; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) -- test if truncate on empty table works TRUNCATE TABLE cstore_truncate_test; SELECT * from cstore_truncate_test; - a | b ----+--- + a | b +--------------------------------------------------------------------- (0 rows) -- test if a cached truncate from a pl/pgsql function works @@ -171,16 +171,16 @@ BEGIN END;$$ LANGUAGE plpgsql; SELECT cstore_truncate_test_regular_func(); - cstore_truncate_test_regular_func ------------------------------------ - + 
cstore_truncate_test_regular_func +--------------------------------------------------------------------- + (1 row) -- the cached plans are used stating from the second call SELECT cstore_truncate_test_regular_func(); - cstore_truncate_test_regular_func ------------------------------------ - + cstore_truncate_test_regular_func +--------------------------------------------------------------------- + (1 row) DROP FUNCTION cstore_truncate_test_regular_func(); @@ -192,15 +192,15 @@ CREATE SCHEMA truncate_schema; CREATE FOREIGN TABLE truncate_schema.truncate_tbl (id int) SERVER cstore_server OPTIONS(compression 'pglz'); INSERT INTO truncate_schema.truncate_tbl SELECT generate_series(1, 100); SELECT COUNT(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT COUNT(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) @@ -214,34 +214,34 @@ SELECT current_user \gset \c - truncate_user -- verify truncate command fails and check number of rows SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; ERROR: permission denied for relation truncate_tbl SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 100 (1 row) --- switch to super user, grant truncate to truncate_user +-- switch to super user, grant truncate to truncate_user \c - :current_user GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; -- verify truncate_user can truncate now \c - truncate_user SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count 
+--------------------------------------------------------------------- 100 (1 row) TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT count(*) FROM truncate_schema.truncate_tbl; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) @@ -255,8 +255,8 @@ SELECT count(*) FROM ( SELECT pg_ls_dir('cstore_fdw/' || databaseoid ) FROM ( SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() ) AS q1) AS q2; - count -------- + count +--------------------------------------------------------------------- 0 (1 row) diff --git a/src/test/regress/sql/.gitignore b/src/test/regress/sql/.gitignore index f2314ba83..145bbaeed 100644 --- a/src/test/regress/sql/.gitignore +++ b/src/test/regress/sql/.gitignore @@ -20,3 +20,13 @@ /multi_behavioral_analytics_create_table.sql /multi_insert_select_behavioral_analytics_create_table.sql /hyperscale_tutorial.sql +/am_block_filtering.sql +/am_copyto.sql +/am_create.sql +/am_data_types.sql +/am_load.sql +/fdw_block_filtering.sql +/fdw_copyto.sql +/fdw_create.sql +/fdw_data_types.sql +/fdw_load.sql diff --git a/src/test/regress/sql/am_drop.sql b/src/test/regress/sql/am_drop.sql index 080712881..7958a1407 100644 --- a/src/test/regress/sql/am_drop.sql +++ b/src/test/regress/sql/am_drop.sql @@ -36,15 +36,15 @@ SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop -CREATE EXTENSION cstore_fdw; +CREATE EXTENSION citus; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE TABLE test_table(data int) USING cstore_tableam; -DROP EXTENSION cstore_fdw CASCADE; +DROP EXTENSION citus CASCADE; -- test database drop -CREATE EXTENSION cstore_fdw; +CREATE EXTENSION citus; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE TABLE test_table(data int) USING cstore_tableam; diff --git a/src/test/regress/sql/extension_create.sql b/src/test/regress/sql/extension_create.sql deleted 
file mode 100644 index 2e73f5be7..000000000 --- a/src/test/regress/sql/extension_create.sql +++ /dev/null @@ -1,4 +0,0 @@ - --- Install cstore_fdw -CREATE EXTENSION cstore_fdw; - diff --git a/src/test/regress/sql/fdw_drop.sql b/src/test/regress/sql/fdw_drop.sql index f89374a5a..3246ed016 100644 --- a/src/test/regress/sql/fdw_drop.sql +++ b/src/test/regress/sql/fdw_drop.sql @@ -36,16 +36,16 @@ SELECT current_database() datname \gset CREATE DATABASE db_to_drop; \c db_to_drop -CREATE EXTENSION cstore_fdw; +CREATE EXTENSION citus; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset CREATE FOREIGN TABLE test_table(data int) SERVER cstore_server; -DROP EXTENSION cstore_fdw CASCADE; +DROP EXTENSION citus CASCADE; -- test database drop -CREATE EXTENSION cstore_fdw; +CREATE EXTENSION citus; CREATE SERVER cstore_server FOREIGN DATA WRAPPER cstore_fdw; SELECT oid::text databaseoid FROM pg_database WHERE datname = current_database() \gset From b6d4a1bbe2e8760cce661ee3ff2b2e8ff9dfb3ca Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Thu, 5 Nov 2020 16:38:57 +0100 Subject: [PATCH 113/124] fix style --- src/backend/columnar/cstore_customscan.c | 26 +-- src/backend/columnar/cstore_fdw.c | 205 +++++++----------- src/backend/columnar/cstore_metadata_tables.c | 153 +++++-------- src/backend/columnar/cstore_reader.c | 143 +++++------- src/backend/columnar/cstore_tableam.c | 109 ++++------ src/backend/columnar/cstore_writer.c | 68 ++---- .../regress/expected/am_vacuum_vs_insert.out | 52 ++--- .../regress/expected/am_write_concurrency.out | 144 ++++++------ src/test/regress/expected/create.out | 2 +- src/test/regress/input/fdw_create.source | 10 +- src/test/regress/sql/am_truncate.sql | 2 +- src/test/regress/sql/fdw_truncate.sql | 2 +- 12 files changed, 362 insertions(+), 554 deletions(-) diff --git a/src/backend/columnar/cstore_customscan.c b/src/backend/columnar/cstore_customscan.c index 
5e2d4c461..3cca3ad29 100644 --- a/src/backend/columnar/cstore_customscan.c +++ b/src/backend/columnar/cstore_customscan.c @@ -136,8 +136,6 @@ static void CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { - Relation relation; - /* call into previous hook if assigned */ if (PreviousSetRelPathlistHook) { @@ -161,7 +159,7 @@ CStoreSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti, * If that is the case we want to insert an extra path that pushes down the projection * into the scan of the table to minimize the data read. */ - relation = RelationIdGetRelation(rte->relid); + Relation relation = RelationIdGetRelation(rte->relid); if (relation->rd_tableam == GetCstoreTableAmRoutine()) { Path *customPath = CreateCStoreScanPath(rel, rte); @@ -181,19 +179,17 @@ CreateCStoreScanPath(RelOptInfo *rel, RangeTblEntry *rte) { CStoreScanPath *cspath = (CStoreScanPath *) newNode(sizeof(CStoreScanPath), T_CustomPath); - CustomPath *cpath; - Path *path; /* * popuate custom path information */ - cpath = &cspath->custom_path; + CustomPath *cpath = &cspath->custom_path; cpath->methods = &CStoreScanPathMethods; /* * populate generic path information */ - path = &cpath->path; + Path *path = &cpath->path; path->pathtype = T_CustomScan; path->parent = rel; path->pathtarget = rel->reltarget; @@ -329,18 +325,14 @@ static TupleTableSlot * CStoreScanNext(CStoreScanState *cstorescanstate) { CustomScanState *node = (CustomScanState *) cstorescanstate; - TableScanDesc scandesc; - EState *estate; - ScanDirection direction; - TupleTableSlot *slot; /* * get information from the estate and scan state */ - scandesc = node->ss.ss_currentScanDesc; - estate = node->ss.ps.state; - direction = estate->es_direction; - slot = node->ss.ss_ScanTupleSlot; + TableScanDesc scandesc = node->ss.ss_currentScanDesc; + EState *estate = node->ss.ps.state; + ScanDirection direction = estate->es_direction; + TupleTableSlot *slot = node->ss.ss_ScanTupleSlot; if 
(scandesc == NULL) { @@ -394,12 +386,10 @@ CStoreScan_ExecCustomScan(CustomScanState *node) static void CStoreScan_EndCustomScan(CustomScanState *node) { - TableScanDesc scanDesc; - /* * get information from node */ - scanDesc = node->ss.ss_currentScanDesc; + TableScanDesc scanDesc = node->ss.ss_currentScanDesc; /* * Free the exprcontext diff --git a/src/backend/columnar/cstore_fdw.c b/src/backend/columnar/cstore_fdw.c index 79af4f3bd..e7b1a76b8 100644 --- a/src/backend/columnar/cstore_fdw.c +++ b/src/backend/columnar/cstore_fdw.c @@ -255,17 +255,14 @@ cstore_fdw_finish() Datum cstore_ddl_event_end_trigger(PG_FUNCTION_ARGS) { - EventTriggerData *triggerData = NULL; - Node *parseTree = NULL; - /* error if event trigger manager did not call this function */ if (!CALLED_AS_EVENT_TRIGGER(fcinfo)) { ereport(ERROR, (errmsg("trigger not fired by event trigger manager"))); } - triggerData = (EventTriggerData *) fcinfo->context; - parseTree = triggerData->parsetree; + EventTriggerData *triggerData = (EventTriggerData *) fcinfo->context; + Node *parseTree = triggerData->parsetree; if (nodeTag(parseTree) == T_CreateForeignTableStmt) { @@ -495,16 +492,9 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) { uint64 processedRowCount = 0; Relation relation = NULL; - Oid relationId = InvalidOid; - TupleDesc tupleDescriptor = NULL; - uint32 columnCount = 0; CopyState copyState = NULL; bool nextRowFound = true; - Datum *columnValues = NULL; - bool *columnNulls = NULL; TableWriteState *writeState = NULL; - CStoreOptions *cstoreOptions = NULL; - MemoryContext tupleContext = NULL; /* Only superuser can copy from or to local file */ CheckSuperuserPrivilegesForCopy(copyStatement); @@ -516,15 +506,15 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) * concurrent reads and writes. 
*/ relation = cstore_fdw_openrv(copyStatement->relation, RowExclusiveLock); - relationId = RelationGetRelid(relation); + Oid relationId = RelationGetRelid(relation); /* allocate column values and nulls arrays */ - tupleDescriptor = RelationGetDescr(relation); - columnCount = tupleDescriptor->natts; - columnValues = palloc0(columnCount * sizeof(Datum)); - columnNulls = palloc0(columnCount * sizeof(bool)); + TupleDesc tupleDescriptor = RelationGetDescr(relation); + uint32 columnCount = tupleDescriptor->natts; + Datum *columnValues = palloc0(columnCount * sizeof(Datum)); + bool *columnNulls = palloc0(columnCount * sizeof(bool)); - cstoreOptions = CStoreGetOptions(relationId); + CStoreOptions *cstoreOptions = CStoreGetOptions(relationId); /* * We create a new memory context called tuple context, and read and write @@ -533,9 +523,9 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) * allocated for each row, and don't bloat memory usage with large input * files. */ - tupleContext = AllocSetContextCreate(CurrentMemoryContext, - "CStore COPY Row Memory Context", - ALLOCSET_DEFAULT_SIZES); + MemoryContext tupleContext = AllocSetContextCreate(CurrentMemoryContext, + "CStore COPY Row Memory Context", + ALLOCSET_DEFAULT_SIZES); /* init state to read from COPY data source */ #if (PG_VERSION_NUM >= 100000) @@ -606,10 +596,6 @@ static uint64 CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString) { uint64 processedCount = 0; - RangeVar *relation = NULL; - char *qualifiedName = NULL; - List *queryList = NIL; - Node *rawQuery = NULL; StringInfo newQuerySubstring = makeStringInfo(); @@ -621,14 +607,14 @@ CopyOutCStoreTable(CopyStmt *copyStatement, const char *queryString) "...' 
instead"))); } - relation = copyStatement->relation; - qualifiedName = quote_qualified_identifier(relation->schemaname, - relation->relname); + RangeVar *relation = copyStatement->relation; + char *qualifiedName = quote_qualified_identifier(relation->schemaname, + relation->relname); appendStringInfo(newQuerySubstring, "select * from %s", qualifiedName); - queryList = raw_parser(newQuerySubstring->data); + List *queryList = raw_parser(newQuerySubstring->data); /* take the first parse tree */ - rawQuery = linitial(queryList); + Node *rawQuery = linitial(queryList); /* * Set the relation field to NULL so that COPY command works on @@ -674,7 +660,6 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) { ObjectType objectType = alterStatement->relkind; RangeVar *relationRangeVar = alterStatement->relation; - Oid relationId = InvalidOid; List *commandList = alterStatement->cmds; ListCell *commandCell = NULL; @@ -684,7 +669,7 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) return; } - relationId = RangeVarGetRelid(relationRangeVar, AccessShareLock, true); + Oid relationId = RangeVarGetRelid(relationRangeVar, AccessShareLock, true); if (!IsCStoreFdwTable(relationId)) { return; @@ -700,7 +685,6 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) Oid targetTypeId = typenameTypeId(NULL, columnDef->typeName); char *typeName = TypeNameToString(columnDef->typeName); AttrNumber attributeNumber = get_attnum(relationId, columnName); - Oid currentTypeId = InvalidOid; if (attributeNumber <= 0) { @@ -708,7 +692,7 @@ CStoreProcessAlterTableCommand(AlterTableStmt *alterStatement) continue; } - currentTypeId = get_atttype(relationId, attributeNumber); + Oid currentTypeId = get_atttype(relationId, attributeNumber); /* * We are only interested in implicit coersion type compatibility. 
@@ -811,34 +795,28 @@ TruncateCStoreTables(List *cstoreRelationList) static void FdwNewRelFileNode(Relation relation) { - Relation pg_class; - HeapTuple tuple; - Form_pg_class classform; + Relation pg_class = heap_open(RelationRelationId, RowExclusiveLock); - pg_class = heap_open(RelationRelationId, RowExclusiveLock); - - tuple = SearchSysCacheCopy1(RELOID, - ObjectIdGetDatum(RelationGetRelid(relation))); + HeapTuple tuple = SearchSysCacheCopy1(RELOID, + ObjectIdGetDatum(RelationGetRelid(relation))); if (!HeapTupleIsValid(tuple)) { elog(ERROR, "could not find tuple for relation %u", RelationGetRelid(relation)); } - classform = (Form_pg_class) GETSTRUCT(tuple); + Form_pg_class classform = (Form_pg_class) GETSTRUCT(tuple); if (true) { char persistence = relation->rd_rel->relpersistence; - Relation tmprel; Oid tablespace; - Oid filenode; /* * Upgrade to AccessExclusiveLock, and hold until the end of the * transaction. This shouldn't happen during a read, but it's hard to * prove that because it happens lazily. 
*/ - tmprel = heap_open(relation->rd_id, AccessExclusiveLock); + Relation tmprel = heap_open(relation->rd_id, AccessExclusiveLock); heap_close(tmprel, NoLock); if (OidIsValid(relation->rd_rel->relfilenode)) @@ -856,7 +834,7 @@ FdwNewRelFileNode(Relation relation) tablespace = MyDatabaseTableSpace; } - filenode = GetNewRelFileNode(tablespace, NULL, persistence); + Oid filenode = GetNewRelFileNode(tablespace, NULL, persistence); classform->relfilenode = filenode; classform->relpages = 0; /* it's empty until further notice */ @@ -886,9 +864,8 @@ FdwCreateStorage(Relation relation) if (!smgrexists(relation->rd_smgr, MAIN_FORKNUM)) { #if PG_VERSION_NUM >= 120000 - SMgrRelation srel; - srel = RelationCreateStorage(relation->rd_node, - relation->rd_rel->relpersistence); + SMgrRelation srel = RelationCreateStorage(relation->rd_node, + relation->rd_rel->relpersistence); smgrclose(srel); #else RelationCreateStorage(relation->rd_node, @@ -906,14 +883,13 @@ bool IsCStoreFdwTable(Oid relationId) { bool cstoreTable = false; - char relationKind = 0; if (relationId == InvalidOid) { return false; } - relationKind = get_rel_relkind(relationId); + char relationKind = get_rel_relkind(relationId); if (relationKind == RELKIND_FOREIGN_TABLE) { ForeignTable *foreignTable = GetForeignTable(relationId); @@ -956,13 +932,8 @@ IsCStoreServer(ForeignServer *server) static bool DistributedTable(Oid relationId) { - bool distributedTable = false; - Oid partitionOid = InvalidOid; - Relation heapRelation = NULL; - TableScanDesc scanDesc = NULL; const int scanKeyCount = 1; ScanKeyData scanKey[1]; - HeapTuple heapTuple = NULL; bool missingOK = true; Oid extensionOid = get_extension_oid(CITUS_EXTENSION_NAME, missingOK); @@ -972,23 +943,25 @@ DistributedTable(Oid relationId) return false; } - partitionOid = get_relname_relid(CITUS_PARTITION_TABLE_NAME, PG_CATALOG_NAMESPACE); + Oid partitionOid = get_relname_relid(CITUS_PARTITION_TABLE_NAME, + PG_CATALOG_NAMESPACE); if (partitionOid == InvalidOid) { /* 
the pg_dist_partition table does not exist */ return false; } - heapRelation = heap_open(partitionOid, AccessShareLock); + Relation heapRelation = heap_open(partitionOid, AccessShareLock); ScanKeyInit(&scanKey[0], ATTR_NUM_PARTITION_RELATION_ID, InvalidStrategy, F_OIDEQ, ObjectIdGetDatum(relationId)); - scanDesc = table_beginscan(heapRelation, SnapshotSelf, scanKeyCount, scanKey); + TableScanDesc scanDesc = table_beginscan(heapRelation, SnapshotSelf, scanKeyCount, + scanKey); - heapTuple = heap_getnext(scanDesc, ForwardScanDirection); + HeapTuple heapTuple = heap_getnext(scanDesc, ForwardScanDirection); - distributedTable = HeapTupleIsValid(heapTuple); + bool distributedTable = HeapTupleIsValid(heapTuple); table_endscan(scanDesc); relation_close(heapRelation, AccessShareLock); @@ -1027,17 +1000,15 @@ cstore_table_size(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); bool cstoreTable = IsCStoreFdwTable(relationId); - Relation relation; - BlockNumber nblocks; if (!cstoreTable) { ereport(ERROR, (errmsg("relation is not a cstore table"))); } - relation = cstore_fdw_open(relationId, AccessShareLock); + Relation relation = cstore_fdw_open(relationId, AccessShareLock); RelationOpenSmgr(relation); - nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); + BlockNumber nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); heap_close(relation, AccessShareLock); PG_RETURN_INT64(nblocks * BLCKSZ); } @@ -1205,20 +1176,16 @@ GetSlotHeapTuple(TupleTableSlot *tts) static CStoreOptions * CStoreGetOptions(Oid foreignTableId) { - CStoreOptions *cstoreOptions = NULL; CompressionType compressionType = cstore_compression; int32 stripeRowCount = cstore_stripe_row_count; int32 blockRowCount = cstore_block_row_count; - char *compressionTypeString = NULL; - char *stripeRowCountString = NULL; - char *blockRowCountString = NULL; - compressionTypeString = CStoreGetOptionValue(foreignTableId, - OPTION_NAME_COMPRESSION_TYPE); - stripeRowCountString = CStoreGetOptionValue(foreignTableId, 
- OPTION_NAME_STRIPE_ROW_COUNT); - blockRowCountString = CStoreGetOptionValue(foreignTableId, - OPTION_NAME_BLOCK_ROW_COUNT); + char *compressionTypeString = CStoreGetOptionValue(foreignTableId, + OPTION_NAME_COMPRESSION_TYPE); + char *stripeRowCountString = CStoreGetOptionValue(foreignTableId, + OPTION_NAME_STRIPE_ROW_COUNT); + char *blockRowCountString = CStoreGetOptionValue(foreignTableId, + OPTION_NAME_BLOCK_ROW_COUNT); ValidateForeignTableOptions(compressionTypeString, stripeRowCountString, blockRowCountString); @@ -1237,7 +1204,7 @@ CStoreGetOptions(Oid foreignTableId) blockRowCount = pg_atoi(blockRowCountString, sizeof(int32), 0); } - cstoreOptions = palloc0(sizeof(CStoreOptions)); + CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions)); cstoreOptions->compressionType = compressionType; cstoreOptions->stripeRowCount = stripeRowCount; cstoreOptions->blockRowCount = blockRowCount; @@ -1254,14 +1221,12 @@ CStoreGetOptions(Oid foreignTableId) static char * CStoreGetOptionValue(Oid foreignTableId, const char *optionName) { - ForeignTable *foreignTable = NULL; - ForeignServer *foreignServer = NULL; List *optionList = NIL; ListCell *optionCell = NULL; char *optionValue = NULL; - foreignTable = GetForeignTable(foreignTableId); - foreignServer = GetForeignServer(foreignTable->serverid); + ForeignTable *foreignTable = GetForeignTable(foreignTableId); + ForeignServer *foreignServer = GetForeignServer(foreignTable->serverid); optionList = list_concat(optionList, foreignTable->options); optionList = list_concat(optionList, foreignServer->options); @@ -1451,8 +1416,6 @@ CStoreGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreignTableI #endif { ForeignScan *foreignScan = NULL; - List *columnList = NIL; - List *foreignPrivateList = NIL; /* * Although we skip row blocks that are refuted by the WHERE clause, but @@ -1469,8 +1432,8 @@ CStoreGetForeignPlan(PlannerInfo * root, RelOptInfo * baserel, Oid foreignTableI * in executor's callback functions, so 
we get the column list here and put * it into foreign scan node's private list. */ - columnList = ColumnList(baserel, foreignTableId); - foreignPrivateList = list_make1(columnList); + List *columnList = ColumnList(baserel, foreignTableId); + List *foreignPrivateList = list_make1(columnList); /* create the foreign scan node */ #if PG_VERSION_NUM >= 90500 @@ -1525,10 +1488,8 @@ TupleCountEstimate(Relation relation, RelOptInfo *baserel) static BlockNumber PageCount(Relation relation) { - BlockNumber nblocks; - RelationOpenSmgr(relation); - nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); + BlockNumber nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); return (nblocks > 0) ? nblocks : 1; } @@ -1655,9 +1616,8 @@ CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState /* supress file size if we're not showing cost details */ if (explainState->costs) { - long nblocks; RelationOpenSmgr(relation); - nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); + long nblocks = smgrnblocks(relation->rd_smgr, MAIN_FORKNUM); ExplainPropertyLong("CStore File Size", (long) (nblocks * BLCKSZ), explainState); } @@ -1668,15 +1628,8 @@ CStoreExplainForeignScan(ForeignScanState *scanState, ExplainState *explainState static void CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) { - TableReadState *readState = NULL; - Oid foreignTableId = InvalidOid; Relation currentRelation = scanState->ss.ss_currentRelation; TupleDesc tupleDescriptor = RelationGetDescr(currentRelation); - List *columnList = NIL; - ForeignScan *foreignScan = NULL; - List *foreignPrivateList = NIL; - List *whereClauseList = NIL; - Relation relation = NULL; cstore_fdw_initrel(currentRelation); @@ -1686,15 +1639,16 @@ CStoreBeginForeignScan(ForeignScanState *scanState, int executorFlags) return; } - foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation); + Oid foreignTableId = RelationGetRelid(scanState->ss.ss_currentRelation); - foreignScan = (ForeignScan 
*) scanState->ss.ps.plan; - foreignPrivateList = (List *) foreignScan->fdw_private; - whereClauseList = foreignScan->scan.plan.qual; + ForeignScan *foreignScan = (ForeignScan *) scanState->ss.ps.plan; + List *foreignPrivateList = (List *) foreignScan->fdw_private; + List *whereClauseList = foreignScan->scan.plan.qual; - columnList = (List *) linitial(foreignPrivateList); - relation = cstore_fdw_open(foreignTableId, AccessShareLock); - readState = CStoreBeginRead(relation, tupleDescriptor, columnList, whereClauseList); + List *columnList = (List *) linitial(foreignPrivateList); + Relation relation = cstore_fdw_open(foreignTableId, AccessShareLock); + TableReadState *readState = CStoreBeginRead(relation, tupleDescriptor, columnList, + whereClauseList); scanState->fdw_state = (void *) readState; } @@ -1710,7 +1664,6 @@ CStoreIterateForeignScan(ForeignScanState *scanState) { TableReadState *readState = (TableReadState *) scanState->fdw_state; TupleTableSlot *tupleSlot = scanState->ss.ss_ScanTupleSlot; - bool nextRowFound = false; TupleDesc tupleDescriptor = tupleSlot->tts_tupleDescriptor; Datum *columnValues = tupleSlot->tts_values; @@ -1723,7 +1676,7 @@ CStoreIterateForeignScan(ForeignScanState *scanState) ExecClearTuple(tupleSlot); - nextRowFound = CStoreReadNextRow(readState, columnValues, columnNulls); + bool nextRowFound = CStoreReadNextRow(readState, columnValues, columnNulls); if (nextRowFound) { ExecStoreVirtualTuple(tupleSlot); @@ -1797,13 +1750,9 @@ CStoreAcquireSampleRows(Relation relation, int logLevel, double selectionState = 0; MemoryContext oldContext = CurrentMemoryContext; MemoryContext tupleContext = NULL; - Datum *columnValues = NULL; - bool *columnNulls = NULL; TupleTableSlot *scanTupleSlot = NULL; List *columnList = NIL; - List *foreignPrivateList = NULL; ForeignScanState *scanState = NULL; - ForeignScan *foreignScan = NULL; char *relationName = NULL; int executorFlags = 0; uint32 columnIndex = 0; @@ -1829,13 +1778,13 @@ 
CStoreAcquireSampleRows(Relation relation, int logLevel, } /* setup foreign scan plan node */ - foreignPrivateList = list_make1(columnList); - foreignScan = makeNode(ForeignScan); + List *foreignPrivateList = list_make1(columnList); + ForeignScan *foreignScan = makeNode(ForeignScan); foreignScan->fdw_private = foreignPrivateList; /* set up tuple slot */ - columnValues = palloc0(columnCount * sizeof(Datum)); - columnNulls = palloc0(columnCount * sizeof(bool)); + Datum *columnValues = palloc0(columnCount * sizeof(Datum)); + bool *columnNulls = palloc0(columnCount * sizeof(bool)); #if PG_VERSION_NUM >= 120000 scanTupleSlot = MakeTupleTableSlot(NULL, &TTSOpsVirtual); #elif PG_VERSION_NUM >= 110000 @@ -1968,13 +1917,12 @@ CStorePlanForeignModify(PlannerInfo *plannerInfo, ModifyTable *plan, if (plan->operation == CMD_INSERT) { ListCell *tableCell = NULL; - Query *query = NULL; /* * Only insert operation with select subquery is supported. Other forms * of insert, update, and delete operations are not supported. 
*/ - query = plannerInfo->parse; + Query *query = plannerInfo->parse; foreach(tableCell, query->rtable) { RangeTblEntry *tableEntry = lfirst(tableCell); @@ -2027,22 +1975,16 @@ CStoreBeginForeignModify(ModifyTableState *modifyTableState, static void CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *relationInfo) { - Oid foreignTableOid = InvalidOid; - CStoreOptions *cstoreOptions = NULL; - TupleDesc tupleDescriptor = NULL; - TableWriteState *writeState = NULL; - Relation relation = NULL; + Oid foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc); + Relation relation = cstore_fdw_open(foreignTableOid, RowExclusiveLock); + CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableOid); + TupleDesc tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); - foreignTableOid = RelationGetRelid(relationInfo->ri_RelationDesc); - relation = cstore_fdw_open(foreignTableOid, RowExclusiveLock); - cstoreOptions = CStoreGetOptions(foreignTableOid); - tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); - - writeState = CStoreBeginWrite(relation, - cstoreOptions->compressionType, - cstoreOptions->stripeRowCount, - cstoreOptions->blockRowCount, - tupleDescriptor); + TableWriteState *writeState = CStoreBeginWrite(relation, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, + tupleDescriptor); relationInfo->ri_FdwState = (void *) writeState; } @@ -2057,11 +1999,10 @@ CStoreExecForeignInsert(EState *executorState, ResultRelInfo *relationInfo, TupleTableSlot *tupleSlot, TupleTableSlot *planSlot) { TableWriteState *writeState = (TableWriteState *) relationInfo->ri_FdwState; - HeapTuple heapTuple; Assert(writeState != NULL); - heapTuple = GetSlotHeapTuple(tupleSlot); + HeapTuple heapTuple = GetSlotHeapTuple(tupleSlot); if (HeapTupleHasExternal(heapTuple)) { diff --git a/src/backend/columnar/cstore_metadata_tables.c b/src/backend/columnar/cstore_metadata_tables.c index 
4e40aee68..f23aaba2d 100644 --- a/src/backend/columnar/cstore_metadata_tables.c +++ b/src/backend/columnar/cstore_metadata_tables.c @@ -130,9 +130,6 @@ void InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCount, CompressionType compression) { - Oid cstoreDataFilesOid = InvalidOid; - Relation cstoreDataFiles = NULL; - ModifyState *modifyState = NULL; NameData compressionName = { 0 }; namestrcpy(&compressionName, CompressionTypeStr(compression)); @@ -149,10 +146,10 @@ InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCoun DeleteDataFileMetadataRowIfExists(relfilenode); - cstoreDataFilesOid = CStoreDataFilesRelationId(); - cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock); + Oid cstoreDataFilesOid = CStoreDataFilesRelationId(); + Relation cstoreDataFiles = heap_open(cstoreDataFilesOid, RowExclusiveLock); - modifyState = StartModifyRelation(cstoreDataFiles); + ModifyState *modifyState = StartModifyRelation(cstoreDataFiles); InsertTupleAndEnforceConstraints(modifyState, values, nulls); FinishModifyRelation(modifyState); @@ -169,9 +166,6 @@ UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCo const int scanKeyCount = 1; ScanKeyData scanKey[1]; bool indexOK = true; - SysScanDesc scanDescriptor = NULL; - Form_cstore_data_files metadata = NULL; - HeapTuple heapTuple = NULL; Datum values[Natts_cstore_data_files] = { 0 }; bool isnull[Natts_cstore_data_files] = { 0 }; bool replace[Natts_cstore_data_files] = { 0 }; @@ -182,19 +176,19 @@ UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCo ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, F_INT8EQ, ObjectIdGetDatum(relfilenode)); - scanDescriptor = systable_beginscan(cstoreDataFiles, - CStoreDataFilesIndexRelationId(), - indexOK, - NULL, scanKeyCount, scanKey); + SysScanDesc scanDescriptor = systable_beginscan(cstoreDataFiles, + CStoreDataFilesIndexRelationId(), + 
indexOK, + NULL, scanKeyCount, scanKey); - heapTuple = systable_getnext(scanDescriptor); + HeapTuple heapTuple = systable_getnext(scanDescriptor); if (heapTuple == NULL) { ereport(ERROR, (errmsg("relfilenode %d doesn't belong to a cstore table", relfilenode))); } - metadata = (Form_cstore_data_files) GETSTRUCT(heapTuple); + Form_cstore_data_files metadata = (Form_cstore_data_files) GETSTRUCT(heapTuple); bool changed = false; if (metadata->block_row_count != blockRowCount) @@ -250,14 +244,11 @@ SaveStripeSkipList(Oid relfilenode, uint64 stripe, StripeSkipList *stripeSkipLis { uint32 columnIndex = 0; uint32 blockIndex = 0; - Oid cstoreSkipNodesOid = InvalidOid; - Relation cstoreSkipNodes = NULL; - ModifyState *modifyState = NULL; uint32 columnCount = stripeSkipList->columnCount; - cstoreSkipNodesOid = CStoreSkipNodesRelationId(); - cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock); - modifyState = StartModifyRelation(cstoreSkipNodes); + Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId(); + Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, RowExclusiveLock); + ModifyState *modifyState = StartModifyRelation(cstoreSkipNodes); for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { @@ -316,28 +307,24 @@ StripeSkipList * ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 blockCount) { - StripeSkipList *skipList = NULL; int32 columnIndex = 0; - Oid cstoreSkipNodesOid = InvalidOid; - Relation cstoreSkipNodes = NULL; - Relation index = NULL; HeapTuple heapTuple = NULL; uint32 columnCount = tupleDescriptor->natts; ScanKeyData scanKey[2]; - SysScanDesc scanDescriptor = NULL; - cstoreSkipNodesOid = CStoreSkipNodesRelationId(); - cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock); - index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock); + Oid cstoreSkipNodesOid = CStoreSkipNodesRelationId(); + Relation cstoreSkipNodes = heap_open(cstoreSkipNodesOid, AccessShareLock); + Relation 
index = index_open(CStoreSkipNodesIndexRelationId(), AccessShareLock); ScanKeyInit(&scanKey[0], Anum_cstore_skipnodes_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); ScanKeyInit(&scanKey[1], Anum_cstore_skipnodes_stripe, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(stripe)); - scanDescriptor = systable_beginscan_ordered(cstoreSkipNodes, index, NULL, 2, scanKey); + SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreSkipNodes, index, NULL, + 2, scanKey); - skipList = palloc0(sizeof(StripeSkipList)); + StripeSkipList *skipList = palloc0(sizeof(StripeSkipList)); skipList->blockCount = blockCount; skipList->columnCount = columnCount; skipList->blockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); @@ -349,18 +336,14 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { - int32 attr = 0; - int32 blockIndex = 0; - ColumnBlockSkipNode *skipNode = NULL; - Datum datumArray[Natts_cstore_skipnodes]; bool isNullArray[Natts_cstore_skipnodes]; heap_deform_tuple(heapTuple, RelationGetDescr(cstoreSkipNodes), datumArray, isNullArray); - attr = DatumGetInt32(datumArray[Anum_cstore_skipnodes_attr - 1]); - blockIndex = DatumGetInt32(datumArray[Anum_cstore_skipnodes_block - 1]); + int32 attr = DatumGetInt32(datumArray[Anum_cstore_skipnodes_attr - 1]); + int32 blockIndex = DatumGetInt32(datumArray[Anum_cstore_skipnodes_block - 1]); if (attr <= 0 || attr > columnCount) { @@ -376,7 +359,8 @@ ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, columnIndex = attr - 1; - skipNode = &skipList->blockSkipNodeArray[columnIndex][blockIndex]; + ColumnBlockSkipNode *skipNode = + &skipList->blockSkipNodeArray[columnIndex][blockIndex]; skipNode->rowCount = DatumGetInt64(datumArray[Anum_cstore_skipnodes_row_count - 1]); skipNode->valueBlockOffset = @@ -507,12 +491,11 @@ GetHighestUsedAddressAndId(Oid relfilenode, uint64 
*highestUsedId) { ListCell *stripeMetadataCell = NULL; - List *stripeMetadataList = NIL; SnapshotData SnapshotDirty; InitDirtySnapshot(SnapshotDirty); - stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty); + List *stripeMetadataList = ReadDataFileStripeList(relfilenode, &SnapshotDirty); *highestUsedId = 0; *highestUsedAddress = 0; @@ -538,14 +521,7 @@ ReserveStripe(Relation rel, uint64 sizeBytes, uint64 blockCount, uint64 blockRowCount) { StripeMetadata stripe = { 0 }; - Oid relfilenode = InvalidOid; uint64 currLogicalHigh = 0; - SmgrAddr currSmgrHigh; - uint64 nblocks = 0; - uint64 resLogicalStart = 0; - SmgrAddr resSmgrStart; - uint64 resLogicalEnd = 0; - SmgrAddr resSmgrEnd; uint64 highestId = 0; /* @@ -556,18 +532,18 @@ ReserveStripe(Relation rel, uint64 sizeBytes, */ LockRelation(rel, ShareUpdateExclusiveLock); - relfilenode = rel->rd_node.relNode; + Oid relfilenode = rel->rd_node.relNode; GetHighestUsedAddressAndId(relfilenode, &currLogicalHigh, &highestId); - currSmgrHigh = logical_to_smgr(currLogicalHigh); + SmgrAddr currSmgrHigh = logical_to_smgr(currLogicalHigh); - resSmgrStart = next_block_start(currSmgrHigh); - resLogicalStart = smgr_to_logical(resSmgrStart); + SmgrAddr resSmgrStart = next_block_start(currSmgrHigh); + uint64 resLogicalStart = smgr_to_logical(resSmgrStart); - resLogicalEnd = resLogicalStart + sizeBytes - 1; - resSmgrEnd = logical_to_smgr(resLogicalEnd); + uint64 resLogicalEnd = resLogicalStart + sizeBytes - 1; + SmgrAddr resSmgrEnd = logical_to_smgr(resLogicalEnd); RelationOpenSmgr(rel); - nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + uint64 nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); while (resSmgrEnd.blockno >= nblocks) { @@ -602,34 +578,29 @@ static List * ReadDataFileStripeList(Oid relfilenode, Snapshot snapshot) { List *stripeMetadataList = NIL; - Oid cstoreStripesOid = InvalidOid; - Relation cstoreStripes = NULL; - Relation index = NULL; - TupleDesc tupleDescriptor = NULL; ScanKeyData scanKey[1]; 
- SysScanDesc scanDescriptor = NULL; HeapTuple heapTuple; ScanKeyInit(&scanKey[0], Anum_cstore_stripes_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); - cstoreStripesOid = CStoreStripesRelationId(); - cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock); - index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); - tupleDescriptor = RelationGetDescr(cstoreStripes); + Oid cstoreStripesOid = CStoreStripesRelationId(); + Relation cstoreStripes = heap_open(cstoreStripesOid, AccessShareLock); + Relation index = index_open(CStoreStripesIndexRelationId(), AccessShareLock); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreStripes); - scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, snapshot, 1, - scanKey); + SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreStripes, index, + snapshot, 1, + scanKey); while (HeapTupleIsValid(heapTuple = systable_getnext(scanDescriptor))) { - StripeMetadata *stripeMetadata = NULL; Datum datumArray[Natts_cstore_stripes]; bool isNullArray[Natts_cstore_stripes]; heap_deform_tuple(heapTuple, tupleDescriptor, datumArray, isNullArray); - stripeMetadata = palloc0(sizeof(StripeMetadata)); + StripeMetadata *stripeMetadata = palloc0(sizeof(StripeMetadata)); stripeMetadata->id = DatumGetInt64(datumArray[Anum_cstore_stripes_stripe - 1]); stripeMetadata->fileOffset = DatumGetInt64( datumArray[Anum_cstore_stripes_file_offset - 1]); @@ -663,19 +634,13 @@ static bool ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata) { bool found = false; - Oid cstoreDataFilesOid = InvalidOid; - Relation cstoreDataFiles = NULL; - Relation index = NULL; - TupleDesc tupleDescriptor = NULL; ScanKeyData scanKey[1]; - SysScanDesc scanDescriptor = NULL; - HeapTuple heapTuple = NULL; ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); - cstoreDataFilesOid = CStoreDataFilesRelationId(); - cstoreDataFiles = 
try_relation_open(cstoreDataFilesOid, AccessShareLock); + Oid cstoreDataFilesOid = CStoreDataFilesRelationId(); + Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); if (cstoreDataFiles == NULL) { /* @@ -685,7 +650,7 @@ ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata) return false; } - index = try_relation_open(CStoreDataFilesIndexRelationId(), AccessShareLock); + Relation index = try_relation_open(CStoreDataFilesIndexRelationId(), AccessShareLock); if (index == NULL) { heap_close(cstoreDataFiles, NoLock); @@ -694,11 +659,12 @@ ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata) return false; } - tupleDescriptor = RelationGetDescr(cstoreDataFiles); + TupleDesc tupleDescriptor = RelationGetDescr(cstoreDataFiles); - scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, 1, scanKey); + SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, + 1, scanKey); - heapTuple = systable_getnext(scanDescriptor); + HeapTuple heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { Datum datumArray[Natts_cstore_data_files]; @@ -707,13 +673,11 @@ ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata) if (metadata) { - Name compressionName = NULL; - metadata->blockRowCount = DatumGetInt32( datumArray[Anum_cstore_data_files_block_row_count - 1]); metadata->stripeRowCount = DatumGetInt32( datumArray[Anum_cstore_data_files_stripe_row_count - 1]); - compressionName = DatumGetName( + Name compressionName = DatumGetName( datumArray[Anum_cstore_data_files_compression - 1]); metadata->compression = ParseCompressionType(NameStr(*compressionName)); } @@ -734,12 +698,7 @@ ReadCStoreDataFiles(Oid relfilenode, DataFileMetadata *metadata) void DeleteDataFileMetadataRowIfExists(Oid relfilenode) { - Oid cstoreDataFilesOid = InvalidOid; - Relation cstoreDataFiles = NULL; - Relation index = NULL; ScanKeyData scanKey[1]; - SysScanDesc scanDescriptor = NULL; - 
HeapTuple heapTuple = NULL; /* * During a restore for binary upgrade, metadata tables and indexes may or @@ -753,19 +712,20 @@ DeleteDataFileMetadataRowIfExists(Oid relfilenode) ScanKeyInit(&scanKey[0], Anum_cstore_data_files_relfilenode, BTEqualStrategyNumber, F_OIDEQ, Int32GetDatum(relfilenode)); - cstoreDataFilesOid = CStoreDataFilesRelationId(); - cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); + Oid cstoreDataFilesOid = CStoreDataFilesRelationId(); + Relation cstoreDataFiles = try_relation_open(cstoreDataFilesOid, AccessShareLock); if (cstoreDataFiles == NULL) { /* extension has been dropped */ return; } - index = index_open(CStoreDataFilesIndexRelationId(), AccessShareLock); + Relation index = index_open(CStoreDataFilesIndexRelationId(), AccessShareLock); - scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, 1, scanKey); + SysScanDesc scanDescriptor = systable_beginscan_ordered(cstoreDataFiles, index, NULL, + 1, scanKey); - heapTuple = systable_getnext(scanDescriptor); + HeapTuple heapTuple = systable_getnext(scanDescriptor); if (HeapTupleIsValid(heapTuple)) { ModifyState *modifyState = StartModifyRelation(cstoreDataFiles); @@ -785,13 +745,12 @@ DeleteDataFileMetadataRowIfExists(Oid relfilenode) static ModifyState * StartModifyRelation(Relation rel) { - ModifyState *modifyState = NULL; EState *estate = create_estate_for_relation(rel); /* ExecSimpleRelationInsert, ... 
require caller to open indexes */ ExecOpenIndices(estate->es_result_relation_info, false); - modifyState = palloc(sizeof(ModifyState)); + ModifyState *modifyState = palloc(sizeof(ModifyState)); modifyState->rel = rel; modifyState->estate = estate; @@ -869,13 +828,11 @@ FinishModifyRelation(ModifyState *state) static EState * create_estate_for_relation(Relation rel) { - EState *estate; ResultRelInfo *resultRelInfo; - RangeTblEntry *rte; - estate = CreateExecutorState(); + EState *estate = CreateExecutorState(); - rte = makeNode(RangeTblEntry); + RangeTblEntry *rte = makeNode(RangeTblEntry); rte->rtekind = RTE_RELATION; rte->relid = RelationGetRelid(rel); rte->relkind = rel->rd_rel->relkind; diff --git a/src/backend/columnar/cstore_reader.c b/src/backend/columnar/cstore_reader.c index b46b59729..8818baf78 100644 --- a/src/backend/columnar/cstore_reader.c +++ b/src/backend/columnar/cstore_reader.c @@ -84,23 +84,20 @@ TableReadState * CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { - TableReadState *readState = NULL; - DataFileMetadata *datafileMetadata = NULL; - MemoryContext stripeReadContext = NULL; Oid relNode = relation->rd_node.relNode; - datafileMetadata = ReadDataFileMetadata(relNode, false); + DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relNode, false); /* * We allocate all stripe specific data in the stripeReadContext, and reset * this memory context before loading a new stripe. This is to avoid memory * leaks. 
*/ - stripeReadContext = AllocSetContextCreate(CurrentMemoryContext, - "Stripe Read Memory Context", - ALLOCSET_DEFAULT_SIZES); + MemoryContext stripeReadContext = AllocSetContextCreate(CurrentMemoryContext, + "Stripe Read Memory Context", + ALLOCSET_DEFAULT_SIZES); - readState = palloc0(sizeof(TableReadState)); + TableReadState *readState = palloc0(sizeof(TableReadState)); readState->relation = relation; readState->datafileMetadata = datafileMetadata; readState->projectedColumnList = projectedColumnList; @@ -125,8 +122,6 @@ CStoreBeginRead(Relation relation, TupleDesc tupleDescriptor, bool CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNulls) { - uint32 blockIndex = 0; - uint32 blockRowIndex = 0; StripeMetadata *stripeMetadata = readState->currentStripeMetadata; MemoryContext oldContext = NULL; @@ -138,7 +133,6 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu */ while (readState->stripeBuffers == NULL) { - StripeBuffers *stripeBuffers = NULL; List *stripeMetadataList = readState->datafileMetadata->stripeMetadataList; uint32 stripeCount = list_length(stripeMetadataList); @@ -153,11 +147,14 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu readState->blockData = NULL; stripeMetadata = list_nth(stripeMetadataList, readState->readStripeCount); - stripeBuffers = LoadFilteredStripeBuffers(readState->relation, - stripeMetadata, - readState->tupleDescriptor, - readState->projectedColumnList, - readState->whereClauseList); + StripeBuffers *stripeBuffers = LoadFilteredStripeBuffers(readState->relation, + stripeMetadata, + readState-> + tupleDescriptor, + readState-> + projectedColumnList, + readState-> + whereClauseList); readState->readStripeCount++; readState->currentStripeMetadata = stripeMetadata; @@ -172,17 +169,15 @@ CStoreReadNextRow(TableReadState *readState, Datum *columnValues, bool *columnNu } } - blockIndex = readState->stripeReadRowCount / 
stripeMetadata->blockRowCount; - blockRowIndex = readState->stripeReadRowCount % stripeMetadata->blockRowCount; + uint32 blockIndex = readState->stripeReadRowCount / stripeMetadata->blockRowCount; + uint32 blockRowIndex = readState->stripeReadRowCount % stripeMetadata->blockRowCount; if (blockIndex != readState->deserializedBlockIndex) { - uint32 lastBlockIndex = 0; uint32 blockRowCount = 0; - uint32 stripeRowCount = 0; - stripeRowCount = stripeMetadata->rowCount; - lastBlockIndex = stripeRowCount / stripeMetadata->blockRowCount; + uint32 stripeRowCount = stripeMetadata->rowCount; + uint32 lastBlockIndex = stripeRowCount / stripeMetadata->blockRowCount; if (blockIndex == lastBlockIndex) { blockRowCount = stripeRowCount % stripeMetadata->blockRowCount; @@ -317,11 +312,11 @@ FreeBlockData(BlockData *blockData) uint64 CStoreTableRowCount(Relation relation) { - DataFileMetadata *datafileMetadata = NULL; ListCell *stripeMetadataCell = NULL; uint64 totalRowCount = 0; - datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode, false); + DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relation->rd_node.relNode, + false); foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) { @@ -343,8 +338,6 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, TupleDesc tupleDescriptor, List *projectedColumnList, List *whereClauseList) { - StripeBuffers *stripeBuffers = NULL; - ColumnBuffers **columnBuffersArray = NULL; uint32 columnIndex = 0; uint32 columnCount = tupleDescriptor->natts; @@ -363,7 +356,7 @@ LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, selectedBlockMask); /* load column data for projected columns */ - columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); + ColumnBuffers **columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); for (columnIndex = 0; columnIndex < stripeMetadata->columnCount; columnIndex++) { @@ -383,7 +376,7 @@ 
LoadFilteredStripeBuffers(Relation relation, StripeMetadata *stripeMetadata, } } - stripeBuffers = palloc0(sizeof(StripeBuffers)); + StripeBuffers *stripeBuffers = palloc0(sizeof(StripeBuffers)); stripeBuffers->columnCount = columnCount; stripeBuffers->rowCount = StripeSkipListRowCount(selectedBlockSkipList); stripeBuffers->columnBuffersArray = columnBuffersArray; @@ -432,7 +425,6 @@ LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, uint32 blockCount, uint64 stripeOffset, Form_pg_attribute attributeForm) { - ColumnBuffers *columnBuffers = NULL; uint32 blockIndex = 0; ColumnBlockBuffers **blockBuffersArray = palloc0(blockCount * sizeof(ColumnBlockBuffers *)); @@ -470,7 +462,7 @@ LoadColumnBuffers(Relation relation, ColumnBlockSkipNode *blockSkipNodeArray, blockBuffersArray[blockIndex]->valueCompressionType = compressionType; } - columnBuffers = palloc0(sizeof(ColumnBuffers)); + ColumnBuffers *columnBuffers = palloc0(sizeof(ColumnBuffers)); columnBuffers->blockBuffersArray = blockBuffersArray; return columnBuffers; @@ -486,34 +478,31 @@ static bool * SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList, List *whereClauseList) { - bool *selectedBlockMask = NULL; ListCell *columnCell = NULL; uint32 blockIndex = 0; List *restrictInfoList = BuildRestrictInfoList(whereClauseList); - selectedBlockMask = palloc0(stripeSkipList->blockCount * sizeof(bool)); + bool *selectedBlockMask = palloc0(stripeSkipList->blockCount * sizeof(bool)); memset(selectedBlockMask, true, stripeSkipList->blockCount * sizeof(bool)); foreach(columnCell, projectedColumnList) { Var *column = lfirst(columnCell); uint32 columnIndex = column->varattno - 1; - FmgrInfo *comparisonFunction = NULL; - Node *baseConstraint = NULL; /* if this column's data type doesn't have a comparator, skip it */ - comparisonFunction = GetFunctionInfoOrNull(column->vartype, BTREE_AM_OID, - BTORDER_PROC); + FmgrInfo *comparisonFunction = GetFunctionInfoOrNull(column->vartype, 
+ BTREE_AM_OID, + BTORDER_PROC); if (comparisonFunction == NULL) { continue; } - baseConstraint = BuildBaseConstraint(column); + Node *baseConstraint = BuildBaseConstraint(column); for (blockIndex = 0; blockIndex < stripeSkipList->blockCount; blockIndex++) { bool predicateRefuted = false; - List *constraintList = NIL; ColumnBlockSkipNode *blockSkipNodeArray = stripeSkipList->blockSkipNodeArray[columnIndex]; ColumnBlockSkipNode *blockSkipNode = &blockSkipNodeArray[blockIndex]; @@ -530,7 +519,7 @@ SelectedBlockMask(StripeSkipList *stripeSkipList, List *projectedColumnList, UpdateConstraint(baseConstraint, blockSkipNode->minimumValue, blockSkipNode->maximumValue); - constraintList = list_make1(baseConstraint); + List *constraintList = list_make1(baseConstraint); #if (PG_VERSION_NUM >= 100000) predicateRefuted = predicate_refuted_by(constraintList, restrictInfoList, false); @@ -558,24 +547,21 @@ FmgrInfo * GetFunctionInfoOrNull(Oid typeId, Oid accessMethodId, int16 procedureId) { FmgrInfo *functionInfo = NULL; - Oid operatorClassId = InvalidOid; - Oid operatorFamilyId = InvalidOid; - Oid operatorId = InvalidOid; /* get default operator class from pg_opclass for datum type */ - operatorClassId = GetDefaultOpClass(typeId, accessMethodId); + Oid operatorClassId = GetDefaultOpClass(typeId, accessMethodId); if (operatorClassId == InvalidOid) { return NULL; } - operatorFamilyId = get_opclass_family(operatorClassId); + Oid operatorFamilyId = get_opclass_family(operatorClassId); if (operatorFamilyId == InvalidOid) { return NULL; } - operatorId = get_opfamily_proc(operatorFamilyId, typeId, typeId, procedureId); + Oid operatorId = get_opfamily_proc(operatorFamilyId, typeId, typeId, procedureId); if (operatorId != InvalidOid) { functionInfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo)); @@ -601,10 +587,9 @@ BuildRestrictInfoList(List *whereClauseList) ListCell *qualCell = NULL; foreach(qualCell, whereClauseList) { - RestrictInfo *restrictInfo = NULL; Node *qualNode = (Node *) 
lfirst(qualCell); - restrictInfo = make_simple_restrictinfo((Expr *) qualNode); + RestrictInfo *restrictInfo = make_simple_restrictinfo((Expr *) qualNode); restrictInfoList = lappend(restrictInfoList, restrictInfo); } @@ -622,14 +607,10 @@ BuildRestrictInfoList(List *whereClauseList) static Node * BuildBaseConstraint(Var *variable) { - Node *baseConstraint = NULL; - OpExpr *lessThanExpr = NULL; - OpExpr *greaterThanExpr = NULL; + OpExpr *lessThanExpr = MakeOpExpression(variable, BTLessEqualStrategyNumber); + OpExpr *greaterThanExpr = MakeOpExpression(variable, BTGreaterEqualStrategyNumber); - lessThanExpr = MakeOpExpression(variable, BTLessEqualStrategyNumber); - greaterThanExpr = MakeOpExpression(variable, BTGreaterEqualStrategyNumber); - - baseConstraint = make_and_qual((Node *) lessThanExpr, (Node *) greaterThanExpr); + Node *baseConstraint = make_and_qual((Node *) lessThanExpr, (Node *) greaterThanExpr); return baseConstraint; } @@ -648,22 +629,19 @@ MakeOpExpression(Var *variable, int16 strategyNumber) Oid collationId = variable->varcollid; Oid accessMethodId = BTREE_AM_OID; - Oid operatorId = InvalidOid; - Const *constantValue = NULL; - OpExpr *expression = NULL; /* Load the operator from system catalogs */ - operatorId = GetOperatorByType(typeId, accessMethodId, strategyNumber); + Oid operatorId = GetOperatorByType(typeId, accessMethodId, strategyNumber); - constantValue = makeNullConst(typeId, typeModId, collationId); + Const *constantValue = makeNullConst(typeId, typeModId, collationId); /* Now make the expression with the given variable and a null constant */ - expression = (OpExpr *) make_opclause(operatorId, - InvalidOid, /* no result type yet */ - false, /* no return set */ - (Expr *) variable, - (Expr *) constantValue, - InvalidOid, collationId); + OpExpr *expression = (OpExpr *) make_opclause(operatorId, + InvalidOid, /* no result type yet */ + false, /* no return set */ + (Expr *) variable, + (Expr *) constantValue, + InvalidOid, collationId); /* 
Set implementing function id and result type */ expression->opfuncid = get_opcode(operatorId); @@ -707,14 +685,12 @@ UpdateConstraint(Node *baseConstraint, Datum minValue, Datum maxValue) Node *minNode = get_rightop((Expr *) greaterThanExpr); Node *maxNode = get_rightop((Expr *) lessThanExpr); - Const *minConstant = NULL; - Const *maxConstant = NULL; Assert(IsA(minNode, Const)); Assert(IsA(maxNode, Const)); - minConstant = (Const *) minNode; - maxConstant = (Const *) maxNode; + Const *minConstant = (Const *) minNode; + Const *maxConstant = (Const *) maxNode; minConstant->constvalue = minValue; maxConstant->constvalue = maxValue; @@ -735,8 +711,6 @@ static StripeSkipList * SelectedBlockSkipList(StripeSkipList *stripeSkipList, bool *projectedColumnMask, bool *selectedBlockMask) { - StripeSkipList *SelectedBlockSkipList = NULL; - ColumnBlockSkipNode **selectedBlockSkipNodeArray = NULL; uint32 selectedBlockCount = 0; uint32 blockIndex = 0; uint32 columnIndex = 0; @@ -750,7 +724,9 @@ SelectedBlockSkipList(StripeSkipList *stripeSkipList, bool *projectedColumnMask, } } - selectedBlockSkipNodeArray = palloc0(columnCount * sizeof(ColumnBlockSkipNode *)); + ColumnBlockSkipNode **selectedBlockSkipNodeArray = palloc0(columnCount * + sizeof(ColumnBlockSkipNode + *)); for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { uint32 selectedBlockIndex = 0; @@ -779,7 +755,7 @@ SelectedBlockSkipList(StripeSkipList *stripeSkipList, bool *projectedColumnMask, } } - SelectedBlockSkipList = palloc0(sizeof(StripeSkipList)); + StripeSkipList *SelectedBlockSkipList = palloc0(sizeof(StripeSkipList)); SelectedBlockSkipList->blockSkipNodeArray = selectedBlockSkipNodeArray; SelectedBlockSkipList->blockCount = selectedBlockCount; SelectedBlockSkipList->columnCount = stripeSkipList->columnCount; @@ -882,14 +858,12 @@ DeserializeDatumArray(StringInfo datumBuffer, bool *existsArray, uint32 datumCou for (datumIndex = 0; datumIndex < datumCount; datumIndex++) { - char 
*currentDatumDataPointer = NULL; - if (!existsArray[datumIndex]) { continue; } - currentDatumDataPointer = datumBuffer->data + currentDatumDataOffset; + char *currentDatumDataPointer = datumBuffer->data + currentDatumDataOffset; datumArray[datumIndex] = fetch_att(currentDatumDataPointer, datumTypeByValue, datumTypeLength); @@ -940,11 +914,10 @@ DeserializeBlockData(StripeBuffers *stripeBuffers, uint64 blockIndex, { ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; - StringInfo valueBuffer = NULL; /* decompress and deserialize current block's data */ - valueBuffer = DecompressBuffer(blockBuffers->valueBuffer, - blockBuffers->valueCompressionType); + StringInfo valueBuffer = DecompressBuffer(blockBuffers->valueBuffer, + blockBuffers->valueCompressionType); if (blockBuffers->valueCompressionType != COMPRESSION_NONE) { @@ -1045,17 +1018,13 @@ ReadFromSmgr(Relation rel, uint64 offset, uint32 size) while (read < size) { - Buffer buffer; - Page page; - PageHeader phdr; - uint32 to_read; SmgrAddr addr = logical_to_smgr(offset + read); - buffer = ReadBuffer(rel, addr.blockno); - page = BufferGetPage(buffer); - phdr = (PageHeader) page; + Buffer buffer = ReadBuffer(rel, addr.blockno); + Page page = BufferGetPage(buffer); + PageHeader phdr = (PageHeader) page; - to_read = Min(size - read, phdr->pd_upper - addr.offset); + uint32 to_read = Min(size - read, phdr->pd_upper - addr.offset); memcpy(resultBuffer->data + read, page + addr.offset, to_read); ReleaseBuffer(buffer); read += to_read; diff --git a/src/backend/columnar/cstore_tableam.c b/src/backend/columnar/cstore_tableam.c index f0dd44de2..203cbb334 100644 --- a/src/backend/columnar/cstore_tableam.c +++ b/src/backend/columnar/cstore_tableam.c @@ -125,13 +125,10 @@ CStoreTableAMDefaultOptions() static CStoreOptions * CStoreTableAMGetOptions(Relation rel) { - CStoreOptions *cstoreOptions = NULL; - DataFileMetadata *metadata = NULL; - Assert(rel != NULL); - cstoreOptions = 
palloc0(sizeof(CStoreOptions)); - metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions)); + DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); cstoreOptions->compressionType = metadata->compression; cstoreOptions->stripeRowCount = metadata->stripeRowCount; cstoreOptions->blockRowCount = metadata->blockRowCount; @@ -213,15 +210,14 @@ RelationColumnList(Relation rel) int32 vartypmod = tupdesc->attrs[i].atttypmod; Oid varcollid = tupdesc->attrs[i].attcollation; Index varlevelsup = 0; - Var *var; if (tupdesc->attrs[i].attisdropped) { continue; } - var = makeVar(varno, varattno, vartype, vartypmod, - varcollid, varlevelsup); + Var *var = makeVar(varno, varattno, vartype, vartypmod, + varcollid, varlevelsup); columnList = lappend(columnList, var); } @@ -242,7 +238,6 @@ cstore_beginscan(Relation relation, Snapshot snapshot, ParallelTableScanDesc parallel_scan, uint32 flags) { - TableScanDesc scandesc; int natts = relation->rd_att->natts; Bitmapset *attr_needed = NULL; @@ -251,8 +246,9 @@ cstore_beginscan(Relation relation, Snapshot snapshot, /* the cstore access method does not use the flags, they are specific to heap */ flags = 0; - scandesc = cstore_beginscan_extended(relation, snapshot, nkeys, key, parallel_scan, - flags, attr_needed, NULL); + TableScanDesc scandesc = cstore_beginscan_extended(relation, snapshot, nkeys, key, + parallel_scan, + flags, attr_needed, NULL); pfree(attr_needed); @@ -267,9 +263,7 @@ cstore_beginscan_extended(Relation relation, Snapshot snapshot, uint32 flags, Bitmapset *attr_needed, List *scanQual) { TupleDesc tupdesc = relation->rd_att; - TableReadState *readState = NULL; CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); - List *columnList = NIL; List *neededColumnList = NIL; MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); ListCell *columnCell = NULL; @@ -281,7 +275,7 @@ cstore_beginscan_extended(Relation 
relation, Snapshot snapshot, scan->cs_base.rs_flags = flags; scan->cs_base.rs_parallel = parallel_scan; - columnList = RelationColumnList(relation); + List *columnList = RelationColumnList(relation); /* only collect columns that we need for the scan */ foreach(columnCell, columnList) @@ -293,7 +287,8 @@ cstore_beginscan_extended(Relation relation, Snapshot snapshot, } } - readState = CStoreBeginRead(relation, tupdesc, neededColumnList, scanQual); + TableReadState *readState = CStoreBeginRead(relation, tupdesc, neededColumnList, + scanQual); scan->cs_readState = readState; @@ -324,13 +319,12 @@ static bool cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) { CStoreScanDesc scan = (CStoreScanDesc) sscan; - bool nextRowFound; MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); ExecClearTuple(slot); - nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, - slot->tts_isnull); + bool nextRowFound = CStoreReadNextRow(scan->cs_readState, slot->tts_values, + slot->tts_isnull); MemoryContextSwitchTo(oldContext); @@ -443,12 +437,11 @@ static void cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) { - HeapTuple heapTuple; MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); cstore_init_write_state(relation); - heapTuple = ExecCopySlotHeapTuple(slot); + HeapTuple heapTuple = ExecCopySlotHeapTuple(slot); if (HeapTupleHasExternal(heapTuple)) { /* detoast any toasted attributes */ @@ -559,7 +552,6 @@ cstore_relation_set_new_filenode(Relation rel, TransactionId *freezeXid, MultiXactId *minmulti) { - SMgrRelation srel; DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); uint64 blockRowCount = 0; uint64 stripeRowCount = 0; @@ -587,7 +579,7 @@ cstore_relation_set_new_filenode(Relation rel, Assert(persistence == RELPERSISTENCE_PERMANENT); *freezeXid = RecentXmin; *minmulti = GetOldestMultiXactId(); 
- srel = RelationCreateStorage(*newrnode, persistence); + SMgrRelation srel = RelationCreateStorage(*newrnode, persistence); InitCStoreDataFileMetadata(newrnode->relNode, blockRowCount, stripeRowCount, compression); smgrclose(srel); @@ -639,11 +631,6 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, double *tups_vacuumed, double *tups_recently_dead) { - TableWriteState *writeState = NULL; - TableReadState *readState = NULL; - CStoreOptions *cstoreOptions = NULL; - Datum *values = NULL; - bool *nulls = NULL; TupleDesc sourceDesc = RelationGetDescr(OldHeap); TupleDesc targetDesc = RelationGetDescr(NewHeap); @@ -664,7 +651,7 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, * relation first. */ - cstoreOptions = CStoreTableAMGetOptions(OldHeap); + CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(OldHeap); UpdateCStoreDataFileMetadata(NewHeap->rd_node.relNode, cstoreOptions->blockRowCount, @@ -673,16 +660,17 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, cstoreOptions = CStoreTableAMGetOptions(NewHeap); - writeState = CStoreBeginWrite(NewHeap, - cstoreOptions->compressionType, - cstoreOptions->stripeRowCount, - cstoreOptions->blockRowCount, - targetDesc); + TableWriteState *writeState = CStoreBeginWrite(NewHeap, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, + targetDesc); - readState = CStoreBeginRead(OldHeap, sourceDesc, RelationColumnList(OldHeap), NULL); + TableReadState *readState = CStoreBeginRead(OldHeap, sourceDesc, RelationColumnList( + OldHeap), NULL); - values = palloc0(sourceDesc->natts * sizeof(Datum)); - nulls = palloc0(sourceDesc->natts * sizeof(bool)); + Datum *values = palloc0(sourceDesc->natts * sizeof(Datum)); + bool *nulls = palloc0(sourceDesc->natts * sizeof(bool)); *num_tuples = 0; @@ -727,7 +715,6 @@ cstore_vacuum_rel(Relation rel, VacuumParams *params, static void LogRelationStats(Relation rel, int elevel) { - 
DataFileMetadata *datafileMetadata = NULL; ListCell *stripeMetadataCell = NULL; Oid relfilenode = rel->rd_node.relNode; StringInfo infoBuf = makeStringInfo(); @@ -736,13 +723,11 @@ LogRelationStats(Relation rel, int elevel) uint64 totalStripeLength = 0; uint64 tupleCount = 0; uint64 blockCount = 0; - uint64 relPages = 0; - int stripeCount = 0; TupleDesc tupdesc = RelationGetDescr(rel); uint64 droppedBlocksWithData = 0; - datafileMetadata = ReadDataFileMetadata(relfilenode, false); - stripeCount = list_length(datafileMetadata->stripeMetadataList); + DataFileMetadata *datafileMetadata = ReadDataFileMetadata(relfilenode, false); + int stripeCount = list_length(datafileMetadata->stripeMetadataList); foreach(stripeMetadataCell, datafileMetadata->stripeMetadataList) { @@ -777,7 +762,7 @@ LogRelationStats(Relation rel, int elevel) } RelationOpenSmgr(rel); - relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + uint64 relPages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); appendStringInfo(infoBuf, "total file size: %ld, total data size: %ld\n", @@ -815,9 +800,6 @@ static void TruncateCStore(Relation rel, int elevel) { PGRUsage ru0; - BlockNumber old_rel_pages = 0; - BlockNumber new_rel_pages = 0; - SmgrAddr highestPhysicalAddress; pg_rusage_init(&ru0); @@ -851,7 +833,7 @@ TruncateCStore(Relation rel, int elevel) } RelationOpenSmgr(rel); - old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + BlockNumber old_rel_pages = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); RelationCloseSmgr(rel); /* @@ -859,10 +841,10 @@ TruncateCStore(Relation rel, int elevel) * new stripes be added beyond highestPhysicalAddress while * we're truncating. 
*/ - highestPhysicalAddress = + SmgrAddr highestPhysicalAddress = logical_to_smgr(GetHighestUsedAddress(rel->rd_node.relNode)); - new_rel_pages = highestPhysicalAddress.blockno + 1; + BlockNumber new_rel_pages = highestPhysicalAddress.blockno + 1; if (new_rel_pages == old_rel_pages) { UnlockRelation(rel, AccessExclusiveLock); @@ -1104,11 +1086,9 @@ CStoreTableAMProcessUtility(PlannedStmt * plannedStatement, if (nodeTag(parseTree) == T_CreateTrigStmt) { CreateTrigStmt *createTrigStmt = (CreateTrigStmt *) parseTree; - Relation rel; - bool isCStore; - rel = relation_openrv(createTrigStmt->relation, AccessShareLock); - isCStore = rel->rd_tableam == GetCstoreTableAmRoutine(); + Relation rel = relation_openrv(createTrigStmt->relation, AccessShareLock); + bool isCStore = rel->rd_tableam == GetCstoreTableAmRoutine(); relation_close(rel, AccessShareLock); if (isCStore && @@ -1201,9 +1181,6 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId static bool IsCStoreTableAmTable(Oid relationId) { - bool result; - Relation rel; - if (!OidIsValid(relationId)) { return false; @@ -1213,8 +1190,8 @@ IsCStoreTableAmTable(Oid relationId) * Lock relation to prevent it from being dropped & * avoid race conditions. 
*/ - rel = relation_open(relationId, AccessShareLock); - result = rel->rd_tableam == GetCstoreTableAmRoutine(); + Relation rel = relation_open(relationId, AccessShareLock); + bool result = rel->rd_tableam == GetCstoreTableAmRoutine(); relation_close(rel, NoLock); return result; @@ -1317,9 +1294,6 @@ Datum alter_cstore_table_set(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); - int blockRowCount = 0; - int stripeRowCount = 0; - CompressionType compression = COMPRESSION_TYPE_INVALID; Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */ DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); @@ -1329,9 +1303,9 @@ alter_cstore_table_set(PG_FUNCTION_ARGS) quote_identifier(RelationGetRelationName(rel))))); } - blockRowCount = metadata->blockRowCount; - stripeRowCount = metadata->stripeRowCount; - compression = metadata->compression; + int blockRowCount = metadata->blockRowCount; + int stripeRowCount = metadata->stripeRowCount; + CompressionType compression = metadata->compression; /* block_row_count => not null */ if (!PG_ARGISNULL(1)) @@ -1375,9 +1349,6 @@ Datum alter_cstore_table_reset(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); - int blockRowCount = 0; - int stripeRowCount = 0; - CompressionType compression = COMPRESSION_TYPE_INVALID; Relation rel = table_open(relationId, AccessExclusiveLock); /* ALTER TABLE LOCK */ DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, true); @@ -1387,9 +1358,9 @@ alter_cstore_table_reset(PG_FUNCTION_ARGS) quote_identifier(RelationGetRelationName(rel))))); } - blockRowCount = metadata->blockRowCount; - stripeRowCount = metadata->stripeRowCount; - compression = metadata->compression; + int blockRowCount = metadata->blockRowCount; + int stripeRowCount = metadata->stripeRowCount; + CompressionType compression = metadata->compression; /* block_row_count => true */ if (!PG_ARGISNULL(1) && PG_GETARG_BOOL(1)) diff --git 
a/src/backend/columnar/cstore_writer.c b/src/backend/columnar/cstore_writer.c index 873cda956..dcaf5d122 100644 --- a/src/backend/columnar/cstore_writer.c +++ b/src/backend/columnar/cstore_writer.c @@ -61,17 +61,11 @@ CStoreBeginWrite(Relation relation, uint64 stripeMaxRowCount, uint32 blockRowCount, TupleDesc tupleDescriptor) { - TableWriteState *writeState = NULL; - FmgrInfo **comparisonFunctionArray = NULL; - MemoryContext stripeWriteContext = NULL; - uint32 columnCount = 0; uint32 columnIndex = 0; - bool *columnMaskArray = NULL; - BlockData *blockData = NULL; /* get comparison function pointers for each of the columns */ - columnCount = tupleDescriptor->natts; - comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *)); + uint32 columnCount = tupleDescriptor->natts; + FmgrInfo **comparisonFunctionArray = palloc0(columnCount * sizeof(FmgrInfo *)); for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { FmgrInfo *comparisonFunction = NULL; @@ -94,16 +88,17 @@ CStoreBeginWrite(Relation relation, * reset this memory context once we have flushed the stripe to the file. * This is to avoid memory leaks. 
*/ - stripeWriteContext = AllocSetContextCreate(CurrentMemoryContext, - "Stripe Write Memory Context", - ALLOCSET_DEFAULT_SIZES); + MemoryContext stripeWriteContext = AllocSetContextCreate(CurrentMemoryContext, + "Stripe Write Memory Context", + ALLOCSET_DEFAULT_SIZES); - columnMaskArray = palloc(columnCount * sizeof(bool)); + bool *columnMaskArray = palloc(columnCount * sizeof(bool)); memset(columnMaskArray, true, columnCount); - blockData = CreateEmptyBlockData(columnCount, columnMaskArray, blockRowCount); + BlockData *blockData = CreateEmptyBlockData(columnCount, columnMaskArray, + blockRowCount); - writeState = palloc0(sizeof(TableWriteState)); + TableWriteState *writeState = palloc0(sizeof(TableWriteState)); writeState->relation = relation; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; @@ -132,8 +127,6 @@ void CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNulls) { uint32 columnIndex = 0; - uint32 blockIndex = 0; - uint32 blockRowIndex = 0; StripeBuffers *stripeBuffers = writeState->stripeBuffers; StripeSkipList *stripeSkipList = writeState->stripeSkipList; uint32 columnCount = writeState->tupleDescriptor->natts; @@ -161,8 +154,8 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul } } - blockIndex = stripeBuffers->rowCount / blockRowCount; - blockRowIndex = stripeBuffers->rowCount % blockRowCount; + uint32 blockIndex = stripeBuffers->rowCount / blockRowCount; + uint32 blockRowIndex = stripeBuffers->rowCount % blockRowCount; for (columnIndex = 0; columnIndex < columnCount; columnIndex++) { @@ -257,7 +250,6 @@ static StripeBuffers * CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, uint32 blockRowCount, uint32 columnCount) { - StripeBuffers *stripeBuffers = NULL; uint32 columnIndex = 0; uint32 maxBlockCount = (stripeMaxRowCount / blockRowCount) + 1; ColumnBuffers **columnBuffersArray = palloc0(columnCount * sizeof(ColumnBuffers *)); @@ -280,7 +272,7 
@@ CreateEmptyStripeBuffers(uint32 stripeMaxRowCount, uint32 blockRowCount, columnBuffersArray[columnIndex]->blockBuffersArray = blockBuffersArray; } - stripeBuffers = palloc0(sizeof(StripeBuffers)); + StripeBuffers *stripeBuffers = palloc0(sizeof(StripeBuffers)); stripeBuffers->columnBuffersArray = columnBuffersArray; stripeBuffers->columnCount = columnCount; stripeBuffers->rowCount = 0; @@ -298,7 +290,6 @@ static StripeSkipList * CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, uint32 columnCount) { - StripeSkipList *stripeSkipList = NULL; uint32 columnIndex = 0; uint32 maxBlockCount = (stripeMaxRowCount / blockRowCount) + 1; @@ -310,7 +301,7 @@ CreateEmptyStripeSkipList(uint32 stripeMaxRowCount, uint32 blockRowCount, palloc0(maxBlockCount * sizeof(ColumnBlockSkipNode)); } - stripeSkipList = palloc0(sizeof(StripeSkipList)); + StripeSkipList *stripeSkipList = palloc0(sizeof(StripeSkipList)); stripeSkipList->columnCount = columnCount; stripeSkipList->blockCount = 0; stripeSkipList->blockSkipNodeArray = blockSkipNodeArray; @@ -328,13 +319,9 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) while (remaining > 0) { SmgrAddr addr = logical_to_smgr(logicalOffset); - BlockNumber nblocks; - Page page; - PageHeader phdr; - uint64 to_write; RelationOpenSmgr(rel); - nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); + BlockNumber nblocks = smgrnblocks(rel->rd_smgr, MAIN_FORKNUM); Assert(addr.blockno < nblocks); (void) nblocks; /* keep compiler quiet */ RelationCloseSmgr(rel); @@ -342,8 +329,8 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) buffer = ReadBuffer(rel, addr.blockno); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); - page = BufferGetPage(buffer); - phdr = (PageHeader) page; + Page page = BufferGetPage(buffer); + PageHeader phdr = (PageHeader) page; if (PageIsNew(page)) { PageInit(page, BLCKSZ, 0); @@ -366,7 +353,7 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, 
uint32 dataLength) START_CRIT_SECTION(); - to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining); + uint64 to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining); memcpy(page + phdr->pd_lower, data, to_write); phdr->pd_lower += to_write; @@ -374,8 +361,6 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) if (RelationNeedsWAL(rel)) { - XLogRecPtr recptr = 0; - XLogBeginInsert(); /* @@ -384,7 +369,7 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) */ XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE); - recptr = XLogInsert(RM_GENERIC_ID, 0); + XLogRecPtr recptr = XLogInsert(RM_GENERIC_ID, 0); PageSetLSN(page, recptr); } @@ -420,7 +405,6 @@ FlushStripe(TableWriteState *writeState) uint32 blockRowCount = writeState->blockRowCount; uint32 lastBlockIndex = stripeBuffers->rowCount / blockRowCount; uint32 lastBlockRowCount = stripeBuffers->rowCount % blockRowCount; - uint64 currentFileOffset = 0; uint64 stripeSize = 0; uint64 stripeRowCount = 0; @@ -477,7 +461,7 @@ FlushStripe(TableWriteState *writeState) stripeRowCount, columnCount, blockCount, blockRowCount); - currentFileOffset = stripeMetadata.fileOffset; + uint64 currentFileOffset = stripeMetadata.fileOffset; /* * Each stripe has only one section: @@ -531,11 +515,10 @@ FlushStripe(TableWriteState *writeState) static StringInfo SerializeBoolArray(bool *boolArray, uint32 boolArrayLength) { - StringInfo boolArrayBuffer = NULL; uint32 boolArrayIndex = 0; uint32 byteCount = (boolArrayLength + 7) / 8; - boolArrayBuffer = makeStringInfo(); + StringInfo boolArrayBuffer = makeStringInfo(); enlargeStringInfo(boolArrayBuffer, byteCount); boolArrayBuffer->len = byteCount; memset(boolArrayBuffer->data, 0, byteCount); @@ -564,11 +547,10 @@ SerializeSingleDatum(StringInfo datumBuffer, Datum datum, bool datumTypeByValue, { uint32 datumLength = att_addlength_datum(0, datumTypeLength, datum); uint32 datumLengthAligned = att_align_nominal(datumLength, 
datumTypeAlign); - char *currentDatumDataPointer = NULL; enlargeStringInfo(datumBuffer, datumLengthAligned); - currentDatumDataPointer = datumBuffer->data + datumBuffer->len; + char *currentDatumDataPointer = datumBuffer->data + datumBuffer->len; memset(currentDatumDataPointer, 0, datumLengthAligned); if (datumTypeLength > 0) @@ -624,11 +606,9 @@ SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCou { ColumnBuffers *columnBuffers = stripeBuffers->columnBuffersArray[columnIndex]; ColumnBlockBuffers *blockBuffers = columnBuffers->blockBuffersArray[blockIndex]; - StringInfo serializedValueBuffer = NULL; CompressionType actualCompressionType = COMPRESSION_NONE; - bool compressed = false; - serializedValueBuffer = blockData->valueBufferArray[columnIndex]; + StringInfo serializedValueBuffer = blockData->valueBufferArray[columnIndex]; /* the only other supported compression type is pg_lz for now */ Assert(requestedCompressionType == COMPRESSION_NONE || @@ -638,8 +618,8 @@ SerializeBlockData(TableWriteState *writeState, uint32 blockIndex, uint32 rowCou * if serializedValueBuffer is be compressed, update serializedValueBuffer * with compressed data and store compression type. 
*/ - compressed = CompressBuffer(serializedValueBuffer, compressionBuffer, - requestedCompressionType); + bool compressed = CompressBuffer(serializedValueBuffer, compressionBuffer, + requestedCompressionType); if (compressed) { serializedValueBuffer = compressionBuffer; diff --git a/src/test/regress/expected/am_vacuum_vs_insert.out b/src/test/regress/expected/am_vacuum_vs_insert.out index d463bd076..f5ef08673 100644 --- a/src/test/regress/expected/am_vacuum_vs_insert.out +++ b/src/test/regress/expected/am_vacuum_vs_insert.out @@ -1,13 +1,13 @@ Parsed test spec with 2 sessions starting permutation: s1-insert s1-begin s1-insert s2-vacuum s1-commit s2-select -step s1-insert: +step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; -step s1-begin: +step s1-begin: BEGIN; -step s1-insert: +step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; s2: INFO: statistics for "test_vacuum_vs_insert": @@ -16,38 +16,38 @@ total row count: 3, stripe count: 1, average rows per stripe: 3 block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 s2: INFO: "test_vacuum_vs_insert": stopping truncate due to conflicting lock request -step s2-vacuum: +step s2-vacuum: VACUUM VERBOSE test_vacuum_vs_insert; -step s1-commit: +step s1-commit: COMMIT; -step s2-select: +step s2-select: SELECT * FROM test_vacuum_vs_insert; -a b +a b -1 2 -2 4 -3 6 -1 2 -2 4 -3 6 +1 2 +2 4 +3 6 +1 2 +2 4 +3 6 starting permutation: s1-insert s1-begin s1-insert s2-vacuum-full s1-commit s2-select -step s1-insert: +step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; -step s1-begin: +step s1-begin: BEGIN; -step s1-insert: +step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; -step s2-vacuum-full: +step s2-vacuum-full: VACUUM FULL VERBOSE test_vacuum_vs_insert; -step s1-commit: +step s1-commit: COMMIT; s2: INFO: 
vacuuming "public.test_vacuum_vs_insert" @@ -55,14 +55,14 @@ s2: INFO: "test_vacuum_vs_insert": found 0 removable, 6 nonremovable row versio DETAIL: 0 dead row versions cannot be removed yet. CPU: user: 0.00 s, system: 0.00 s, elapsed: 0.00 s. step s2-vacuum-full: <... completed> -step s2-select: +step s2-select: SELECT * FROM test_vacuum_vs_insert; -a b +a b -1 2 -2 4 -3 6 -1 2 -2 4 -3 6 +1 2 +2 4 +3 6 +1 2 +2 4 +3 6 diff --git a/src/test/regress/expected/am_write_concurrency.out b/src/test/regress/expected/am_write_concurrency.out index 41c6ee7e6..96a83ab61 100644 --- a/src/test/regress/expected/am_write_concurrency.out +++ b/src/test/regress/expected/am_write_concurrency.out @@ -1,142 +1,142 @@ Parsed test spec with 2 sessions starting permutation: s1-begin s2-begin s1-insert s2-insert s1-select s2-select s1-commit s2-commit s1-select -step s1-begin: +step s1-begin: BEGIN; -step s2-begin: +step s2-begin: BEGIN; -step s1-insert: +step s1-insert: INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(1, 3) i; -step s2-insert: +step s2-insert: INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; -step s1-select: +step s1-select: SELECT * FROM test_insert_concurrency ORDER BY a; -a b +a b -1 2 -2 4 -3 6 -step s2-select: +1 2 +2 4 +3 6 +step s2-select: SELECT * FROM test_insert_concurrency ORDER BY a; -a b +a b -4 8 -5 10 -6 12 -step s1-commit: +4 8 +5 10 +6 12 +step s1-commit: COMMIT; -step s2-commit: +step s2-commit: COMMIT; -step s1-select: +step s1-select: SELECT * FROM test_insert_concurrency ORDER BY a; -a b +a b -1 2 -2 4 -3 6 -4 8 -5 10 -6 12 +1 2 +2 4 +3 6 +4 8 +5 10 +6 12 starting permutation: s1-begin s2-begin s1-copy s2-insert s1-select s2-select s1-commit s2-commit s1-select -step s1-begin: +step s1-begin: BEGIN; -step s2-begin: +step s2-begin: BEGIN; -step s1-copy: +step s1-copy: COPY test_insert_concurrency(a) FROM PROGRAM 'seq 11 13'; -step s2-insert: +step s2-insert: INSERT INTO test_insert_concurrency 
SELECT i, 2 * i FROM generate_series(4, 6) i; -step s1-select: +step s1-select: SELECT * FROM test_insert_concurrency ORDER BY a; -a b +a b -11 -12 -13 -step s2-select: +11 +12 +13 +step s2-select: SELECT * FROM test_insert_concurrency ORDER BY a; -a b +a b -4 8 -5 10 -6 12 -step s1-commit: +4 8 +5 10 +6 12 +step s1-commit: COMMIT; -step s2-commit: +step s2-commit: COMMIT; -step s1-select: +step s1-select: SELECT * FROM test_insert_concurrency ORDER BY a; -a b +a b -4 8 -5 10 -6 12 -11 -12 -13 +4 8 +5 10 +6 12 +11 +12 +13 starting permutation: s1-begin s2-begin s2-insert s1-copy s1-select s2-select s1-commit s2-commit s1-select -step s1-begin: +step s1-begin: BEGIN; -step s2-begin: +step s2-begin: BEGIN; -step s2-insert: +step s2-insert: INSERT INTO test_insert_concurrency SELECT i, 2 * i FROM generate_series(4, 6) i; -step s1-copy: +step s1-copy: COPY test_insert_concurrency(a) FROM PROGRAM 'seq 11 13'; -step s1-select: +step s1-select: SELECT * FROM test_insert_concurrency ORDER BY a; -a b +a b -11 -12 -13 -step s2-select: +11 +12 +13 +step s2-select: SELECT * FROM test_insert_concurrency ORDER BY a; -a b +a b -4 8 -5 10 -6 12 -step s1-commit: +4 8 +5 10 +6 12 +step s1-commit: COMMIT; -step s2-commit: +step s2-commit: COMMIT; -step s1-select: +step s1-select: SELECT * FROM test_insert_concurrency ORDER BY a; -a b +a b -4 8 -5 10 -6 12 -11 -12 -13 +4 8 +5 10 +6 12 +11 +12 +13 diff --git a/src/test/regress/expected/create.out b/src/test/regress/expected/create.out index 39b477c81..ef7c8def4 100644 --- a/src/test/regress/expected/create.out +++ b/src/test/regress/expected/create.out @@ -1,6 +1,6 @@ Parsed test spec with 1 sessions starting permutation: s1a -step s1a: +step s1a: CREATE EXTENSION cstore_fdw; diff --git a/src/test/regress/input/fdw_create.source b/src/test/regress/input/fdw_create.source index bb3a38e28..de834ceb0 100644 --- a/src/test/regress/input/fdw_create.source +++ b/src/test/regress/input/fdw_create.source @@ -6,19 +6,19 @@ CREATE SERVER 
cstore_server FOREIGN DATA WRAPPER cstore_fdw; -- Validator tests -CREATE FOREIGN TABLE test_validator_invalid_option () - SERVER cstore_server +CREATE FOREIGN TABLE test_validator_invalid_option () + SERVER cstore_server OPTIONS(bad_option_name '1'); -- ERROR -CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () +CREATE FOREIGN TABLE test_validator_invalid_stripe_row_count () SERVER cstore_server OPTIONS(stripe_row_count '0'); -- ERROR -CREATE FOREIGN TABLE test_validator_invalid_block_row_count () +CREATE FOREIGN TABLE test_validator_invalid_block_row_count () SERVER cstore_server OPTIONS(block_row_count '0'); -- ERROR -CREATE FOREIGN TABLE test_validator_invalid_compression_type () +CREATE FOREIGN TABLE test_validator_invalid_compression_type () SERVER cstore_server OPTIONS(compression 'invalid_compression'); -- ERROR diff --git a/src/test/regress/sql/am_truncate.sql b/src/test/regress/sql/am_truncate.sql index 5d27a69fb..50a472144 100644 --- a/src/test/regress/sql/am_truncate.sql +++ b/src/test/regress/sql/am_truncate.sql @@ -125,7 +125,7 @@ SELECT count(*) FROM truncate_schema.truncate_tbl; TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT count(*) FROM truncate_schema.truncate_tbl; --- switch to super user, grant truncate to truncate_user +-- switch to super user, grant truncate to truncate_user \c - :current_user GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO truncate_user; diff --git a/src/test/regress/sql/fdw_truncate.sql b/src/test/regress/sql/fdw_truncate.sql index ed2aaa04a..f9e3b15fc 100644 --- a/src/test/regress/sql/fdw_truncate.sql +++ b/src/test/regress/sql/fdw_truncate.sql @@ -118,7 +118,7 @@ SELECT count(*) FROM truncate_schema.truncate_tbl; TRUNCATE TABLE truncate_schema.truncate_tbl; SELECT count(*) FROM truncate_schema.truncate_tbl; --- switch to super user, grant truncate to truncate_user +-- switch to super user, grant truncate to truncate_user \c - :current_user GRANT TRUNCATE ON TABLE truncate_schema.truncate_tbl TO 
truncate_user; From 3e5df81e89a671c906b2f0569e3c97b00d81f82a Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Thu, 5 Nov 2020 17:22:31 +0100 Subject: [PATCH 114/124] remove use of banned api --- src/backend/columnar/cstore_fdw.c | 5 +++-- src/backend/columnar/cstore_metadata_tables.c | 11 ++++++++--- src/backend/columnar/cstore_reader.c | 4 +++- src/backend/columnar/cstore_writer.c | 15 ++++++++++----- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/backend/columnar/cstore_fdw.c b/src/backend/columnar/cstore_fdw.c index e7b1a76b8..97e6f5a3c 100644 --- a/src/backend/columnar/cstore_fdw.c +++ b/src/backend/columnar/cstore_fdw.c @@ -78,6 +78,7 @@ #include "columnar/cstore.h" #include "columnar/cstore_fdw.h" #include "columnar/cstore_version_compat.h" +#include "distributed/citus_safe_lib.h" /* table containing information about how to partition distributed tables */ #define CITUS_EXTENSION_NAME "citus" @@ -474,8 +475,8 @@ CStoreProcessCopyCommand(CopyStmt *copyStatement, const char *queryString, if (completionTag != NULL) { - snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "COPY " UINT64_FORMAT, - processedCount); + SafeSnprintf(completionTag, COMPLETION_TAG_BUFSIZE, "COPY " UINT64_FORMAT, + processedCount); } } diff --git a/src/backend/columnar/cstore_metadata_tables.c b/src/backend/columnar/cstore_metadata_tables.c index f23aaba2d..301dd0c91 100644 --- a/src/backend/columnar/cstore_metadata_tables.c +++ b/src/backend/columnar/cstore_metadata_tables.c @@ -10,6 +10,8 @@ #include "postgres.h" +#include "safe_lib.h" + #include "columnar/cstore.h" #include "columnar/cstore_version_compat.h" @@ -885,12 +887,14 @@ DatumToBytea(Datum value, Form_pg_attribute attrForm) } else { - memcpy(VARDATA(result), DatumGetPointer(value), attrForm->attlen); + memcpy_s(VARDATA(result), datumLength + VARHDRSZ, + DatumGetPointer(value), attrForm->attlen); } } else { - memcpy(VARDATA(result), DatumGetPointer(value), datumLength); + memcpy_s(VARDATA(result), datumLength + 
VARHDRSZ, + DatumGetPointer(value), datumLength); } return result; @@ -909,7 +913,8 @@ ByteaToDatum(bytea *bytes, Form_pg_attribute attrForm) * after the byteaDatum is freed. */ char *binaryDataCopy = palloc0(VARSIZE_ANY_EXHDR(bytes)); - memcpy(binaryDataCopy, VARDATA_ANY(bytes), VARSIZE_ANY_EXHDR(bytes)); + memcpy_s(binaryDataCopy, VARSIZE_ANY_EXHDR(bytes), + VARDATA_ANY(bytes), VARSIZE_ANY_EXHDR(bytes)); return fetch_att(binaryDataCopy, attrForm->attbyval, attrForm->attlen); } diff --git a/src/backend/columnar/cstore_reader.c b/src/backend/columnar/cstore_reader.c index 8818baf78..4faf2562c 100644 --- a/src/backend/columnar/cstore_reader.c +++ b/src/backend/columnar/cstore_reader.c @@ -16,6 +16,8 @@ #include "postgres.h" +#include "safe_lib.h" + #include "access/nbtree.h" #include "catalog/pg_am.h" #include "commands/defrem.h" @@ -1025,7 +1027,7 @@ ReadFromSmgr(Relation rel, uint64 offset, uint32 size) PageHeader phdr = (PageHeader) page; uint32 to_read = Min(size - read, phdr->pd_upper - addr.offset); - memcpy(resultBuffer->data + read, page + addr.offset, to_read); + memcpy_s(resultBuffer->data + read, size - read, page + addr.offset, to_read); ReleaseBuffer(buffer); read += to_read; } diff --git a/src/backend/columnar/cstore_writer.c b/src/backend/columnar/cstore_writer.c index dcaf5d122..735024fbf 100644 --- a/src/backend/columnar/cstore_writer.c +++ b/src/backend/columnar/cstore_writer.c @@ -16,6 +16,8 @@ #include "postgres.h" +#include "safe_lib.h" + #include "access/nbtree.h" #include "catalog/pg_am.h" #include "miscadmin.h" @@ -354,7 +356,7 @@ WriteToSmgr(Relation rel, uint64 logicalOffset, char *data, uint32 dataLength) START_CRIT_SECTION(); uint64 to_write = Min(phdr->pd_upper - phdr->pd_lower, remaining); - memcpy(page + phdr->pd_lower, data, to_write); + memcpy_s(page + phdr->pd_lower, phdr->pd_upper - phdr->pd_lower, data, to_write); phdr->pd_lower += to_write; MarkBufferDirty(buffer); @@ -561,13 +563,15 @@ SerializeSingleDatum(StringInfo 
datumBuffer, Datum datum, bool datumTypeByValue, } else { - memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumTypeLength); + memcpy_s(currentDatumDataPointer, datumBuffer->maxlen - datumBuffer->len, + DatumGetPointer(datum), datumTypeLength); } } else { Assert(!datumTypeByValue); - memcpy(currentDatumDataPointer, DatumGetPointer(datum), datumLength); + memcpy_s(currentDatumDataPointer, datumBuffer->maxlen - datumBuffer->len, + DatumGetPointer(datum), datumLength); } datumBuffer->len += datumLengthAligned; @@ -714,7 +718,7 @@ DatumCopy(Datum datum, bool datumTypeByValue, int datumTypeLength) { uint32 datumLength = att_addlength_datum(0, datumTypeLength, datum); char *datumData = palloc0(datumLength); - memcpy(datumData, DatumGetPointer(datum), datumLength); + memcpy_s(datumData, datumLength, DatumGetPointer(datum), datumLength); datumCopy = PointerGetDatum(datumData); } @@ -737,7 +741,8 @@ CopyStringInfo(StringInfo sourceString) targetString->data = palloc0(sourceString->len); targetString->len = sourceString->len; targetString->maxlen = sourceString->len; - memcpy(targetString->data, sourceString->data, sourceString->len); + memcpy_s(targetString->data, sourceString->len, + sourceString->data, sourceString->len); } return targetString; From d065bb495d1787fa97f0924fc028b4e98360db72 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Fri, 6 Nov 2020 16:00:49 +0100 Subject: [PATCH 115/124] Prepare downgrade script and bump development version to 10.0-1 --- .../columnar/sql/columnar--9.5-1--10.0-1.sql | 2 +- .../downgrades/columnar--10.0-1--9.5-1.sql | 45 +++++++++++++++++++ .../sql/downgrades/citus--10.0-1--9.5-1.sql | 2 + src/test/regress/expected/multi_extension.out | 23 ++++++++-- src/test/regress/sql/multi_extension.sql | 4 +- 5 files changed, 69 insertions(+), 7 deletions(-) create mode 100644 src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql diff --git a/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql 
b/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql index b80239f6a..7c6ee9111 100644 --- a/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql +++ b/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql @@ -26,7 +26,7 @@ CREATE EVENT TRIGGER cstore_ddl_event_end ON ddl_command_end EXECUTE PROCEDURE cstore_ddl_event_end_trigger(); -CREATE FUNCTION public.cstore_table_size(relation regclass) +CREATE FUNCTION pg_catalog.cstore_table_size(relation regclass) RETURNS bigint AS 'MODULE_PATHNAME' LANGUAGE C STRICT; diff --git a/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql b/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql new file mode 100644 index 000000000..4e6826d0c --- /dev/null +++ b/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql @@ -0,0 +1,45 @@ +/* columnar--10.0-1--9.5-1.sql */ + +SET search_path TO cstore; + +DO $proc$ +BEGIN + +IF substring(current_Setting('server_version'), '\d+')::int >= 12 THEN + EXECUTE $$ + DROP FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool, + stripe_row_count bool, + compression bool); + + DROP FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int, + stripe_row_count int, + compression name); + + DROP ACCESS METHOD cstore_tableam; + + DROP FUNCTION cstore_tableam_handler(internal); + + $$; +END IF; +END$proc$; + +DROP VIEW cstore_options; +DROP TABLE cstore_skipnodes; +DROP TABLE cstore_stripes; +DROP TABLE cstore_data_files; + +DROP FUNCTION pg_catalog.cstore_table_size(relation regclass); + +DROP EVENT TRIGGER cstore_ddl_event_end; +DROP FUNCTION cstore_ddl_event_end_trigger(); + +DROP FOREIGN DATA WRAPPER cstore_fdw; +DROP FUNCTION cstore_fdw_validator(text[], oid); +DROP FUNCTION cstore_fdw_handler(); + +RESET search_path; +DROP SCHEMA cstore; diff --git a/src/backend/distributed/sql/downgrades/citus--10.0-1--9.5-1.sql b/src/backend/distributed/sql/downgrades/citus--10.0-1--9.5-1.sql index 
e07dcfbd8..e721a4660 100644 --- a/src/backend/distributed/sql/downgrades/citus--10.0-1--9.5-1.sql +++ b/src/backend/distributed/sql/downgrades/citus--10.0-1--9.5-1.sql @@ -1,2 +1,4 @@ -- citus--10.0-1--9.5-1 -- this is an empty downgrade path since citus--9.5-1--10.0-1.sql is empty for now + +#include "../../../columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql" diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 1f8434840..19fda1889 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -80,7 +80,7 @@ FROM pg_depend AS pgd, WHERE pgd.refclassid = 'pg_extension'::regclass AND pgd.refobjid = pge.oid AND pge.extname = 'citus' AND - pgio.schema NOT IN ('pg_catalog', 'citus', 'citus_internal', 'test'); + pgio.schema NOT IN ('pg_catalog', 'citus', 'citus_internal', 'test', 'cstore'); count --------------------------------------------------------------------- 0 @@ -465,9 +465,24 @@ SELECT * FROM print_extension_changes(); -- Snapshot of state at 10.0-1 ALTER EXTENSION citus UPDATE TO '10.0-1'; SELECT * FROM print_extension_changes(); - previous_object | current_object + previous_object | current_object --------------------------------------------------------------------- -(0 rows) + | access method cstore_tableam + | event trigger cstore_ddl_event_end + | foreign-data wrapper cstore_fdw + | function alter_cstore_table_reset(regclass,boolean,boolean,boolean) + | function alter_cstore_table_set(regclass,integer,integer,name) + | function cstore.cstore_ddl_event_end_trigger() + | function cstore.cstore_fdw_handler() + | function cstore.cstore_fdw_validator(text[],oid) + | function cstore.cstore_tableam_handler(internal) + | function cstore_table_size(regclass) + | schema cstore + | table cstore.cstore_data_files + | table cstore.cstore_skipnodes + | table cstore.cstore_stripes + | view cstore.cstore_options +(15 rows) DROP TABLE prev_objects, 
extension_diff; -- show running version @@ -485,7 +500,7 @@ FROM pg_depend AS pgd, WHERE pgd.refclassid = 'pg_extension'::regclass AND pgd.refobjid = pge.oid AND pge.extname = 'citus' AND - pgio.schema NOT IN ('pg_catalog', 'citus', 'citus_internal', 'test'); + pgio.schema NOT IN ('pg_catalog', 'citus', 'citus_internal', 'test', 'cstore'); count --------------------------------------------------------------------- 0 diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index 49c5413d4..4444bb5f4 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -83,7 +83,7 @@ FROM pg_depend AS pgd, WHERE pgd.refclassid = 'pg_extension'::regclass AND pgd.refobjid = pge.oid AND pge.extname = 'citus' AND - pgio.schema NOT IN ('pg_catalog', 'citus', 'citus_internal', 'test'); + pgio.schema NOT IN ('pg_catalog', 'citus', 'citus_internal', 'test', 'cstore'); -- DROP EXTENSION pre-created by the regression suite @@ -238,7 +238,7 @@ FROM pg_depend AS pgd, WHERE pgd.refclassid = 'pg_extension'::regclass AND pgd.refobjid = pge.oid AND pge.extname = 'citus' AND - pgio.schema NOT IN ('pg_catalog', 'citus', 'citus_internal', 'test'); + pgio.schema NOT IN ('pg_catalog', 'citus', 'citus_internal', 'test', 'cstore'); -- see incompatible version errors out RESET citus.enable_version_checks; From 691fdb2c64a8e3f0ec7fa1acb2225a63fc77d431 Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Mon, 9 Nov 2020 23:23:50 -0800 Subject: [PATCH 116/124] Don't grab in additional locks cstore code when truncating --- src/backend/columnar/cstore_fdw.c | 13 ++++++++++++- src/backend/columnar/cstore_tableam.c | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/backend/columnar/cstore_fdw.c b/src/backend/columnar/cstore_fdw.c index 97e6f5a3c..19b3d1847 100644 --- a/src/backend/columnar/cstore_fdw.c +++ b/src/backend/columnar/cstore_fdw.c @@ -718,7 +718,7 @@ FindCStoreTables(List *tableList) 
foreach(relationCell, tableList) { RangeVar *rangeVar = (RangeVar *) lfirst(relationCell); - Oid relationId = RangeVarGetRelid(rangeVar, AccessShareLock, true); + Oid relationId = RangeVarGetRelid(rangeVar, NoLock, true); if (IsCStoreFdwTable(relationId) && !DistributedTable(relationId)) { cstoreTableList = lappend(cstoreTableList, rangeVar); @@ -758,6 +758,17 @@ OpenRelationsForTruncate(List *cstoreTableList) { heap_close(relation, AccessExclusiveLock); } + + /* + * Type of cstore tables can change since we checked last time, + * since we didn't hold a lock when checking. + */ + else if (!IsCStoreFdwTable(relationId)) + { + ereport(ERROR, (errmsg("relation \"%s\" not columnar anymore", + RelationGetRelationName(relation)), + errhint("try repeating the truncate command"))); + } else { relationIdList = lappend_oid(relationIdList, relationId); diff --git a/src/backend/columnar/cstore_tableam.c b/src/backend/columnar/cstore_tableam.c index 203cbb334..d7c3aff11 100644 --- a/src/backend/columnar/cstore_tableam.c +++ b/src/backend/columnar/cstore_tableam.c @@ -58,7 +58,7 @@ * These are the same values from src/backend/access/heap/vacuumlazy.c */ #define VACUUM_TRUNCATE_LOCK_WAIT_INTERVAL 50 /* ms */ -#define VACUUM_TRUNCATE_LOCK_TIMEOUT 5000 /* ms */ +#define VACUUM_TRUNCATE_LOCK_TIMEOUT 4500 /* ms */ typedef struct CStoreScanDescData { From 29875351729693fe8b047d4e8eaa681680d4f62d Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Tue, 10 Nov 2020 14:26:18 +0100 Subject: [PATCH 117/124] add pg upgrade tests verifying table am is created --- src/test/regress/after_pg_upgrade_schedule | 2 +- .../expected/upgrade_list_citus_objects.out | 226 ++++++++++++++++++ .../expected/upgrade_list_citus_objects_0.out | 222 +++++++++++++++++ .../sql/upgrade_list_citus_objects.sql | 12 + 4 files changed, 461 insertions(+), 1 deletion(-) create mode 100644 src/test/regress/expected/upgrade_list_citus_objects.out create mode 100644 src/test/regress/expected/upgrade_list_citus_objects_0.out 
create mode 100644 src/test/regress/sql/upgrade_list_citus_objects.sql diff --git a/src/test/regress/after_pg_upgrade_schedule b/src/test/regress/after_pg_upgrade_schedule index 243325c51..1d4d45039 100644 --- a/src/test/regress/after_pg_upgrade_schedule +++ b/src/test/regress/after_pg_upgrade_schedule @@ -1 +1 @@ -test: upgrade_basic_after upgrade_type_after upgrade_ref2ref_after upgrade_distributed_function_after upgrade_rebalance_strategy_after +test: upgrade_basic_after upgrade_type_after upgrade_ref2ref_after upgrade_distributed_function_after upgrade_rebalance_strategy_after upgrade_list_citus_objects diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out new file mode 100644 index 000000000..c0bceef2c --- /dev/null +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -0,0 +1,226 @@ +-- print version above 11 (eg. 12 and above) +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 11 AS version_above_eleven; + version_above_eleven +--------------------------------------------------------------------- + t +(1 row) + +-- list all postgres objects belonging to the citus extension +SELECT pg_catalog.pg_describe_object(classid, objid, 0) AS description +FROM pg_catalog.pg_depend, pg_catalog.pg_extension e +WHERE refclassid = 'pg_catalog.pg_extension'::pg_catalog.regclass + AND refobjid = e.oid + AND deptype = 'e' + AND e.extname='citus' +ORDER BY 1; + description +--------------------------------------------------------------------- + access method cstore_tableam + event trigger citus_cascade_to_partition + event trigger cstore_ddl_event_end + foreign-data wrapper cstore_fdw + function alter_cstore_table_reset(regclass,boolean,boolean,boolean) + function alter_cstore_table_set(regclass,integer,integer,name) + function alter_role_if_exists(text,text) + function any_value(anyelement) + function any_value_agg(anyelement,anyelement) + function 
array_cat_agg(anyarray) + function assign_distributed_transaction_id(integer,bigint,timestamp with time zone) + function authinfo_valid(text) + function broadcast_intermediate_result(text,text) + function check_distributed_deadlocks() + function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real) + function citus_blocking_pids(integer) + function citus_create_restore_point(text) + function citus_dist_stat_activity() + function citus_drop_trigger() + function citus_executor_name(integer) + function citus_extradata_container(internal) + function citus_finish_pg_upgrade() + function citus_internal.find_groupid_for_node(text,integer) + function citus_internal.pg_dist_node_trigger_func() + function citus_internal.pg_dist_rebalance_strategy_enterprise_check() + function citus_internal.pg_dist_rebalance_strategy_trigger_func() + function citus_internal.pg_dist_shard_placement_trigger_func() + function citus_internal.refresh_isolation_tester_prepared_statement() + function citus_internal.replace_isolation_tester_func() + function citus_internal.restore_isolation_tester_func() + function citus_isolation_test_session_is_blocked(integer,integer[]) + function citus_json_concatenate(json,json) + function citus_json_concatenate_final(json) + function citus_jsonb_concatenate(jsonb,jsonb) + function citus_jsonb_concatenate_final(jsonb) + function citus_node_capacity_1(integer) + function citus_prepare_pg_upgrade() + function citus_query_stats() + function citus_relation_size(regclass) + function citus_remote_connection_stats() + function citus_server_id() + function citus_set_default_rebalance_strategy(text) + function citus_shard_allowed_on_node_true(bigint,integer) + function citus_shard_cost_1(bigint) + function citus_shard_cost_by_disk_size(bigint) + function citus_stat_statements() + function citus_stat_statements_reset() + function citus_table_is_visible(oid) + function citus_table_size(regclass) + function citus_text_send_as_jsonb(text) + function 
citus_total_relation_size(regclass) + function citus_truncate_trigger() + function citus_validate_rebalance_strategy_functions(regproc,regproc,regproc) + function citus_version() + function citus_worker_stat_activity() + function column_name_to_column(regclass,text) + function column_to_column_name(regclass,text) + function coord_combine_agg(oid,cstring,anyelement) + function coord_combine_agg_ffunc(internal,oid,cstring,anyelement) + function coord_combine_agg_sfunc(internal,oid,cstring,anyelement) + function create_citus_local_table(regclass) + function create_distributed_function(regprocedure,text,text) + function create_distributed_table(regclass,text,citus.distribution_type,text) + function create_intermediate_result(text,text) + function create_reference_table(regclass) + function cstore.cstore_ddl_event_end_trigger() + function cstore.cstore_fdw_handler() + function cstore.cstore_fdw_validator(text[],oid) + function cstore.cstore_tableam_handler(internal) + function cstore_table_size(regclass) + function distributed_tables_colocated(regclass,regclass) + function dump_global_wait_edges() + function dump_local_wait_edges() + function fetch_intermediate_results(text[],text,integer) + function get_all_active_transactions() + function get_colocated_shard_array(bigint) + function get_colocated_table_array(regclass) + function get_current_transaction_id() + function get_global_active_transactions() + function get_rebalance_progress() + function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name) + function get_shard_id_for_distribution_column(regclass,"any") + function isolate_tenant_to_new_shard(regclass,"any",text) + function json_cat_agg(json) + function jsonb_cat_agg(jsonb) + function lock_relation_if_exists(text,text) + function lock_shard_metadata(integer,bigint[]) + function lock_shard_resources(integer,bigint[]) + function mark_tables_colocated(regclass,regclass[]) + function master_activate_node(text,integer) + function 
master_add_inactive_node(text,integer,integer,noderole,name) + function master_add_node(text,integer,integer,noderole,name) + function master_add_secondary_node(text,integer,text,integer,name) + function master_append_table_to_shard(bigint,text,text,integer) + function master_apply_delete_command(text) + function master_conninfo_cache_invalidate() + function master_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) + function master_create_distributed_table(regclass,text,citus.distribution_type) + function master_create_empty_shard(text) + function master_create_worker_shards(text,integer,integer) + function master_disable_node(text,integer) + function master_dist_local_group_cache_invalidate() + function master_dist_node_cache_invalidate() + function master_dist_object_cache_invalidate() + function master_dist_partition_cache_invalidate() + function master_dist_placement_cache_invalidate() + function master_dist_shard_cache_invalidate() + function master_drain_node(text,integer,citus.shard_transfer_mode,name) + function master_drop_all_shards(regclass,text,text) + function master_get_active_worker_nodes() + function master_get_new_placementid() + function master_get_new_shardid() + function master_get_table_ddl_events(text) + function master_get_table_metadata(text) + function master_modify_multiple_shards(text) + function master_move_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) + function master_remove_distributed_table_metadata_from_workers(regclass,text,text) + function master_remove_node(text,integer) + function master_remove_partition_metadata(regclass,text,text) + function master_run_on_worker(text[],integer[],text[],boolean) + function master_set_node_property(text,integer,text,boolean) + function master_unmark_object_distributed(oid,oid,integer) + function master_update_node(integer,text,integer,boolean,integer) + function master_update_shard_statistics(bigint) + function 
master_update_table_statistics(regclass) + function poolinfo_valid(text) + function read_intermediate_result(text,citus_copy_format) + function read_intermediate_results(text[],citus_copy_format) + function rebalance_table_shards(regclass,real,integer,bigint[],citus.shard_transfer_mode,boolean,name) + function recover_prepared_transactions() + function relation_is_a_known_shard(regclass) + function replicate_reference_tables() + function replicate_table_shards(regclass,integer,integer,bigint[],citus.shard_transfer_mode) + function role_exists(name) + function run_command_on_colocated_placements(regclass,regclass,text,boolean) + function run_command_on_placements(regclass,text,boolean) + function run_command_on_shards(regclass,text,boolean) + function run_command_on_workers(text,boolean) + function shard_name(regclass,bigint) + function start_metadata_sync_to_node(text,integer) + function stop_metadata_sync_to_node(text,integer) + function truncate_local_data_after_distributing_table(regclass) + function undistribute_table(regclass) + function update_distributed_table_colocation(regclass,text) + function upgrade_to_reference_table(regclass) + function worker_append_table_to_shard(text,text,text,integer) + function worker_apply_inter_shard_ddl_command(bigint,text,bigint,text,text) + function worker_apply_sequence_command(text) + function worker_apply_sequence_command(text,regtype) + function worker_apply_shard_ddl_command(bigint,text) + function worker_apply_shard_ddl_command(bigint,text,text) + function worker_cleanup_job_schema_cache() + function worker_create_or_alter_role(text,text,text) + function worker_create_or_replace_object(text) + function worker_create_schema(bigint,text) + function worker_create_truncate_trigger(regclass) + function worker_drop_distributed_table(text) + function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) + function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) + function worker_hash("any") + 
function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) + function worker_last_saved_explain_analyze() + function worker_merge_files_into_table(bigint,integer,text[],text[]) + function worker_partial_agg(oid,anyelement) + function worker_partial_agg_ffunc(internal) + function worker_partial_agg_sfunc(internal,oid,anyelement) + function worker_partition_query_result(text,text,integer,citus.distribution_type,text[],text[],boolean) + function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) + function worker_record_sequence_dependency(regclass,regclass,name) + function worker_repartition_cleanup(bigint) + function worker_save_query_explain_analyze(text,jsonb) + schema citus + schema citus_internal + schema cstore + sequence pg_dist_colocationid_seq + sequence pg_dist_groupid_seq + sequence pg_dist_node_nodeid_seq + sequence pg_dist_placement_placementid_seq + sequence pg_dist_shardid_seq + table citus.pg_dist_object + table cstore.cstore_data_files + table cstore.cstore_skipnodes + table cstore.cstore_stripes + table pg_dist_authinfo + table pg_dist_colocation + table pg_dist_local_group + table pg_dist_node + table pg_dist_node_metadata + table pg_dist_partition + table pg_dist_placement + table pg_dist_poolinfo + table pg_dist_rebalance_strategy + table pg_dist_shard + table pg_dist_transaction + type citus.distribution_type + type citus.shard_transfer_mode + type citus_copy_format + type noderole + view citus_dist_stat_activity + view citus_lock_waits + view citus_shard_indexes_on_worker + view citus_shards_on_worker + view citus_stat_statements + view citus_worker_stat_activity + view cstore.cstore_options + view pg_dist_shard_placement +(206 rows) + diff --git a/src/test/regress/expected/upgrade_list_citus_objects_0.out b/src/test/regress/expected/upgrade_list_citus_objects_0.out new file mode 100644 index 000000000..ec53e1f75 --- /dev/null +++ b/src/test/regress/expected/upgrade_list_citus_objects_0.out @@ -0,0 +1,222 @@ 
+-- print version above 11 (eg. 12 and above) +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 11 AS version_above_eleven; + version_above_eleven +--------------------------------------------------------------------- + f +(1 row) + +-- list all postgres objects belonging to the citus extension +SELECT pg_catalog.pg_describe_object(classid, objid, 0) AS description +FROM pg_catalog.pg_depend, pg_catalog.pg_extension e +WHERE refclassid = 'pg_catalog.pg_extension'::pg_catalog.regclass + AND refobjid = e.oid + AND deptype = 'e' + AND e.extname='citus' +ORDER BY 1; + description +--------------------------------------------------------------------- + event trigger citus_cascade_to_partition + event trigger cstore_ddl_event_end + foreign-data wrapper cstore_fdw + function alter_role_if_exists(text,text) + function any_value(anyelement) + function any_value_agg(anyelement,anyelement) + function array_cat_agg(anyarray) + function assign_distributed_transaction_id(integer,bigint,timestamp with time zone) + function authinfo_valid(text) + function broadcast_intermediate_result(text,text) + function check_distributed_deadlocks() + function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real) + function citus_blocking_pids(integer) + function citus_create_restore_point(text) + function citus_dist_stat_activity() + function citus_drop_trigger() + function citus_executor_name(integer) + function citus_extradata_container(internal) + function citus_finish_pg_upgrade() + function citus_internal.find_groupid_for_node(text,integer) + function citus_internal.pg_dist_node_trigger_func() + function citus_internal.pg_dist_rebalance_strategy_enterprise_check() + function citus_internal.pg_dist_rebalance_strategy_trigger_func() + function citus_internal.pg_dist_shard_placement_trigger_func() + function citus_internal.refresh_isolation_tester_prepared_statement() + function citus_internal.replace_isolation_tester_func() + function 
citus_internal.restore_isolation_tester_func() + function citus_isolation_test_session_is_blocked(integer,integer[]) + function citus_json_concatenate(json,json) + function citus_json_concatenate_final(json) + function citus_jsonb_concatenate(jsonb,jsonb) + function citus_jsonb_concatenate_final(jsonb) + function citus_node_capacity_1(integer) + function citus_prepare_pg_upgrade() + function citus_query_stats() + function citus_relation_size(regclass) + function citus_remote_connection_stats() + function citus_server_id() + function citus_set_default_rebalance_strategy(text) + function citus_shard_allowed_on_node_true(bigint,integer) + function citus_shard_cost_1(bigint) + function citus_shard_cost_by_disk_size(bigint) + function citus_stat_statements() + function citus_stat_statements_reset() + function citus_table_is_visible(oid) + function citus_table_size(regclass) + function citus_text_send_as_jsonb(text) + function citus_total_relation_size(regclass) + function citus_truncate_trigger() + function citus_validate_rebalance_strategy_functions(regproc,regproc,regproc) + function citus_version() + function citus_worker_stat_activity() + function column_name_to_column(regclass,text) + function column_to_column_name(regclass,text) + function coord_combine_agg(oid,cstring,anyelement) + function coord_combine_agg_ffunc(internal,oid,cstring,anyelement) + function coord_combine_agg_sfunc(internal,oid,cstring,anyelement) + function create_citus_local_table(regclass) + function create_distributed_function(regprocedure,text,text) + function create_distributed_table(regclass,text,citus.distribution_type,text) + function create_intermediate_result(text,text) + function create_reference_table(regclass) + function cstore.cstore_ddl_event_end_trigger() + function cstore.cstore_fdw_handler() + function cstore.cstore_fdw_validator(text[],oid) + function cstore_table_size(regclass) + function distributed_tables_colocated(regclass,regclass) + function dump_global_wait_edges() + 
function dump_local_wait_edges() + function fetch_intermediate_results(text[],text,integer) + function get_all_active_transactions() + function get_colocated_shard_array(bigint) + function get_colocated_table_array(regclass) + function get_current_transaction_id() + function get_global_active_transactions() + function get_rebalance_progress() + function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name) + function get_shard_id_for_distribution_column(regclass,"any") + function isolate_tenant_to_new_shard(regclass,"any",text) + function json_cat_agg(json) + function jsonb_cat_agg(jsonb) + function lock_relation_if_exists(text,text) + function lock_shard_metadata(integer,bigint[]) + function lock_shard_resources(integer,bigint[]) + function mark_tables_colocated(regclass,regclass[]) + function master_activate_node(text,integer) + function master_add_inactive_node(text,integer,integer,noderole,name) + function master_add_node(text,integer,integer,noderole,name) + function master_add_secondary_node(text,integer,text,integer,name) + function master_append_table_to_shard(bigint,text,text,integer) + function master_apply_delete_command(text) + function master_conninfo_cache_invalidate() + function master_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) + function master_create_distributed_table(regclass,text,citus.distribution_type) + function master_create_empty_shard(text) + function master_create_worker_shards(text,integer,integer) + function master_disable_node(text,integer) + function master_dist_local_group_cache_invalidate() + function master_dist_node_cache_invalidate() + function master_dist_object_cache_invalidate() + function master_dist_partition_cache_invalidate() + function master_dist_placement_cache_invalidate() + function master_dist_shard_cache_invalidate() + function master_drain_node(text,integer,citus.shard_transfer_mode,name) + function master_drop_all_shards(regclass,text,text) + 
function master_get_active_worker_nodes() + function master_get_new_placementid() + function master_get_new_shardid() + function master_get_table_ddl_events(text) + function master_get_table_metadata(text) + function master_modify_multiple_shards(text) + function master_move_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) + function master_remove_distributed_table_metadata_from_workers(regclass,text,text) + function master_remove_node(text,integer) + function master_remove_partition_metadata(regclass,text,text) + function master_run_on_worker(text[],integer[],text[],boolean) + function master_set_node_property(text,integer,text,boolean) + function master_unmark_object_distributed(oid,oid,integer) + function master_update_node(integer,text,integer,boolean,integer) + function master_update_shard_statistics(bigint) + function master_update_table_statistics(regclass) + function poolinfo_valid(text) + function read_intermediate_result(text,citus_copy_format) + function read_intermediate_results(text[],citus_copy_format) + function rebalance_table_shards(regclass,real,integer,bigint[],citus.shard_transfer_mode,boolean,name) + function recover_prepared_transactions() + function relation_is_a_known_shard(regclass) + function replicate_reference_tables() + function replicate_table_shards(regclass,integer,integer,bigint[],citus.shard_transfer_mode) + function role_exists(name) + function run_command_on_colocated_placements(regclass,regclass,text,boolean) + function run_command_on_placements(regclass,text,boolean) + function run_command_on_shards(regclass,text,boolean) + function run_command_on_workers(text,boolean) + function shard_name(regclass,bigint) + function start_metadata_sync_to_node(text,integer) + function stop_metadata_sync_to_node(text,integer) + function truncate_local_data_after_distributing_table(regclass) + function undistribute_table(regclass) + function update_distributed_table_colocation(regclass,text) + function 
upgrade_to_reference_table(regclass) + function worker_append_table_to_shard(text,text,text,integer) + function worker_apply_inter_shard_ddl_command(bigint,text,bigint,text,text) + function worker_apply_sequence_command(text) + function worker_apply_sequence_command(text,regtype) + function worker_apply_shard_ddl_command(bigint,text) + function worker_apply_shard_ddl_command(bigint,text,text) + function worker_cleanup_job_schema_cache() + function worker_create_or_alter_role(text,text,text) + function worker_create_or_replace_object(text) + function worker_create_schema(bigint,text) + function worker_create_truncate_trigger(regclass) + function worker_drop_distributed_table(text) + function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) + function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) + function worker_hash("any") + function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) + function worker_last_saved_explain_analyze() + function worker_merge_files_into_table(bigint,integer,text[],text[]) + function worker_partial_agg(oid,anyelement) + function worker_partial_agg_ffunc(internal) + function worker_partial_agg_sfunc(internal,oid,anyelement) + function worker_partition_query_result(text,text,integer,citus.distribution_type,text[],text[],boolean) + function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) + function worker_record_sequence_dependency(regclass,regclass,name) + function worker_repartition_cleanup(bigint) + function worker_save_query_explain_analyze(text,jsonb) + schema citus + schema citus_internal + schema cstore + sequence pg_dist_colocationid_seq + sequence pg_dist_groupid_seq + sequence pg_dist_node_nodeid_seq + sequence pg_dist_placement_placementid_seq + sequence pg_dist_shardid_seq + table citus.pg_dist_object + table cstore.cstore_data_files + table cstore.cstore_skipnodes + table cstore.cstore_stripes + table pg_dist_authinfo + table pg_dist_colocation + table 
pg_dist_local_group + table pg_dist_node + table pg_dist_node_metadata + table pg_dist_partition + table pg_dist_placement + table pg_dist_poolinfo + table pg_dist_rebalance_strategy + table pg_dist_shard + table pg_dist_transaction + type citus.distribution_type + type citus.shard_transfer_mode + type citus_copy_format + type noderole + view citus_dist_stat_activity + view citus_lock_waits + view citus_shard_indexes_on_worker + view citus_shards_on_worker + view citus_stat_statements + view citus_worker_stat_activity + view cstore.cstore_options + view pg_dist_shard_placement +(202 rows) + diff --git a/src/test/regress/sql/upgrade_list_citus_objects.sql b/src/test/regress/sql/upgrade_list_citus_objects.sql new file mode 100644 index 000000000..86a99a3a9 --- /dev/null +++ b/src/test/regress/sql/upgrade_list_citus_objects.sql @@ -0,0 +1,12 @@ +-- print version above 11 (eg. 12 and above) +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 11 AS version_above_eleven; + +-- list all postgres objects belonging to the citus extension +SELECT pg_catalog.pg_describe_object(classid, objid, 0) AS description +FROM pg_catalog.pg_depend, pg_catalog.pg_extension e +WHERE refclassid = 'pg_catalog.pg_extension'::pg_catalog.regclass + AND refobjid = e.oid + AND deptype = 'e' + AND e.extname='citus' +ORDER BY 1; From 7c891a01a96ab2f7dd33e96fa3933a807cf5a96c Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Tue, 10 Nov 2020 17:24:54 +0100 Subject: [PATCH 118/124] create missing objects during upgrade path --- ci/check_sql_snapshots.sh | 2 +- .../columnar/sql/columnar--9.5-1--10.0-1.sql | 30 +---- .../downgrades/columnar--10.0-1--9.5-1.sql | 2 + .../udfs/alter_cstore_table_reset/10.0-1.sql | 15 +++ .../udfs/alter_cstore_table_reset/latest.sql | 15 +++ .../udfs/alter_cstore_table_set/10.0-1.sql | 15 +++ .../udfs/alter_cstore_table_set/latest.sql | 15 +++ .../cstore_ensure_objects_exist/10.0-1.sql | 47 +++++++ .../cstore_ensure_objects_exist/latest.sql | 47 
+++++++ .../udfs/cstore_tableam_handler/10.0-1.sql | 9 ++ .../udfs/cstore_tableam_handler/latest.sql | 9 ++ .../distributed/sql/citus--9.5-1--10.0-1.sql | 2 + .../sql/downgrades/citus--10.0-1--9.5-1.sql | 2 + .../udfs/citus_finish_pg_upgrade/10.0-1.sql | 116 ++++++++++++++++++ .../udfs/citus_finish_pg_upgrade/latest.sql | 2 + src/test/regress/expected/multi_extension.out | 3 +- .../expected/upgrade_list_citus_objects.out | 3 +- .../expected/upgrade_list_citus_objects_0.out | 3 +- 18 files changed, 308 insertions(+), 29 deletions(-) create mode 100644 src/backend/columnar/sql/udfs/alter_cstore_table_reset/10.0-1.sql create mode 100644 src/backend/columnar/sql/udfs/alter_cstore_table_reset/latest.sql create mode 100644 src/backend/columnar/sql/udfs/alter_cstore_table_set/10.0-1.sql create mode 100644 src/backend/columnar/sql/udfs/alter_cstore_table_set/latest.sql create mode 100644 src/backend/columnar/sql/udfs/cstore_ensure_objects_exist/10.0-1.sql create mode 100644 src/backend/columnar/sql/udfs/cstore_ensure_objects_exist/latest.sql create mode 100644 src/backend/columnar/sql/udfs/cstore_tableam_handler/10.0-1.sql create mode 100644 src/backend/columnar/sql/udfs/cstore_tableam_handler/latest.sql create mode 100644 src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.0-1.sql diff --git a/ci/check_sql_snapshots.sh b/ci/check_sql_snapshots.sh index 3c871ab8b..9a0335735 100755 --- a/ci/check_sql_snapshots.sh +++ b/ci/check_sql_snapshots.sh @@ -4,7 +4,7 @@ set -euo pipefail # shellcheck disable=SC1091 source ci/ci_helpers.sh -for udf_dir in src/backend/distributed/sql/udfs/*; do +for udf_dir in src/backend/distributed/sql/udfs/* src/backend/columnar/sql/udfs/*; do # We want to find the last snapshotted sql file, to make sure it's the same # as "latest.sql". This is done by: # 1. 
Getting the filenames in the UDF directory (using find instead of ls, to keep shellcheck happy) diff --git a/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql b/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql index 7c6ee9111..fe307e069 100644 --- a/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql +++ b/src/backend/columnar/sql/columnar--9.5-1--10.0-1.sql @@ -95,33 +95,13 @@ BEGIN -- user instead to add the missing objects IF substring(current_Setting('server_version'), '\d+')::int >= 12 THEN EXECUTE $$ - CREATE FUNCTION cstore_tableam_handler(internal) - RETURNS table_am_handler - LANGUAGE C - AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; - - CREATE ACCESS METHOD cstore_tableam - TYPE TABLE HANDLER cstore_tableam_handler; - - CREATE FUNCTION pg_catalog.alter_cstore_table_set( - table_name regclass, - block_row_count int DEFAULT NULL, - stripe_row_count int DEFAULT NULL, - compression name DEFAULT null) - RETURNS void - LANGUAGE C - AS 'MODULE_PATHNAME', 'alter_cstore_table_set'; - - CREATE FUNCTION pg_catalog.alter_cstore_table_reset( - table_name regclass, - block_row_count bool DEFAULT false, - stripe_row_count bool DEFAULT false, - compression bool DEFAULT false) - RETURNS void - LANGUAGE C - AS 'MODULE_PATHNAME', 'alter_cstore_table_reset'; +#include "udfs/cstore_tableam_handler/10.0-1.sql" +#include "udfs/alter_cstore_table_set/10.0-1.sql" +#include "udfs/alter_cstore_table_reset/10.0-1.sql" $$; END IF; END$proc$; +#include "udfs/cstore_ensure_objects_exist/10.0-1.sql" + RESET search_path; diff --git a/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql b/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql index 4e6826d0c..ecfa18f79 100644 --- a/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql +++ b/src/backend/columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql @@ -41,5 +41,7 @@ DROP FOREIGN DATA WRAPPER cstore_fdw; DROP FUNCTION cstore_fdw_validator(text[], oid); DROP FUNCTION cstore_fdw_handler(); +DROP 
FUNCTION citus_internal.cstore_ensure_objects_exist(); + RESET search_path; DROP SCHEMA cstore; diff --git a/src/backend/columnar/sql/udfs/alter_cstore_table_reset/10.0-1.sql b/src/backend/columnar/sql/udfs/alter_cstore_table_reset/10.0-1.sql new file mode 100644 index 000000000..67a59ac87 --- /dev/null +++ b/src/backend/columnar/sql/udfs/alter_cstore_table_reset/10.0-1.sql @@ -0,0 +1,15 @@ +CREATE OR REPLACE FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool DEFAULT false, + stripe_row_count bool DEFAULT false, + compression bool DEFAULT false) + RETURNS void + LANGUAGE C +AS 'MODULE_PATHNAME', 'alter_cstore_table_reset'; + +COMMENT ON FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool, + stripe_row_count bool, + compression bool) +IS 'reset on or more options on a cstore table to the system defaults'; diff --git a/src/backend/columnar/sql/udfs/alter_cstore_table_reset/latest.sql b/src/backend/columnar/sql/udfs/alter_cstore_table_reset/latest.sql new file mode 100644 index 000000000..67a59ac87 --- /dev/null +++ b/src/backend/columnar/sql/udfs/alter_cstore_table_reset/latest.sql @@ -0,0 +1,15 @@ +CREATE OR REPLACE FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool DEFAULT false, + stripe_row_count bool DEFAULT false, + compression bool DEFAULT false) + RETURNS void + LANGUAGE C +AS 'MODULE_PATHNAME', 'alter_cstore_table_reset'; + +COMMENT ON FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool, + stripe_row_count bool, + compression bool) +IS 'reset on or more options on a cstore table to the system defaults'; diff --git a/src/backend/columnar/sql/udfs/alter_cstore_table_set/10.0-1.sql b/src/backend/columnar/sql/udfs/alter_cstore_table_set/10.0-1.sql new file mode 100644 index 000000000..a630447d7 --- /dev/null +++ b/src/backend/columnar/sql/udfs/alter_cstore_table_set/10.0-1.sql @@ -0,0 +1,15 @@ +CREATE 
OR REPLACE FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int DEFAULT NULL, + stripe_row_count int DEFAULT NULL, + compression name DEFAULT null) + RETURNS void + LANGUAGE C +AS 'MODULE_PATHNAME', 'alter_cstore_table_set'; + +COMMENT ON FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int, + stripe_row_count int, + compression name) +IS 'set one or more options on a cstore table, when set to NULL no change is made'; diff --git a/src/backend/columnar/sql/udfs/alter_cstore_table_set/latest.sql b/src/backend/columnar/sql/udfs/alter_cstore_table_set/latest.sql new file mode 100644 index 000000000..a630447d7 --- /dev/null +++ b/src/backend/columnar/sql/udfs/alter_cstore_table_set/latest.sql @@ -0,0 +1,15 @@ +CREATE OR REPLACE FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int DEFAULT NULL, + stripe_row_count int DEFAULT NULL, + compression name DEFAULT null) + RETURNS void + LANGUAGE C +AS 'MODULE_PATHNAME', 'alter_cstore_table_set'; + +COMMENT ON FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int, + stripe_row_count int, + compression name) +IS 'set one or more options on a cstore table, when set to NULL no change is made'; diff --git a/src/backend/columnar/sql/udfs/cstore_ensure_objects_exist/10.0-1.sql b/src/backend/columnar/sql/udfs/cstore_ensure_objects_exist/10.0-1.sql new file mode 100644 index 000000000..69b56f99a --- /dev/null +++ b/src/backend/columnar/sql/udfs/cstore_ensure_objects_exist/10.0-1.sql @@ -0,0 +1,47 @@ +-- citus_internal.cstore_ensure_objects_exist is an internal helper function to create +-- missing objects, anything related to the table access methods. +-- Since the API for table access methods is only available in PG12 we can't create these +-- objects when Citus is installed in PG11. Once citus is installed on PG11 the user can +-- upgrade their database to PG12. 
Now they require the table access method objects that +-- we couldn't create before. +-- This internal function is called from `citus_finish_pg_upgrade` which the user is +-- required to call after a PG major upgrade. +CREATE OR REPLACE FUNCTION citus_internal.cstore_ensure_objects_exist() + RETURNS void + LANGUAGE plpgsql + SET search_path = pg_catalog +AS $ceoe$ +BEGIN + +-- when postgres is version 12 or above we need to create the tableam. If the tableam +-- exist we assume all objects have been created. +IF substring(current_Setting('server_version'), '\d+')::int >= 12 THEN +IF NOT EXISTS (SELECT 1 FROM pg_am WHERE amname = 'cstore_tableam') THEN + +#include "../cstore_tableam_handler/10.0-1.sql" + +#include "../alter_cstore_table_set/10.0-1.sql" + +#include "../alter_cstore_table_reset/10.0-1.sql" + + -- add the missing objects to the extension + ALTER EXTENSION citus ADD FUNCTION cstore.cstore_tableam_handler(internal); + ALTER EXTENSION citus ADD ACCESS METHOD cstore_tableam; + ALTER EXTENSION citus ADD FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int, + stripe_row_count int, + compression name); + ALTER EXTENSION citus ADD FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool, + stripe_row_count bool, + compression bool); + +END IF; +END IF; +END; +$ceoe$; + +COMMENT ON FUNCTION citus_internal.cstore_ensure_objects_exist() + IS 'internal function to be called by pg_catalog.citus_finish_pg_upgrade responsible for creating the columnar objects'; diff --git a/src/backend/columnar/sql/udfs/cstore_ensure_objects_exist/latest.sql b/src/backend/columnar/sql/udfs/cstore_ensure_objects_exist/latest.sql new file mode 100644 index 000000000..69b56f99a --- /dev/null +++ b/src/backend/columnar/sql/udfs/cstore_ensure_objects_exist/latest.sql @@ -0,0 +1,47 @@ +-- citus_internal.cstore_ensure_objects_exist is an internal helper function to create +-- missing objects, anything related to the 
table access methods. +-- Since the API for table access methods is only available in PG12 we can't create these +-- objects when Citus is installed in PG11. Once citus is installed on PG11 the user can +-- upgrade their database to PG12. Now they require the table access method objects that +-- we couldn't create before. +-- This internal function is called from `citus_finish_pg_upgrade` which the user is +-- required to call after a PG major upgrade. +CREATE OR REPLACE FUNCTION citus_internal.cstore_ensure_objects_exist() + RETURNS void + LANGUAGE plpgsql + SET search_path = pg_catalog +AS $ceoe$ +BEGIN + +-- when postgres is version 12 or above we need to create the tableam. If the tableam +-- exist we assume all objects have been created. +IF substring(current_Setting('server_version'), '\d+')::int >= 12 THEN +IF NOT EXISTS (SELECT 1 FROM pg_am WHERE amname = 'cstore_tableam') THEN + +#include "../cstore_tableam_handler/10.0-1.sql" + +#include "../alter_cstore_table_set/10.0-1.sql" + +#include "../alter_cstore_table_reset/10.0-1.sql" + + -- add the missing objects to the extension + ALTER EXTENSION citus ADD FUNCTION cstore.cstore_tableam_handler(internal); + ALTER EXTENSION citus ADD ACCESS METHOD cstore_tableam; + ALTER EXTENSION citus ADD FUNCTION pg_catalog.alter_cstore_table_set( + table_name regclass, + block_row_count int, + stripe_row_count int, + compression name); + ALTER EXTENSION citus ADD FUNCTION pg_catalog.alter_cstore_table_reset( + table_name regclass, + block_row_count bool, + stripe_row_count bool, + compression bool); + +END IF; +END IF; +END; +$ceoe$; + +COMMENT ON FUNCTION citus_internal.cstore_ensure_objects_exist() + IS 'internal function to be called by pg_catalog.citus_finish_pg_upgrade responsible for creating the columnar objects'; diff --git a/src/backend/columnar/sql/udfs/cstore_tableam_handler/10.0-1.sql b/src/backend/columnar/sql/udfs/cstore_tableam_handler/10.0-1.sql new file mode 100644 index 000000000..6d6a7db3f --- /dev/null 
+++ b/src/backend/columnar/sql/udfs/cstore_tableam_handler/10.0-1.sql @@ -0,0 +1,9 @@ +CREATE OR REPLACE FUNCTION cstore.cstore_tableam_handler(internal) + RETURNS table_am_handler + LANGUAGE C +AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; + +COMMENT ON FUNCTION cstore.cstore_tableam_handler(internal) + IS 'internal function returning the handler for cstore tables'; + +CREATE ACCESS METHOD cstore_tableam TYPE TABLE HANDLER cstore.cstore_tableam_handler; diff --git a/src/backend/columnar/sql/udfs/cstore_tableam_handler/latest.sql b/src/backend/columnar/sql/udfs/cstore_tableam_handler/latest.sql new file mode 100644 index 000000000..6d6a7db3f --- /dev/null +++ b/src/backend/columnar/sql/udfs/cstore_tableam_handler/latest.sql @@ -0,0 +1,9 @@ +CREATE OR REPLACE FUNCTION cstore.cstore_tableam_handler(internal) + RETURNS table_am_handler + LANGUAGE C +AS 'MODULE_PATHNAME', 'cstore_tableam_handler'; + +COMMENT ON FUNCTION cstore.cstore_tableam_handler(internal) + IS 'internal function returning the handler for cstore tables'; + +CREATE ACCESS METHOD cstore_tableam TYPE TABLE HANDLER cstore.cstore_tableam_handler; diff --git a/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql b/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql index e27645ff8..43895d047 100644 --- a/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql +++ b/src/backend/distributed/sql/citus--9.5-1--10.0-1.sql @@ -2,4 +2,6 @@ -- bump version to 10.0-1 +#include "udfs/citus_finish_pg_upgrade/10.0-1.sql" + #include "../../columnar/sql/columnar--9.5-1--10.0-1.sql" diff --git a/src/backend/distributed/sql/downgrades/citus--10.0-1--9.5-1.sql b/src/backend/distributed/sql/downgrades/citus--10.0-1--9.5-1.sql index e721a4660..a86527b1e 100644 --- a/src/backend/distributed/sql/downgrades/citus--10.0-1--9.5-1.sql +++ b/src/backend/distributed/sql/downgrades/citus--10.0-1--9.5-1.sql @@ -1,4 +1,6 @@ -- citus--10.0-1--9.5-1 -- this is an empty downgrade path since citus--9.5-1--10.0-1.sql is empty for now 
+#include "../udfs/citus_finish_pg_upgrade/9.5-1.sql" + #include "../../../columnar/sql/downgrades/columnar--10.0-1--9.5-1.sql" diff --git a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.0-1.sql b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.0-1.sql new file mode 100644 index 000000000..d0fdfc2e2 --- /dev/null +++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/10.0-1.sql @@ -0,0 +1,116 @@ +CREATE OR REPLACE FUNCTION pg_catalog.citus_finish_pg_upgrade() + RETURNS void + LANGUAGE plpgsql + SET search_path = pg_catalog + AS $cppu$ +DECLARE + table_name regclass; + command text; + trigger_name text; +BEGIN + -- + -- restore citus catalog tables + -- + INSERT INTO pg_catalog.pg_dist_partition SELECT * FROM public.pg_dist_partition; + INSERT INTO pg_catalog.pg_dist_shard SELECT * FROM public.pg_dist_shard; + INSERT INTO pg_catalog.pg_dist_placement SELECT * FROM public.pg_dist_placement; + INSERT INTO pg_catalog.pg_dist_node_metadata SELECT * FROM public.pg_dist_node_metadata; + INSERT INTO pg_catalog.pg_dist_node SELECT * FROM public.pg_dist_node; + INSERT INTO pg_catalog.pg_dist_local_group SELECT * FROM public.pg_dist_local_group; + INSERT INTO pg_catalog.pg_dist_transaction SELECT * FROM public.pg_dist_transaction; + INSERT INTO pg_catalog.pg_dist_colocation SELECT * FROM public.pg_dist_colocation; + -- enterprise catalog tables + INSERT INTO pg_catalog.pg_dist_authinfo SELECT * FROM public.pg_dist_authinfo; + INSERT INTO pg_catalog.pg_dist_poolinfo SELECT * FROM public.pg_dist_poolinfo; + + ALTER TABLE pg_catalog.pg_dist_rebalance_strategy DISABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger; + INSERT INTO pg_catalog.pg_dist_rebalance_strategy SELECT + name, + default_strategy, + shard_cost_function::regprocedure::regproc, + node_capacity_function::regprocedure::regproc, + shard_allowed_on_node_function::regprocedure::regproc, + default_threshold, + minimum_threshold + FROM public.pg_dist_rebalance_strategy; + 
ALTER TABLE pg_catalog.pg_dist_rebalance_strategy ENABLE TRIGGER pg_dist_rebalance_strategy_enterprise_check_trigger; + + -- + -- drop backup tables + -- + DROP TABLE public.pg_dist_authinfo; + DROP TABLE public.pg_dist_colocation; + DROP TABLE public.pg_dist_local_group; + DROP TABLE public.pg_dist_node; + DROP TABLE public.pg_dist_node_metadata; + DROP TABLE public.pg_dist_partition; + DROP TABLE public.pg_dist_placement; + DROP TABLE public.pg_dist_poolinfo; + DROP TABLE public.pg_dist_shard; + DROP TABLE public.pg_dist_transaction; + DROP TABLE public.pg_dist_rebalance_strategy; + + -- + -- reset sequences + -- + PERFORM setval('pg_catalog.pg_dist_shardid_seq', (SELECT MAX(shardid)+1 AS max_shard_id FROM pg_dist_shard), false); + PERFORM setval('pg_catalog.pg_dist_placement_placementid_seq', (SELECT MAX(placementid)+1 AS max_placement_id FROM pg_dist_placement), false); + PERFORM setval('pg_catalog.pg_dist_groupid_seq', (SELECT MAX(groupid)+1 AS max_group_id FROM pg_dist_node), false); + PERFORM setval('pg_catalog.pg_dist_node_nodeid_seq', (SELECT MAX(nodeid)+1 AS max_node_id FROM pg_dist_node), false); + PERFORM setval('pg_catalog.pg_dist_colocationid_seq', (SELECT MAX(colocationid)+1 AS max_colocation_id FROM pg_dist_colocation), false); + + -- + -- register triggers + -- + FOR table_name IN SELECT logicalrelid FROM pg_catalog.pg_dist_partition + LOOP + trigger_name := 'truncate_trigger_' || table_name::oid; + command := 'create trigger ' || trigger_name || ' after truncate on ' || table_name || ' execute procedure pg_catalog.citus_truncate_trigger()'; + EXECUTE command; + command := 'update pg_trigger set tgisinternal = true where tgname = ' || quote_literal(trigger_name); + EXECUTE command; + END LOOP; + + -- + -- set dependencies + -- + INSERT INTO pg_depend + SELECT + 'pg_class'::regclass::oid as classid, + p.logicalrelid::regclass::oid as objid, + 0 as objsubid, + 'pg_extension'::regclass::oid as refclassid, + (select oid from pg_extension where extname 
= 'citus') as refobjid, + 0 as refobjsubid , + 'n' as deptype + FROM pg_catalog.pg_dist_partition p; + + -- restore pg_dist_object from the stable identifiers + -- DELETE/INSERT to avoid primary key violations + WITH old_records AS ( + DELETE FROM + citus.pg_dist_object + RETURNING + type, + object_names, + object_args, + distribution_argument_index, + colocationid + ) + INSERT INTO citus.pg_dist_object (classid, objid, objsubid, distribution_argument_index, colocationid) + SELECT + address.classid, + address.objid, + address.objsubid, + naming.distribution_argument_index, + naming.colocationid + FROM + old_records naming, + pg_get_object_address(naming.type, naming.object_names, naming.object_args) address; + + PERFORM citus_internal.cstore_ensure_objects_exist(); +END; +$cppu$; + +COMMENT ON FUNCTION pg_catalog.citus_finish_pg_upgrade() + IS 'perform tasks to restore citus settings from a location that has been prepared before pg_upgrade'; diff --git a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql index d936c958b..d0fdfc2e2 100644 --- a/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql +++ b/src/backend/distributed/sql/udfs/citus_finish_pg_upgrade/latest.sql @@ -107,6 +107,8 @@ BEGIN FROM old_records naming, pg_get_object_address(naming.type, naming.object_names, naming.object_args) address; + + PERFORM citus_internal.cstore_ensure_objects_exist(); END; $cppu$; diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 19fda1889..7ec7ad649 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -472,6 +472,7 @@ SELECT * FROM print_extension_changes(); | foreign-data wrapper cstore_fdw | function alter_cstore_table_reset(regclass,boolean,boolean,boolean) | function alter_cstore_table_set(regclass,integer,integer,name) + | function 
citus_internal.cstore_ensure_objects_exist() | function cstore.cstore_ddl_event_end_trigger() | function cstore.cstore_fdw_handler() | function cstore.cstore_fdw_validator(text[],oid) @@ -482,7 +483,7 @@ SELECT * FROM print_extension_changes(); | table cstore.cstore_skipnodes | table cstore.cstore_stripes | view cstore.cstore_options -(15 rows) +(16 rows) DROP TABLE prev_objects, extension_diff; -- show running version diff --git a/src/test/regress/expected/upgrade_list_citus_objects.out b/src/test/regress/expected/upgrade_list_citus_objects.out index c0bceef2c..4eb6216fc 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects.out +++ b/src/test/regress/expected/upgrade_list_citus_objects.out @@ -38,6 +38,7 @@ ORDER BY 1; function citus_executor_name(integer) function citus_extradata_container(internal) function citus_finish_pg_upgrade() + function citus_internal.cstore_ensure_objects_exist() function citus_internal.find_groupid_for_node(text,integer) function citus_internal.pg_dist_node_trigger_func() function citus_internal.pg_dist_rebalance_strategy_enterprise_check() @@ -222,5 +223,5 @@ ORDER BY 1; view citus_worker_stat_activity view cstore.cstore_options view pg_dist_shard_placement -(206 rows) +(207 rows) diff --git a/src/test/regress/expected/upgrade_list_citus_objects_0.out b/src/test/regress/expected/upgrade_list_citus_objects_0.out index ec53e1f75..03a4054e5 100644 --- a/src/test/regress/expected/upgrade_list_citus_objects_0.out +++ b/src/test/regress/expected/upgrade_list_citus_objects_0.out @@ -35,6 +35,7 @@ ORDER BY 1; function citus_executor_name(integer) function citus_extradata_container(internal) function citus_finish_pg_upgrade() + function citus_internal.cstore_ensure_objects_exist() function citus_internal.find_groupid_for_node(text,integer) function citus_internal.pg_dist_node_trigger_func() function citus_internal.pg_dist_rebalance_strategy_enterprise_check() @@ -218,5 +219,5 @@ ORDER BY 1; view citus_worker_stat_activity view 
cstore.cstore_options view pg_dist_shard_placement -(202 rows) +(203 rows) From 22df8027b03e4d4afb0b8352567eb9fedb5762e2 Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Tue, 17 Nov 2020 18:44:37 +0100 Subject: [PATCH 119/124] add extra output for multi_extension targeting pg11 --- src/test/regress/expected/multi_extension.out | 10 + .../regress/expected/multi_extension_0.out | 829 ++++++++++++++++++ src/test/regress/sql/multi_extension.sql | 5 + 3 files changed, 844 insertions(+) create mode 100644 src/test/regress/expected/multi_extension_0.out diff --git a/src/test/regress/expected/multi_extension.out b/src/test/regress/expected/multi_extension.out index 7ec7ad649..08f92c034 100644 --- a/src/test/regress/expected/multi_extension.out +++ b/src/test/regress/expected/multi_extension.out @@ -5,6 +5,16 @@ -- -- It'd be nice to script generation of this file, but alas, that's -- not done yet. +-- differentiate the output file for pg11 and versions above, with regards to objects +-- created per citus version depending on the postgres version. Upgrade tests verify the +-- objects are added in citus_finish_pg_upgrade() +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 11 AS version_above_eleven; + version_above_eleven +--------------------------------------------------------------------- + t +(1 row) + SET citus.next_shard_id TO 580000; SELECT $definition$ CREATE OR REPLACE FUNCTION test.maintenance_worker() diff --git a/src/test/regress/expected/multi_extension_0.out b/src/test/regress/expected/multi_extension_0.out new file mode 100644 index 000000000..9605e9d32 --- /dev/null +++ b/src/test/regress/expected/multi_extension_0.out @@ -0,0 +1,829 @@ +-- +-- MULTI_EXTENSION +-- +-- Tests around extension creation / upgrades +-- +-- It'd be nice to script generation of this file, but alas, that's +-- not done yet. 
+-- differentiate the output file for pg11 and versions above, with regards to objects +-- created per citus version depending on the postgres version. Upgrade tests verify the +-- objects are added in citus_finish_pg_upgrade() +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 11 AS version_above_eleven; + version_above_eleven +--------------------------------------------------------------------- + f +(1 row) + +SET citus.next_shard_id TO 580000; +SELECT $definition$ +CREATE OR REPLACE FUNCTION test.maintenance_worker() + RETURNS pg_stat_activity + LANGUAGE plpgsql +AS $$ +DECLARE + activity record; +BEGIN + DO 'BEGIN END'; -- Force maintenance daemon to start + -- we don't want to wait forever; loop will exit after 20 seconds + FOR i IN 1 .. 200 LOOP + PERFORM pg_stat_clear_snapshot(); + SELECT * INTO activity FROM pg_stat_activity + WHERE application_name = 'Citus Maintenance Daemon' AND datname = current_database(); + IF activity.pid IS NOT NULL THEN + RETURN activity; + ELSE + PERFORM pg_sleep(0.1); + END IF ; + END LOOP; + -- fail if we reach the end of this loop + raise 'Waited too long for maintenance daemon to start'; +END; +$$; +$definition$ create_function_test_maintenance_worker +\gset +CREATE TABLE prev_objects(description text); +CREATE TABLE extension_diff(previous_object text COLLATE "C", + current_object text COLLATE "C"); +CREATE FUNCTION print_extension_changes() +RETURNS TABLE(previous_object text, current_object text) +AS $func$ +BEGIN + TRUNCATE TABLE extension_diff; + + CREATE TABLE current_objects AS + SELECT pg_catalog.pg_describe_object(classid, objid, 0) AS description + FROM pg_catalog.pg_depend, pg_catalog.pg_extension e + WHERE refclassid = 'pg_catalog.pg_extension'::pg_catalog.regclass + AND refobjid = e.oid + AND deptype = 'e' + AND e.extname='citus'; + + INSERT INTO extension_diff + SELECT p.description previous_object, c.description current_object + FROM current_objects c FULL JOIN prev_objects p + ON 
p.description = c.description + WHERE p.description is null OR c.description is null; + + DROP TABLE prev_objects; + ALTER TABLE current_objects RENAME TO prev_objects; + + RETURN QUERY SELECT * FROM extension_diff ORDER BY 1, 2; +END +$func$ LANGUAGE plpgsql; +CREATE SCHEMA test; +:create_function_test_maintenance_worker +-- check maintenance daemon is started +SELECT datname, current_database(), + usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus') +FROM test.maintenance_worker(); + datname | current_database | usename | extowner +--------------------------------------------------------------------- + regression | regression | postgres | postgres +(1 row) + +-- ensure no objects were created outside pg_catalog +SELECT COUNT(*) +FROM pg_depend AS pgd, + pg_extension AS pge, + LATERAL pg_identify_object(pgd.classid, pgd.objid, pgd.objsubid) AS pgio +WHERE pgd.refclassid = 'pg_extension'::regclass AND + pgd.refobjid = pge.oid AND + pge.extname = 'citus' AND + pgio.schema NOT IN ('pg_catalog', 'citus', 'citus_internal', 'test', 'cstore'); + count +--------------------------------------------------------------------- + 0 +(1 row) + +-- DROP EXTENSION pre-created by the regression suite +DROP EXTENSION citus; +\c +-- these tests switch between citus versions and call ddl's that require pg_dist_object to be created +SET citus.enable_object_propagation TO 'false'; +SET citus.enable_version_checks TO 'false'; +CREATE EXTENSION citus VERSION '7.0-1'; +ALTER EXTENSION citus UPDATE TO '7.0-2'; +ALTER EXTENSION citus UPDATE TO '7.0-3'; +ALTER EXTENSION citus UPDATE TO '7.0-4'; +ALTER EXTENSION citus UPDATE TO '7.0-5'; +ALTER EXTENSION citus UPDATE TO '7.0-6'; +ALTER EXTENSION citus UPDATE TO '7.0-7'; +ALTER EXTENSION citus UPDATE TO '7.0-8'; +ALTER EXTENSION citus UPDATE TO '7.0-9'; +ALTER EXTENSION citus UPDATE TO '7.0-10'; +ALTER EXTENSION citus UPDATE TO '7.0-11'; +ALTER EXTENSION citus UPDATE TO '7.0-12'; +ALTER EXTENSION citus UPDATE TO 
'7.0-13'; +ALTER EXTENSION citus UPDATE TO '7.0-14'; +ALTER EXTENSION citus UPDATE TO '7.0-15'; +ALTER EXTENSION citus UPDATE TO '7.1-1'; +ALTER EXTENSION citus UPDATE TO '7.1-2'; +ALTER EXTENSION citus UPDATE TO '7.1-3'; +ALTER EXTENSION citus UPDATE TO '7.1-4'; +ALTER EXTENSION citus UPDATE TO '7.2-1'; +ALTER EXTENSION citus UPDATE TO '7.2-2'; +ALTER EXTENSION citus UPDATE TO '7.2-3'; +ALTER EXTENSION citus UPDATE TO '7.3-3'; +ALTER EXTENSION citus UPDATE TO '7.4-1'; +ALTER EXTENSION citus UPDATE TO '7.4-2'; +ALTER EXTENSION citus UPDATE TO '7.4-3'; +ALTER EXTENSION citus UPDATE TO '7.5-1'; +ALTER EXTENSION citus UPDATE TO '7.5-2'; +ALTER EXTENSION citus UPDATE TO '7.5-3'; +ALTER EXTENSION citus UPDATE TO '7.5-4'; +ALTER EXTENSION citus UPDATE TO '7.5-5'; +ALTER EXTENSION citus UPDATE TO '7.5-6'; +ALTER EXTENSION citus UPDATE TO '7.5-7'; +ALTER EXTENSION citus UPDATE TO '8.0-1'; +ALTER EXTENSION citus UPDATE TO '8.0-2'; +ALTER EXTENSION citus UPDATE TO '8.0-3'; +ALTER EXTENSION citus UPDATE TO '8.0-4'; +ALTER EXTENSION citus UPDATE TO '8.0-5'; +ALTER EXTENSION citus UPDATE TO '8.0-6'; +ALTER EXTENSION citus UPDATE TO '8.0-7'; +ALTER EXTENSION citus UPDATE TO '8.0-8'; +ALTER EXTENSION citus UPDATE TO '8.0-9'; +ALTER EXTENSION citus UPDATE TO '8.0-10'; +ALTER EXTENSION citus UPDATE TO '8.0-11'; +ALTER EXTENSION citus UPDATE TO '8.0-12'; +ALTER EXTENSION citus UPDATE TO '8.0-13'; +ALTER EXTENSION citus UPDATE TO '8.1-1'; +ALTER EXTENSION citus UPDATE TO '8.2-1'; +ALTER EXTENSION citus UPDATE TO '8.2-2'; +ALTER EXTENSION citus UPDATE TO '8.2-3'; +ALTER EXTENSION citus UPDATE TO '8.2-4'; +ALTER EXTENSION citus UPDATE TO '8.3-1'; +ALTER EXTENSION citus UPDATE TO '9.0-1'; +ALTER EXTENSION citus UPDATE TO '9.0-2'; +ALTER EXTENSION citus UPDATE TO '9.1-1'; +ALTER EXTENSION citus UPDATE TO '9.2-1'; +ALTER EXTENSION citus UPDATE TO '9.2-2'; +-- Snapshot of state at 9.2-2 +SELECT * FROM print_extension_changes(); + previous_object | current_object 
+--------------------------------------------------------------------- + | event trigger citus_cascade_to_partition + | function alter_role_if_exists(text,text) + | function any_value(anyelement) + | function any_value_agg(anyelement,anyelement) + | function array_cat_agg(anyarray) + | function assign_distributed_transaction_id(integer,bigint,timestamp with time zone) + | function authinfo_valid(text) + | function broadcast_intermediate_result(text,text) + | function check_distributed_deadlocks() + | function citus_add_rebalance_strategy(name,regproc,regproc,regproc,real,real) + | function citus_blocking_pids(integer) + | function citus_create_restore_point(text) + | function citus_dist_stat_activity() + | function citus_drop_trigger() + | function citus_executor_name(integer) + | function citus_extradata_container(internal) + | function citus_finish_pg_upgrade() + | function citus_internal.find_groupid_for_node(text,integer) + | function citus_internal.pg_dist_node_trigger_func() + | function citus_internal.pg_dist_rebalance_strategy_enterprise_check() + | function citus_internal.pg_dist_rebalance_strategy_trigger_func() + | function citus_internal.pg_dist_shard_placement_trigger_func() + | function citus_internal.refresh_isolation_tester_prepared_statement() + | function citus_internal.replace_isolation_tester_func() + | function citus_internal.restore_isolation_tester_func() + | function citus_isolation_test_session_is_blocked(integer,integer[]) + | function citus_json_concatenate(json,json) + | function citus_json_concatenate_final(json) + | function citus_jsonb_concatenate(jsonb,jsonb) + | function citus_jsonb_concatenate_final(jsonb) + | function citus_node_capacity_1(integer) + | function citus_prepare_pg_upgrade() + | function citus_query_stats() + | function citus_relation_size(regclass) + | function citus_server_id() + | function citus_set_default_rebalance_strategy(text) + | function citus_shard_allowed_on_node_true(bigint,integer) + | function 
citus_shard_cost_1(bigint) + | function citus_shard_cost_by_disk_size(bigint) + | function citus_stat_statements() + | function citus_stat_statements_reset() + | function citus_table_is_visible(oid) + | function citus_table_size(regclass) + | function citus_text_send_as_jsonb(text) + | function citus_total_relation_size(regclass) + | function citus_truncate_trigger() + | function citus_validate_rebalance_strategy_functions(regproc,regproc,regproc) + | function citus_version() + | function citus_worker_stat_activity() + | function column_name_to_column(regclass,text) + | function column_to_column_name(regclass,text) + | function coord_combine_agg(oid,cstring,anyelement) + | function coord_combine_agg_ffunc(internal,oid,cstring,anyelement) + | function coord_combine_agg_sfunc(internal,oid,cstring,anyelement) + | function create_distributed_function(regprocedure,text,text) + | function create_distributed_table(regclass,text,citus.distribution_type,text) + | function create_intermediate_result(text,text) + | function create_reference_table(regclass) + | function distributed_tables_colocated(regclass,regclass) + | function dump_global_wait_edges() + | function dump_local_wait_edges() + | function fetch_intermediate_results(text[],text,integer) + | function get_all_active_transactions() + | function get_colocated_shard_array(bigint) + | function get_colocated_table_array(regclass) + | function get_current_transaction_id() + | function get_global_active_transactions() + | function get_rebalance_progress() + | function get_rebalance_table_shards_plan(regclass,real,integer,bigint[],boolean,name) + | function get_shard_id_for_distribution_column(regclass,"any") + | function isolate_tenant_to_new_shard(regclass,"any",text) + | function json_cat_agg(json) + | function jsonb_cat_agg(jsonb) + | function lock_relation_if_exists(text,text) + | function lock_shard_metadata(integer,bigint[]) + | function lock_shard_resources(integer,bigint[]) + | function 
mark_tables_colocated(regclass,regclass[]) + | function master_activate_node(text,integer) + | function master_add_inactive_node(text,integer,integer,noderole,name) + | function master_add_node(text,integer,integer,noderole,name) + | function master_add_secondary_node(text,integer,text,integer,name) + | function master_append_table_to_shard(bigint,text,text,integer) + | function master_apply_delete_command(text) + | function master_conninfo_cache_invalidate() + | function master_copy_shard_placement(bigint,text,integer,text,integer,boolean,citus.shard_transfer_mode) + | function master_create_distributed_table(regclass,text,citus.distribution_type) + | function master_create_empty_shard(text) + | function master_create_worker_shards(text,integer,integer) + | function master_disable_node(text,integer) + | function master_dist_local_group_cache_invalidate() + | function master_dist_node_cache_invalidate() + | function master_dist_object_cache_invalidate() + | function master_dist_partition_cache_invalidate() + | function master_dist_placement_cache_invalidate() + | function master_dist_shard_cache_invalidate() + | function master_drain_node(text,integer,citus.shard_transfer_mode,name) + | function master_drop_all_shards(regclass,text,text) + | function master_drop_sequences(text[]) + | function master_get_active_worker_nodes() + | function master_get_new_placementid() + | function master_get_new_shardid() + | function master_get_table_ddl_events(text) + | function master_get_table_metadata(text) + | function master_modify_multiple_shards(text) + | function master_move_shard_placement(bigint,text,integer,text,integer,citus.shard_transfer_mode) + | function master_remove_distributed_table_metadata_from_workers(regclass,text,text) + | function master_remove_node(text,integer) + | function master_remove_partition_metadata(regclass,text,text) + | function master_run_on_worker(text[],integer[],text[],boolean) + | function master_set_node_property(text,integer,text,boolean) 
+ | function master_unmark_object_distributed(oid,oid,integer) + | function master_update_node(integer,text,integer,boolean,integer) + | function master_update_shard_statistics(bigint) + | function master_update_table_statistics(regclass) + | function poolinfo_valid(text) + | function read_intermediate_result(text,citus_copy_format) + | function read_intermediate_results(text[],citus_copy_format) + | function rebalance_table_shards(regclass,real,integer,bigint[],citus.shard_transfer_mode,boolean,name) + | function recover_prepared_transactions() + | function relation_is_a_known_shard(regclass) + | function replicate_table_shards(regclass,integer,integer,bigint[],citus.shard_transfer_mode) + | function role_exists(name) + | function run_command_on_colocated_placements(regclass,regclass,text,boolean) + | function run_command_on_placements(regclass,text,boolean) + | function run_command_on_shards(regclass,text,boolean) + | function run_command_on_workers(text,boolean) + | function shard_name(regclass,bigint) + | function start_metadata_sync_to_node(text,integer) + | function stop_metadata_sync_to_node(text,integer) + | function task_tracker_assign_task(bigint,integer,text) + | function task_tracker_cleanup_job(bigint) + | function task_tracker_conninfo_cache_invalidate() + | function task_tracker_task_status(bigint,integer) + | function upgrade_to_reference_table(regclass) + | function worker_append_table_to_shard(text,text,text,integer) + | function worker_apply_inter_shard_ddl_command(bigint,text,bigint,text,text) + | function worker_apply_sequence_command(text) + | function worker_apply_sequence_command(text,regtype) + | function worker_apply_shard_ddl_command(bigint,text) + | function worker_apply_shard_ddl_command(bigint,text,text) + | function worker_cleanup_job_schema_cache() + | function worker_create_or_replace_object(text) + | function worker_create_schema(bigint,text) + | function worker_create_truncate_trigger(regclass) + | function 
worker_drop_distributed_table(text) + | function worker_execute_sql_task(bigint,integer,text,boolean) + | function worker_fetch_foreign_file(text,text,bigint,text[],integer[]) + | function worker_fetch_partition_file(bigint,integer,integer,integer,text,integer) + | function worker_hash("any") + | function worker_hash_partition_table(bigint,integer,text,text,oid,anyarray) + | function worker_merge_files_and_run_query(bigint,integer,text,text) + | function worker_merge_files_into_table(bigint,integer,text[],text[]) + | function worker_partial_agg(oid,anyelement) + | function worker_partial_agg_ffunc(internal) + | function worker_partial_agg_sfunc(internal,oid,anyelement) + | function worker_partition_query_result(text,text,integer,citus.distribution_type,text[],text[],boolean) + | function worker_range_partition_table(bigint,integer,text,text,oid,anyarray) + | function worker_repartition_cleanup(bigint) + | schema citus + | schema citus_internal + | sequence pg_dist_colocationid_seq + | sequence pg_dist_groupid_seq + | sequence pg_dist_node_nodeid_seq + | sequence pg_dist_placement_placementid_seq + | sequence pg_dist_shardid_seq + | table citus.pg_dist_object + | table pg_dist_authinfo + | table pg_dist_colocation + | table pg_dist_local_group + | table pg_dist_node + | table pg_dist_node_metadata + | table pg_dist_partition + | table pg_dist_placement + | table pg_dist_poolinfo + | table pg_dist_rebalance_strategy + | table pg_dist_shard + | table pg_dist_transaction + | type citus.distribution_type + | type citus.shard_transfer_mode + | type citus_copy_format + | type noderole + | view citus_dist_stat_activity + | view citus_lock_waits + | view citus_shard_indexes_on_worker + | view citus_shards_on_worker + | view citus_stat_statements + | view citus_worker_stat_activity + | view pg_dist_shard_placement +(188 rows) + +-- Test downgrade to 9.2-2 from 9.2-4 +ALTER EXTENSION citus UPDATE TO '9.2-4'; +ALTER EXTENSION citus UPDATE TO '9.2-2'; +-- Should be empty result 
since upgrade+downgrade should be a no-op +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- +(0 rows) + +/* + * As we mistakenly bumped schema version to 9.3-1 in a bad release, we support + * updating citus schema from 9.3-1 to 9.2-4, but we do not support updates to 9.3-1. + * + * Hence the query below should fail. + */ +ALTER EXTENSION citus UPDATE TO '9.3-1'; +ERROR: extension "citus" has no update path from version "9.2-2" to version "9.3-1" +ALTER EXTENSION citus UPDATE TO '9.2-4'; +-- Snapshot of state at 9.2-4 +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- +(0 rows) + +-- Test downgrade to 9.2-4 from 9.3-2 +ALTER EXTENSION citus UPDATE TO '9.3-2'; +ALTER EXTENSION citus UPDATE TO '9.2-4'; +-- Should be empty result since upgrade+downgrade should be a no-op +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- +(0 rows) + +-- Snapshot of state at 9.3-2 +ALTER EXTENSION citus UPDATE TO '9.3-2'; +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- + | function citus_remote_connection_stats() + | function replicate_reference_tables() + | function truncate_local_data_after_distributing_table(regclass) + | function update_distributed_table_colocation(regclass,text) + | function worker_create_or_alter_role(text,text,text) +(5 rows) + +-- Test downgrade to 9.3-2 from 9.4-1 +ALTER EXTENSION citus UPDATE TO '9.4-1'; +ALTER EXTENSION citus UPDATE TO '9.3-2'; +-- Should be empty result since upgrade+downgrade should be a no-op +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- +(0 rows) + +-- 
Snapshot of state at 9.4-1 +ALTER EXTENSION citus UPDATE TO '9.4-1'; +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- + | function worker_last_saved_explain_analyze() + | function worker_save_query_explain_analyze(text,jsonb) +(2 rows) + +-- Test downgrade to 9.4-1 from 9.5-1 +ALTER EXTENSION citus UPDATE TO '9.5-1'; +BEGIN; + SELECT master_add_node('localhost', :master_port, groupId=>0); + master_add_node +--------------------------------------------------------------------- + 1 +(1 row) + + CREATE TABLE citus_local_table (a int); + SELECT create_citus_local_table('citus_local_table'); + create_citus_local_table +--------------------------------------------------------------------- + +(1 row) + + -- downgrade from 9.5-1 to 9.4-1 should fail as we have a citus local table + ALTER EXTENSION citus UPDATE TO '9.4-1'; +ERROR: citus local tables are introduced in Citus 9.5 +HINT: To downgrade Citus to an older version, you should first convert each citus local table to a postgres table by executing SELECT undistribute_table("%s") +CONTEXT: PL/pgSQL function inline_code_block line 11 at RAISE +ROLLBACK; +-- now we can downgrade as there is no citus local table +ALTER EXTENSION citus UPDATE TO '9.4-1'; +-- Should be empty result since upgrade+downgrade should be a no-op +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- +(0 rows) + +-- Snapshot of state at 9.5-1 +ALTER EXTENSION citus UPDATE TO '9.5-1'; +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- + function master_drop_sequences(text[]) | + function task_tracker_assign_task(bigint,integer,text) | + function task_tracker_cleanup_job(bigint) | + function task_tracker_conninfo_cache_invalidate() | + function 
task_tracker_task_status(bigint,integer) | + function worker_execute_sql_task(bigint,integer,text,boolean) | + function worker_merge_files_and_run_query(bigint,integer,text,text) | + | function create_citus_local_table(regclass) + | function undistribute_table(regclass) + | function worker_record_sequence_dependency(regclass,regclass,name) +(10 rows) + +-- Test downgrade to 9.5-1 from 10.0-1 +ALTER EXTENSION citus UPDATE TO '10.0-1'; +ALTER EXTENSION citus UPDATE TO '9.5-1'; +-- Should be empty result since upgrade+downgrade should be a no-op +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- +(0 rows) + +-- Snapshot of state at 10.0-1 +ALTER EXTENSION citus UPDATE TO '10.0-1'; +SELECT * FROM print_extension_changes(); + previous_object | current_object +--------------------------------------------------------------------- + | event trigger cstore_ddl_event_end + | foreign-data wrapper cstore_fdw + | function citus_internal.cstore_ensure_objects_exist() + | function cstore.cstore_ddl_event_end_trigger() + | function cstore.cstore_fdw_handler() + | function cstore.cstore_fdw_validator(text[],oid) + | function cstore_table_size(regclass) + | schema cstore + | table cstore.cstore_data_files + | table cstore.cstore_skipnodes + | table cstore.cstore_stripes + | view cstore.cstore_options +(12 rows) + +DROP TABLE prev_objects, extension_diff; +-- show running version +SHOW citus.version; + citus.version +--------------------------------------------------------------------- + 10.0devel +(1 row) + +-- ensure no objects were created outside pg_catalog +SELECT COUNT(*) +FROM pg_depend AS pgd, + pg_extension AS pge, + LATERAL pg_identify_object(pgd.classid, pgd.objid, pgd.objsubid) AS pgio +WHERE pgd.refclassid = 'pg_extension'::regclass AND + pgd.refobjid = pge.oid AND + pge.extname = 'citus' AND + pgio.schema NOT IN ('pg_catalog', 'citus', 'citus_internal', 'test', 'cstore'); + 
count +--------------------------------------------------------------------- + 0 +(1 row) + +-- see incompatible version errors out +RESET citus.enable_version_checks; +DROP EXTENSION citus; +CREATE EXTENSION citus VERSION '7.0-1'; +ERROR: specified version incompatible with loaded Citus library +DETAIL: Loaded library requires 10.0, but 7.0-1 was specified. +HINT: If a newer library is present, restart the database and try the command again. +-- Test non-distributed queries work even in version mismatch +SET citus.enable_version_checks TO 'false'; +CREATE EXTENSION citus VERSION '7.1-1'; +SET citus.enable_version_checks TO 'true'; +-- Test CREATE TABLE +CREATE TABLE version_mismatch_table(column1 int); +-- Test COPY +\copy version_mismatch_table FROM STDIN; +-- Test INSERT +INSERT INTO version_mismatch_table(column1) VALUES(5); +-- Test SELECT +SELECT * FROM version_mismatch_table ORDER BY column1; + column1 +--------------------------------------------------------------------- + 0 + 1 + 2 + 3 + 4 + 5 +(6 rows) + +-- Test SELECT from pg_catalog +SELECT d.datname as "Name", + pg_catalog.pg_get_userbyid(d.datdba) as "Owner", + pg_catalog.array_to_string(d.datacl, E'\n') AS "Access privileges" +FROM pg_catalog.pg_database d +ORDER BY 1; + Name | Owner | Access privileges +--------------------------------------------------------------------- + postgres | postgres | + regression | postgres | + template0 | postgres | =c/postgres + + | | postgres=CTc/postgres + template1 | postgres | =c/postgres + + | | postgres=CTc/postgres +(4 rows) + +-- We should not distribute table in version mistmatch +SELECT create_distributed_table('version_mismatch_table', 'column1'); +ERROR: loaded Citus library version differs from installed extension version +DETAIL: Loaded library requires 10.0, but the installed extension version is 7.1-1. +HINT: Run ALTER EXTENSION citus UPDATE and try again. 
+-- This function will cause fail in next ALTER EXTENSION +CREATE OR REPLACE FUNCTION pg_catalog.master_dist_authinfo_cache_invalidate() +RETURNS void LANGUAGE plpgsql +AS $function$ +BEGIN +END; +$function$; +SET citus.enable_version_checks TO 'false'; +-- This will fail because of previous function declaration +ALTER EXTENSION citus UPDATE TO '8.1-1'; +ERROR: function "master_dist_authinfo_cache_invalidate" already exists with same argument types +-- We can DROP problematic function and continue ALTER EXTENSION even when version checks are on +SET citus.enable_version_checks TO 'true'; +DROP FUNCTION pg_catalog.master_dist_authinfo_cache_invalidate(); +SET citus.enable_version_checks TO 'false'; +ALTER EXTENSION citus UPDATE TO '8.1-1'; +-- Test updating to the latest version without specifying the version number +ALTER EXTENSION citus UPDATE; +-- re-create in newest version +DROP EXTENSION citus; +\c +CREATE EXTENSION citus; +-- test cache invalidation in workers +\c - - - :worker_1_port +DROP EXTENSION citus; +SET citus.enable_version_checks TO 'false'; +CREATE EXTENSION citus VERSION '7.0-1'; +SET citus.enable_version_checks TO 'true'; +-- during ALTER EXTENSION, we should invalidate the cache +ALTER EXTENSION citus UPDATE; +-- if cache is invalidated succesfull, this \d should work without any problem +\d + List of relations + Schema | Name | Type | Owner +--------------------------------------------------------------------- +(0 rows) + +\c - - - :master_port +-- test https://github.com/citusdata/citus/issues/3409 +CREATE USER testuser2 SUPERUSER; +NOTICE: not propagating CREATE ROLE/USER commands to worker nodes +HINT: Connect to worker nodes directly to manually create all necessary users and roles. +SET ROLE testuser2; +DROP EXTENSION Citus; +-- Loop until we see there's no maintenance daemon running +DO $$begin + for i in 0 .. 
100 loop + if i = 100 then raise 'Waited too long'; end if; + PERFORM pg_stat_clear_snapshot(); + perform * from pg_stat_activity where application_name = 'Citus Maintenance Daemon'; + if not found then exit; end if; + perform pg_sleep(0.1); + end loop; +end$$; +SELECT datid, datname, usename FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon'; + datid | datname | usename +--------------------------------------------------------------------- +(0 rows) + +CREATE EXTENSION Citus; +-- Loop until we there's a maintenance daemon running +DO $$begin + for i in 0 .. 100 loop + if i = 100 then raise 'Waited too long'; end if; + PERFORM pg_stat_clear_snapshot(); + perform * from pg_stat_activity where application_name = 'Citus Maintenance Daemon'; + if found then exit; end if; + perform pg_sleep(0.1); + end loop; +end$$; +SELECT datid, datname, usename FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon'; + datid | datname | usename +--------------------------------------------------------------------- + 16384 | regression | testuser2 +(1 row) + +RESET ROLE; +-- check that maintenance daemon gets (re-)started for the right user +DROP EXTENSION citus; +CREATE USER testuser SUPERUSER; +SET ROLE testuser; +CREATE EXTENSION citus; +SELECT datname, current_database(), + usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus') +FROM test.maintenance_worker(); + datname | current_database | usename | extowner +--------------------------------------------------------------------- + regression | regression | testuser | testuser +(1 row) + +-- and recreate as the right owner +RESET ROLE; +DROP EXTENSION citus; +CREATE EXTENSION citus; +-- Check that maintenance daemon can also be started in another database +CREATE DATABASE another; +NOTICE: Citus partially supports CREATE DATABASE for distributed databases +DETAIL: Citus does not propagate CREATE DATABASE command to workers +HINT: You can manually create a 
database and its extensions on workers. +\c another +CREATE EXTENSION citus; +CREATE SCHEMA test; +:create_function_test_maintenance_worker +-- see that the daemon started +SELECT datname, current_database(), + usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus') +FROM test.maintenance_worker(); + datname | current_database | usename | extowner +--------------------------------------------------------------------- + another | another | postgres | postgres +(1 row) + +-- Test that database with active worker can be dropped. +\c regression +CREATE SCHEMA test_daemon; +-- we create a similar function on the regression database +-- note that this function checks for the existence of the daemon +-- when not found, returns true else tries for 5 times and +-- returns false +CREATE OR REPLACE FUNCTION test_daemon.maintenance_daemon_died(p_dbname text) + RETURNS boolean + LANGUAGE plpgsql +AS $$ +DECLARE + activity record; +BEGIN + PERFORM pg_stat_clear_snapshot(); + SELECT * INTO activity FROM pg_stat_activity + WHERE application_name = 'Citus Maintenance Daemon' AND datname = p_dbname; + IF activity.pid IS NULL THEN + RETURN true; + ELSE + RETURN false; + END IF; +END; +$$; +-- drop the database and see that the daemon is dead +DROP DATABASE another; +SELECT + * +FROM + test_daemon.maintenance_daemon_died('another'); + maintenance_daemon_died +--------------------------------------------------------------------- + t +(1 row) + +-- we don't need the schema and the function anymore +DROP SCHEMA test_daemon CASCADE; +NOTICE: drop cascades to function test_daemon.maintenance_daemon_died(text) +-- verify citus does not crash while creating a table when run against an older worker +-- create_distributed_table piggybacks multiple commands into single one, if one worker +-- did not have the required UDF it should fail instead of crash. 
+-- create a test database, configure citus with single node +CREATE DATABASE another; +NOTICE: Citus partially supports CREATE DATABASE for distributed databases +DETAIL: Citus does not propagate CREATE DATABASE command to workers +HINT: You can manually create a database and its extensions on workers. +\c - - - :worker_1_port +CREATE DATABASE another; +NOTICE: Citus partially supports CREATE DATABASE for distributed databases +DETAIL: Citus does not propagate CREATE DATABASE command to workers +HINT: You can manually create a database and its extensions on workers. +\c - - - :master_port +\c another +CREATE EXTENSION citus; +SET citus.enable_object_propagation TO off; -- prevent distributed transactions during add node +SELECT FROM master_add_node('localhost', :worker_1_port); +WARNING: citus.enable_object_propagation is off, not creating distributed objects on worker +DETAIL: distributed objects are only kept in sync when citus.enable_object_propagation is set to on. Newly activated nodes will not get these objects created +-- +(1 row) + +\c - - - :worker_1_port +CREATE EXTENSION citus; +ALTER FUNCTION assign_distributed_transaction_id(initiator_node_identifier integer, transaction_number bigint, transaction_stamp timestamp with time zone) +RENAME TO dummy_assign_function; +\c - - - :master_port +SET citus.shard_replication_factor to 1; +-- create_distributed_table command should fail +CREATE TABLE t1(a int, b int); +SET client_min_messages TO ERROR; +DO $$ +BEGIN + BEGIN + SELECT create_distributed_table('t1', 'a'); + EXCEPTION WHEN OTHERS THEN + RAISE 'create distributed table failed'; + END; +END; +$$; +ERROR: create distributed table failed +CONTEXT: PL/pgSQL function inline_code_block line 6 at RAISE +\c regression +\c - - - :master_port +DROP DATABASE another; +\c - - - :worker_1_port +DROP DATABASE another; +\c - - - :master_port +-- only the regression database should have a maintenance daemon +SELECT count(*) FROM pg_stat_activity WHERE application_name 
= 'Citus Maintenance Daemon'; + count +--------------------------------------------------------------------- + 1 +(1 row) + +-- recreate the extension immediately after the maintenancae daemon errors +SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon'; + pg_cancel_backend +--------------------------------------------------------------------- + t +(1 row) + +DROP EXTENSION citus; +CREATE EXTENSION citus; +-- wait for maintenance daemon restart +SELECT datname, current_database(), + usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus') +FROM test.maintenance_worker(); + datname | current_database | usename | extowner +--------------------------------------------------------------------- + regression | regression | postgres | postgres +(1 row) + +-- confirm that there is only one maintenance daemon +SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon'; + count +--------------------------------------------------------------------- + 1 +(1 row) + +-- kill the maintenance daemon +SELECT pg_cancel_backend(pid) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon'; + pg_cancel_backend +--------------------------------------------------------------------- + t +(1 row) + +-- reconnect +\c - - - :master_port +-- run something that goes through planner hook and therefore kicks of maintenance daemon +SELECT 1; + ?column? 
+--------------------------------------------------------------------- + 1 +(1 row) + +-- wait for maintenance daemon restart +SELECT datname, current_database(), + usename, (SELECT extowner::regrole::text FROM pg_extension WHERE extname = 'citus') +FROM test.maintenance_worker(); + datname | current_database | usename | extowner +--------------------------------------------------------------------- + regression | regression | postgres | postgres +(1 row) + +-- confirm that there is only one maintenance daemon +SELECT count(*) FROM pg_stat_activity WHERE application_name = 'Citus Maintenance Daemon'; + count +--------------------------------------------------------------------- + 1 +(1 row) + +DROP TABLE version_mismatch_table; diff --git a/src/test/regress/sql/multi_extension.sql b/src/test/regress/sql/multi_extension.sql index 4444bb5f4..515c66b71 100644 --- a/src/test/regress/sql/multi_extension.sql +++ b/src/test/regress/sql/multi_extension.sql @@ -6,6 +6,11 @@ -- It'd be nice to script generation of this file, but alas, that's -- not done yet. +-- differentiate the output file for pg11 and versions above, with regards to objects +-- created per citus version depending on the postgres version. 
Upgrade tests verify the +-- objects are added in citus_finish_pg_upgrade() +SHOW server_version \gset +SELECT substring(:'server_version', '\d+')::int > 11 AS version_above_eleven; SET citus.next_shard_id TO 580000; From 725f4a37d04ae2e3e1a237296b2975e8c4b9da4f Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Mon, 16 Nov 2020 15:02:33 +0100 Subject: [PATCH 120/124] change configure to not have options --- Makefile.global.in | 3 +- configure | 76 ++---------------------- configure.in | 21 ++----- src/backend/columnar/cstore_customscan.c | 4 +- src/backend/columnar/cstore_tableam.c | 2 +- src/backend/columnar/mod.c | 23 +++---- src/include/citus_config.h.in | 11 +--- src/include/citus_version.h.in | 5 +- src/include/columnar/cstore_tableam.h | 2 +- src/test/regress/Makefile | 10 ++-- 10 files changed, 32 insertions(+), 125 deletions(-) diff --git a/Makefile.global.in b/Makefile.global.in index aba3be6f6..318766380 100644 --- a/Makefile.global.in +++ b/Makefile.global.in @@ -92,8 +92,7 @@ endif override CPPFLAGS := @CPPFLAGS@ @CITUS_CPPFLAGS@ -I '${citus_abs_top_srcdir}/src/include' -I'${citus_top_builddir}/src/include' $(CPPFLAGS) override LDFLAGS += @LDFLAGS@ @CITUS_LDFLAGS@ -USE_FDW:=@USE_FDW@ -USE_TABLEAM:=@USE_TABLEAM@ +HAS_TABLEAM:=@HAS_TABLEAM@ # optional file with user defined, additional, rules -include ${citus_abs_srcdir}/src/Makefile.custom diff --git a/configure b/configure index e29228855..619fef623 100755 --- a/configure +++ b/configure @@ -622,8 +622,7 @@ ac_includes_default="\ ac_subst_vars='LTLIBOBJS LIBOBJS -USE_TABLEAM -USE_FDW +HAS_TABLEAM HAS_DOTGIT POSTGRES_BUILDDIR POSTGRES_SRCDIR @@ -692,8 +691,6 @@ with_extra_version enable_coverage with_libcurl with_reports_hostname -with_columnar_tableam -with_columnar_fdw ' ac_precious_vars='build_alias host_alias @@ -1327,9 +1324,6 @@ Optional Packages: --with-reports-hostname=HOSTNAME Use HOSTNAME as hostname for statistics collection and update checks - --without-columnar-tableam - compile with table 
access methods for columnar - --without-columnar-fdw compile with foreign data wrappers for columnar Some influential environment variables: PG_CONFIG Location to find pg_config for target PostgreSQL instalation @@ -4476,69 +4470,13 @@ _ACEOF if test "$version_num" != '11'; then + HAS_TABLEAM=yes - - -# Check whether --with-columnar-tableam was given. -if test "${with_columnar_tableam+set}" = set; then : - withval=$with_columnar_tableam; - case $withval in - yes) - USE_TABLEAM=yes - ;; - no) - : - ;; - *) - as_fn_error $? "no argument expected for --with-columnar-tableam option" "$LINENO" 5 - ;; - esac +$as_echo "#define HAS_TABLEAM 1" >>confdefs.h else - with_columnar_tableam=yes -USE_TABLEAM=yes -fi - - -else - { $as_echo "$as_me:${as_lineno-$LINENO}: postgres does not support table access methodds" >&5 -$as_echo "$as_me: postgres does not support table access methodds" >&6;} -fi; - -if test "$USE_TABLEAM" = yes; then - -$as_echo "#define USE_TABLEAM 1" >>confdefs.h - -fi; - - - - -# Check whether --with-columnar-fdw was given. -if test "${with_columnar_fdw+set}" = set; then : - withval=$with_columnar_fdw; - case $withval in - yes) - USE_FDW=yes - ;; - no) - : - ;; - *) - as_fn_error $? 
"no argument expected for --with-columnar-fdw option" "$LINENO" 5 - ;; - esac - -else - with_columnar_fdw=yes -USE_FDW=yes -fi - - -if test "$USE_FDW" = yes; then - -$as_echo "#define USE_FDW 1" >>confdefs.h - + { $as_echo "$as_me:${as_lineno-$LINENO}: postgres version does not support table access methodds" >&5 +$as_echo "$as_me: postgres version does not support table access methodds" >&6;} fi; # Check if git is installed, when installed the gitref of the checkout will be baked in the application @@ -4616,9 +4554,7 @@ POSTGRES_BUILDDIR="$POSTGRES_BUILDDIR" HAS_DOTGIT="$HAS_DOTGIT" -USE_FDW="$USE_FDW" - -USE_TABLEAM="$USE_TABLEAM" +HAS_TABLEAM="$HAS_TABLEAM" ac_config_files="$ac_config_files Makefile.global" diff --git a/configure.in b/configure.in index b31fc4ff5..07e87a878 100644 --- a/configure.in +++ b/configure.in @@ -213,22 +213,10 @@ AC_DEFINE_UNQUOTED(REPORTS_BASE_URL, "$REPORTS_BASE_URL", [Base URL for statistics collection and update checks]) if test "$version_num" != '11'; then - PGAC_ARG_BOOL(with, columnar-tableam, yes, - [compile with table access methods for columnar], - [USE_TABLEAM=yes]) + HAS_TABLEAM=yes + AC_DEFINE([HAS_TABLEAM], 1, [Define to 1 to build with table access method support, pg12 and up]) else - AC_MSG_NOTICE([postgres does not support table access methodds]) -fi; - -if test "$USE_TABLEAM" = yes; then - AC_DEFINE([USE_TABLEAM], 1, [Define to 1 to build with table access method support. (--with-columnar-tableam)]) -fi; - -PGAC_ARG_BOOL(with, columnar-fdw, yes, - [compile with foreign data wrappers for columnar], - [USE_FDW=yes]) -if test "$USE_FDW" = yes; then - AC_DEFINE([USE_FDW], 1, [Define to 1 to build with foreign datawrapper support. 
(--with-columnar-fdw)]) + AC_MSG_NOTICE([postgres version does not support table access methodds]) fi; # Check if git is installed, when installed the gitref of the checkout will be baked in the application @@ -241,8 +229,7 @@ AC_SUBST(CITUS_LDFLAGS, "$LIBS $CITUS_LDFLAGS") AC_SUBST(POSTGRES_SRCDIR, "$POSTGRES_SRCDIR") AC_SUBST(POSTGRES_BUILDDIR, "$POSTGRES_BUILDDIR") AC_SUBST(HAS_DOTGIT, "$HAS_DOTGIT") -AC_SUBST(USE_FDW, "$USE_FDW") -AC_SUBST(USE_TABLEAM, "$USE_TABLEAM") +AC_SUBST(HAS_TABLEAM, "$HAS_TABLEAM") AC_CONFIG_FILES([Makefile.global]) AC_CONFIG_HEADERS([src/include/citus_config.h] [src/include/citus_version.h]) diff --git a/src/backend/columnar/cstore_customscan.c b/src/backend/columnar/cstore_customscan.c index 3cca3ad29..5b5ffe39f 100644 --- a/src/backend/columnar/cstore_customscan.c +++ b/src/backend/columnar/cstore_customscan.c @@ -11,7 +11,7 @@ */ #include "citus_version.h" -#if USE_TABLEAM +#if HAS_TABLEAM #include "postgres.h" @@ -426,4 +426,4 @@ CStoreScan_ReScanCustomScan(CustomScanState *node) } -#endif /* USE_TABLEAM */ +#endif /* HAS_TABLEAM */ diff --git a/src/backend/columnar/cstore_tableam.c b/src/backend/columnar/cstore_tableam.c index d7c3aff11..6ede404bb 100644 --- a/src/backend/columnar/cstore_tableam.c +++ b/src/backend/columnar/cstore_tableam.c @@ -1,5 +1,5 @@ #include "citus_version.h" -#if USE_TABLEAM +#if HAS_TABLEAM #include "postgres.h" diff --git a/src/backend/columnar/mod.c b/src/backend/columnar/mod.c index ee3ba6d98..ad4605e24 100644 --- a/src/backend/columnar/mod.c +++ b/src/backend/columnar/mod.c @@ -18,27 +18,22 @@ #include "citus_version.h" #include "columnar/cstore.h" +#include "columnar/cstore_fdw.h" #include "columnar/mod.h" -#ifdef USE_TABLEAM +#ifdef HAS_TABLEAM #include "columnar/cstore_tableam.h" #endif -#ifdef USE_FDW -#include "columnar/cstore_fdw.h" -#endif void columnar_init(void) { cstore_init(); - -#ifdef USE_TABLEAM - cstore_tableam_init(); -#endif - -#ifdef USE_FDW cstore_fdw_init(); + +#ifdef HAS_TABLEAM + 
cstore_tableam_init(); #endif } @@ -46,11 +41,9 @@ columnar_init(void) void columnar_fini(void) { -#if USE_TABLEAM + cstore_fdw_finish(); + +#if HAS_TABLEAM cstore_tableam_finish(); #endif - -#ifdef USE_FDW - cstore_fdw_finish(); -#endif } diff --git a/src/include/citus_config.h.in b/src/include/citus_config.h.in index 679f91c09..64854b3ca 100644 --- a/src/include/citus_config.h.in +++ b/src/include/citus_config.h.in @@ -31,6 +31,9 @@ /* A string containing the version number, platform, and C compiler */ #undef CITUS_VERSION_STR +/* Define to 1 to build with table access method support, pg12 and up */ +#undef HAS_TABLEAM + /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H @@ -87,11 +90,3 @@ /* Define to 1 if you have the ANSI C header files. */ #undef STDC_HEADERS - -/* Define to 1 to build with foreign datawrapper support. - (--with-columnar-fdw) */ -#undef USE_FDW - -/* Define to 1 to build with table access method support. - (--with-columnar-tableam) */ -#undef USE_TABLEAM diff --git a/src/include/citus_version.h.in b/src/include/citus_version.h.in index 778d59233..939b0f4b9 100644 --- a/src/include/citus_version.h.in +++ b/src/include/citus_version.h.in @@ -27,8 +27,5 @@ /* Base URL for statistics collection and update checks */ #undef REPORTS_BASE_URL -/* columnar foreign data wrapper capability */ -#undef USE_FDW - /* columnar table access method capability */ -#undef USE_TABLEAM +#undef HAS_TABLEAM diff --git a/src/include/columnar/cstore_tableam.h b/src/include/columnar/cstore_tableam.h index 7bd879068..10b9a3874 100644 --- a/src/include/columnar/cstore_tableam.h +++ b/src/include/columnar/cstore_tableam.h @@ -1,5 +1,5 @@ #include "citus_version.h" -#if USE_TABLEAM +#if HAS_TABLEAM #include "postgres.h" #include "fmgr.h" diff --git a/src/test/regress/Makefile b/src/test/regress/Makefile index cf79f7b06..178bb6356 100644 --- a/src/test/regress/Makefile +++ b/src/test/regress/Makefile @@ -163,11 +163,11 @@ check-follower-cluster: all 
COLUMNAR_SCHEDULES = COLUMNAR_ISOLATION_SCHEDULES = -ifeq ($(USE_FDW),yes) - COLUMNAR_SCHEDULES += columnar_fdw_schedule - COLUMNAR_ISOLATION_SCHEDULES += columnar_fdw_isolation_schedule -endif -ifeq ($(USE_TABLEAM),yes) +# even though we always add the fdw schedules, keep them separate from the declaration +# above for easy removabl when fdw support is removed +COLUMNAR_SCHEDULES += columnar_fdw_schedule +COLUMNAR_ISOLATION_SCHEDULES += columnar_fdw_isolation_schedule +ifeq ($(HAS_TABLEAM),yes) COLUMNAR_SCHEDULES += columnar_am_schedule COLUMNAR_ISOLATION_SCHEDULES += columnar_am_isolation_schedule endif From 97cba2d5b697d43697c2a518f56c2d0e3318393e Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Tue, 17 Nov 2020 11:54:23 -0800 Subject: [PATCH 121/124] Implements write state management for tuple inserts. TableAM API doesn't allow us to pass around a state variable along all of the tuple inserts belonging to the same command. We require this in columnar store, since we batch them, and when we have enough rows we flush them as stripes. To do that, we keep a (relfilenode) -> stack of (subxact id, TableWriteState) global mapping. **Inserts** Whenever we want to insert a tuple, we look up for the relation's relfilenode in this mapping. If top of the stack matches current subtransaction, we us the existing TableWriteState. Otherwise, we allocate a new TableWriteState and push it on top of stack. **(Sub)Transaction Commit/Aborts** When the subtransaction or transaction is committed, we flush and pop all entries matching current SubTransactionId. When the subtransaction or transaction is committed, we pop all entries matching current SubTransactionId and discard them without flushing. **Reads** Since we might have unwritten rows which needs to be read by a table scan, we flush write states on SELECTs. 
Since flushing the write state of upper transactions in a subtransaction will cause metadata being written in wrong subtransaction, we ERROR out if any of the upper subtransactions have unflushed rows. **Table Drops** We record in which subtransaction the table was dropped. When committing a subtransaction in which table was dropped, we propagate the drop to upper transaction. When aborting a subtransaction in which table was dropped, we mark table as not deleted. --- src/backend/columnar/cstore_fdw.c | 10 +- src/backend/columnar/cstore_metadata_tables.c | 9 +- src/backend/columnar/cstore_tableam.c | 206 +++++----- src/backend/columnar/cstore_writer.c | 66 +-- src/backend/columnar/write_state_management.c | 384 ++++++++++++++++++ src/include/columnar/cstore.h | 24 +- src/test/regress/columnar_am_schedule | 2 + src/test/regress/expected/am_recursive.out | 252 ++++++++++++ src/test/regress/expected/am_rollback.out | 6 + src/test/regress/expected/am_transactions.out | 142 +++++++ src/test/regress/expected/am_trigger.out | 94 +++++ .../regress/expected/am_vacuum_vs_insert.out | 2 +- src/test/regress/sql/am_recursive.sql | 143 +++++++ src/test/regress/sql/am_rollback.sql | 1 + src/test/regress/sql/am_transactions.sql | 70 ++++ src/test/regress/sql/am_trigger.sql | 83 ++++ 16 files changed, 1354 insertions(+), 140 deletions(-) create mode 100644 src/backend/columnar/write_state_management.c create mode 100644 src/test/regress/expected/am_recursive.out create mode 100644 src/test/regress/expected/am_transactions.out create mode 100644 src/test/regress/sql/am_recursive.sql create mode 100644 src/test/regress/sql/am_transactions.sql diff --git a/src/backend/columnar/cstore_fdw.c b/src/backend/columnar/cstore_fdw.c index 19b3d1847..634a76f5d 100644 --- a/src/backend/columnar/cstore_fdw.c +++ b/src/backend/columnar/cstore_fdw.c @@ -549,7 +549,7 @@ CopyIntoCStoreTable(const CopyStmt *copyStatement, const char *queryString) #endif /* init state to write to the cstore file */ - 
writeState = CStoreBeginWrite(relation, + writeState = CStoreBeginWrite(relation->rd_node, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, @@ -1992,13 +1992,16 @@ CStoreBeginForeignInsert(ModifyTableState *modifyTableState, ResultRelInfo *rela CStoreOptions *cstoreOptions = CStoreGetOptions(foreignTableOid); TupleDesc tupleDescriptor = RelationGetDescr(relationInfo->ri_RelationDesc); - TableWriteState *writeState = CStoreBeginWrite(relation, + TableWriteState *writeState = CStoreBeginWrite(relation->rd_node, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, tupleDescriptor); relationInfo->ri_FdwState = (void *) writeState; + + /* keep the lock */ + relation_close(relation, NoLock); } @@ -2055,10 +2058,7 @@ CStoreEndForeignInsert(EState *executorState, ResultRelInfo *relationInfo) /* writeState is NULL during Explain queries */ if (writeState != NULL) { - Relation relation = writeState->relation; - CStoreEndWrite(writeState); - heap_close(relation, RowExclusiveLock); } } diff --git a/src/backend/columnar/cstore_metadata_tables.c b/src/backend/columnar/cstore_metadata_tables.c index 301dd0c91..26e176535 100644 --- a/src/backend/columnar/cstore_metadata_tables.c +++ b/src/backend/columnar/cstore_metadata_tables.c @@ -134,18 +134,19 @@ InitCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCoun { NameData compressionName = { 0 }; - namestrcpy(&compressionName, CompressionTypeStr(compression)); - bool nulls[Natts_cstore_data_files] = { 0 }; Datum values[Natts_cstore_data_files] = { ObjectIdGetDatum(relfilenode), Int32GetDatum(blockRowCount), Int32GetDatum(stripeRowCount), - NameGetDatum(&compressionName), + 0, /* to be filled below */ Int32GetDatum(CSTORE_VERSION_MAJOR), Int32GetDatum(CSTORE_VERSION_MINOR) }; + namestrcpy(&compressionName, CompressionTypeStr(compression)); + values[Anum_cstore_data_files_compression - 1] = NameGetDatum(&compressionName); + 
DeleteDataFileMetadataRowIfExists(relfilenode); Oid cstoreDataFilesOid = CStoreDataFilesRelationId(); @@ -171,6 +172,7 @@ UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCo Datum values[Natts_cstore_data_files] = { 0 }; bool isnull[Natts_cstore_data_files] = { 0 }; bool replace[Natts_cstore_data_files] = { 0 }; + bool changed = false; Relation cstoreDataFiles = heap_open(CStoreDataFilesRelationId(), RowExclusiveLock); TupleDesc tupleDescriptor = RelationGetDescr(cstoreDataFiles); @@ -192,7 +194,6 @@ UpdateCStoreDataFileMetadata(Oid relfilenode, int blockRowCount, int stripeRowCo Form_cstore_data_files metadata = (Form_cstore_data_files) GETSTRUCT(heapTuple); - bool changed = false; if (metadata->block_row_count != blockRowCount) { values[Anum_cstore_data_files_block_row_count - 1] = Int32GetDatum(blockRowCount); diff --git a/src/backend/columnar/cstore_tableam.c b/src/backend/columnar/cstore_tableam.c index 6ede404bb..1669b571c 100644 --- a/src/backend/columnar/cstore_tableam.c +++ b/src/backend/columnar/cstore_tableam.c @@ -68,10 +68,7 @@ typedef struct CStoreScanDescData typedef struct CStoreScanDescData *CStoreScanDesc; -static TableWriteState *CStoreWriteState = NULL; -static ExecutorEnd_hook_type PreviousExecutorEndHook = NULL; -static MemoryContext CStoreContext = NULL; -static object_access_hook_type prevObjectAccessHook = NULL; +static object_access_hook_type PrevObjectAccessHook = NULL; static ProcessUtility_hook_type PreviousProcessUtilityHook = NULL; /* forward declaration for static functions */ @@ -122,13 +119,13 @@ CStoreTableAMDefaultOptions() * CStoreTableAMGetOptions returns the options based on a relation. 
It is advised the * relation is a cstore table am table, if not it will raise an error */ -static CStoreOptions * -CStoreTableAMGetOptions(Relation rel) +CStoreOptions * +CStoreTableAMGetOptions(Oid relfilenode) { - Assert(rel != NULL); + Assert(OidIsValid(relfilenode)); CStoreOptions *cstoreOptions = palloc0(sizeof(CStoreOptions)); - DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + DataFileMetadata *metadata = ReadDataFileMetadata(relfilenode, false); cstoreOptions->compressionType = metadata->compression; cstoreOptions->stripeRowCount = metadata->stripeRowCount; cstoreOptions->blockRowCount = metadata->blockRowCount; @@ -136,66 +133,6 @@ CStoreTableAMGetOptions(Relation rel) } -static MemoryContext -GetCStoreMemoryContext() -{ - if (CStoreContext == NULL) - { - CStoreContext = AllocSetContextCreate(TopMemoryContext, "cstore context", - ALLOCSET_DEFAULT_SIZES); - } - return CStoreContext; -} - - -static void -ResetCStoreMemoryContext() -{ - if (CStoreContext != NULL) - { - MemoryContextReset(CStoreContext); - } -} - - -static void -cstore_init_write_state(Relation relation) -{ - if (CStoreWriteState != NULL) - { - /* TODO: consider whether it's possible for a new write to start */ - /* before an old one is flushed */ - Assert(CStoreWriteState->relation->rd_id == relation->rd_id); - } - - if (CStoreWriteState == NULL) - { - CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(relation); - TupleDesc tupdesc = RelationGetDescr(relation); - - elog(LOG, "initializing write state for relation %d", relation->rd_id); - CStoreWriteState = CStoreBeginWrite(relation, - cstoreOptions->compressionType, - cstoreOptions->stripeRowCount, - cstoreOptions->blockRowCount, - tupdesc); - } -} - - -static void -cstore_free_write_state() -{ - if (CStoreWriteState != NULL) - { - elog(LOG, "flushing write state for relation %d", - CStoreWriteState->relation->rd_id); - CStoreEndWrite(CStoreWriteState); - CStoreWriteState = NULL; - } -} - - static List * 
RelationColumnList(Relation rel) { @@ -263,9 +200,10 @@ cstore_beginscan_extended(Relation relation, Snapshot snapshot, uint32 flags, Bitmapset *attr_needed, List *scanQual) { TupleDesc tupdesc = relation->rd_att; + Oid relfilenode = relation->rd_node.relNode; CStoreScanDesc scan = palloc(sizeof(CStoreScanDescData)); List *neededColumnList = NIL; - MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); + MemoryContext oldContext = MemoryContextSwitchTo(TopTransactionContext); ListCell *columnCell = NULL; scan->cs_base.rs_rd = relation; @@ -275,6 +213,15 @@ cstore_beginscan_extended(Relation relation, Snapshot snapshot, scan->cs_base.rs_flags = flags; scan->cs_base.rs_parallel = parallel_scan; + if (PendingWritesInUpperTransactions(relfilenode, GetCurrentSubTransactionId())) + { + elog(ERROR, + "cannot read from table when there is unflushed data in upper transactions"); + } + + FlushWriteStateForRelfilenode(relfilenode, GetCurrentSubTransactionId()); + + List *columnList = RelationColumnList(relation); /* only collect columns that we need for the scan */ @@ -319,7 +266,7 @@ static bool cstore_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *slot) { CStoreScanDesc scan = (CStoreScanDesc) sscan; - MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); + MemoryContext oldContext = MemoryContextSwitchTo(TopTransactionContext); ExecClearTuple(slot); @@ -437,9 +384,9 @@ static void cstore_tuple_insert(Relation relation, TupleTableSlot *slot, CommandId cid, int options, BulkInsertState bistate) { - MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); - - cstore_init_write_state(relation); + TableWriteState *writeState = cstore_init_write_state(relation->rd_node, + RelationGetDescr(relation), + GetCurrentSubTransactionId()); HeapTuple heapTuple = ExecCopySlotHeapTuple(slot); if (HeapTupleHasExternal(heapTuple)) @@ -453,8 +400,7 @@ cstore_tuple_insert(Relation relation, TupleTableSlot 
*slot, CommandId cid, slot_getallattrs(slot); - CStoreWriteRow(CStoreWriteState, slot->tts_values, slot->tts_isnull); - MemoryContextSwitchTo(oldContext); + CStoreWriteRow(writeState, slot->tts_values, slot->tts_isnull); } @@ -479,9 +425,9 @@ static void cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, CommandId cid, int options, BulkInsertState bistate) { - MemoryContext oldContext = MemoryContextSwitchTo(GetCStoreMemoryContext()); - - cstore_init_write_state(relation); + TableWriteState *writeState = cstore_init_write_state(relation->rd_node, + RelationGetDescr(relation), + GetCurrentSubTransactionId()); for (int i = 0; i < ntuples; i++) { @@ -499,9 +445,8 @@ cstore_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, slot_getallattrs(tupleSlot); - CStoreWriteRow(CStoreWriteState, tupleSlot->tts_values, tupleSlot->tts_isnull); + CStoreWriteRow(writeState, tupleSlot->tts_values, tupleSlot->tts_isnull); } - MemoryContextSwitchTo(oldContext); } @@ -537,11 +482,9 @@ cstore_tuple_lock(Relation relation, ItemPointer tid, Snapshot snapshot, static void cstore_finish_bulk_insert(Relation relation, int options) { - /*TODO: flush relation like for heap? */ - /* free write state or only in ExecutorEnd_hook? */ - - /* for COPY */ - cstore_free_write_state(); + /* + * Nothing to do here. We keep write states live until transaction end. 
+ */ } @@ -556,6 +499,9 @@ cstore_relation_set_new_filenode(Relation rel, uint64 blockRowCount = 0; uint64 stripeRowCount = 0; CompressionType compression = 0; + Oid oldRelfilenode = rel->rd_node.relNode; + + MarkRelfilenodeDropped(oldRelfilenode, GetCurrentSubTransactionId()); if (metadata != NULL) { @@ -589,7 +535,10 @@ cstore_relation_set_new_filenode(Relation rel, static void cstore_relation_nontransactional_truncate(Relation rel) { - DataFileMetadata *metadata = ReadDataFileMetadata(rel->rd_node.relNode, false); + Oid relfilenode = rel->rd_node.relNode; + DataFileMetadata *metadata = ReadDataFileMetadata(relfilenode, false); + + NonTransactionDropWriteState(relfilenode); /* * No need to set new relfilenode, since the table was created in this @@ -651,23 +600,23 @@ cstore_relation_copy_for_cluster(Relation OldHeap, Relation NewHeap, * relation first. */ - CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(OldHeap); + CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(OldHeap->rd_node.relNode); UpdateCStoreDataFileMetadata(NewHeap->rd_node.relNode, cstoreOptions->blockRowCount, cstoreOptions->stripeRowCount, cstoreOptions->compressionType); - cstoreOptions = CStoreTableAMGetOptions(NewHeap); + cstoreOptions = CStoreTableAMGetOptions(NewHeap->rd_node.relNode); - TableWriteState *writeState = CStoreBeginWrite(NewHeap, + TableWriteState *writeState = CStoreBeginWrite(NewHeap->rd_node, cstoreOptions->compressionType, cstoreOptions->stripeRowCount, cstoreOptions->blockRowCount, targetDesc); - TableReadState *readState = CStoreBeginRead(OldHeap, sourceDesc, RelationColumnList( - OldHeap), NULL); + TableReadState *readState = CStoreBeginRead(OldHeap, sourceDesc, + RelationColumnList(OldHeap), NULL); Datum *values = palloc0(sourceDesc->natts * sizeof(Datum)); bool *nulls = palloc0(sourceDesc->natts * sizeof(bool)); @@ -1046,18 +995,61 @@ cstore_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate, static void -CStoreExecutorEnd(QueryDesc 
*queryDesc) +CStoreXactCallback(XactEvent event, void *arg) { - cstore_free_write_state(); - if (PreviousExecutorEndHook) + switch (event) { - PreviousExecutorEndHook(queryDesc); + case XACT_EVENT_COMMIT: + case XACT_EVENT_PARALLEL_COMMIT: + case XACT_EVENT_PREPARE: + { + /* nothing to do */ + break; + } + + case XACT_EVENT_ABORT: + case XACT_EVENT_PARALLEL_ABORT: + { + DiscardWriteStateForAllRels(GetCurrentSubTransactionId(), 0); + break; + } + + case XACT_EVENT_PRE_COMMIT: + case XACT_EVENT_PARALLEL_PRE_COMMIT: + case XACT_EVENT_PRE_PREPARE: + { + FlushWriteStateForAllRels(GetCurrentSubTransactionId(), 0); + break; + } } - else +} + + +static void +CStoreSubXactCallback(SubXactEvent event, SubTransactionId mySubid, + SubTransactionId parentSubid, void *arg) +{ + switch (event) { - standard_ExecutorEnd(queryDesc); + case SUBXACT_EVENT_START_SUB: + case SUBXACT_EVENT_COMMIT_SUB: + { + /* nothing to do */ + break; + } + + case SUBXACT_EVENT_ABORT_SUB: + { + DiscardWriteStateForAllRels(mySubid, parentSubid); + break; + } + + case SUBXACT_EVENT_PRE_COMMIT_SUB: + { + FlushWriteStateForAllRels(mySubid, parentSubid); + break; + } } - ResetCStoreMemoryContext(); } @@ -1109,12 +1101,13 @@ CStoreTableAMProcessUtility(PlannedStmt * plannedStatement, void cstore_tableam_init() { - PreviousExecutorEndHook = ExecutorEnd_hook; - ExecutorEnd_hook = CStoreExecutorEnd; + RegisterXactCallback(CStoreXactCallback, NULL); + RegisterSubXactCallback(CStoreSubXactCallback, NULL); + PreviousProcessUtilityHook = (ProcessUtility_hook != NULL) ? 
ProcessUtility_hook : standard_ProcessUtility; ProcessUtility_hook = CStoreTableAMProcessUtility; - prevObjectAccessHook = object_access_hook; + PrevObjectAccessHook = object_access_hook; object_access_hook = CStoreTableAMObjectAccessHook; cstore_customscan_init(); @@ -1124,7 +1117,7 @@ cstore_tableam_init() void cstore_tableam_finish() { - ExecutorEnd_hook = PreviousExecutorEndHook; + object_access_hook = PrevObjectAccessHook; } @@ -1140,9 +1133,9 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId subId, void *arg) { - if (prevObjectAccessHook) + if (PrevObjectAccessHook) { - prevObjectAccessHook(access, classId, objectId, subId, arg); + PrevObjectAccessHook(access, classId, objectId, subId, arg); } /* @@ -1166,7 +1159,10 @@ CStoreTableAMObjectAccessHook(ObjectAccessType access, Oid classId, Oid objectId * tableam tables storage is managed by postgres. */ Relation rel = table_open(objectId, AccessExclusiveLock); - DeleteDataFileMetadataRowIfExists(rel->rd_node.relNode); + Oid relfilenode = rel->rd_node.relNode; + DeleteDataFileMetadataRowIfExists(relfilenode); + + MarkRelfilenodeDropped(relfilenode, GetCurrentSubTransactionId()); /* keep the lock since we did physical changes to the relation */ table_close(rel, NoLock); diff --git a/src/backend/columnar/cstore_writer.c b/src/backend/columnar/cstore_writer.c index 735024fbf..75629241d 100644 --- a/src/backend/columnar/cstore_writer.c +++ b/src/backend/columnar/cstore_writer.c @@ -18,6 +18,7 @@ #include "safe_lib.h" +#include "access/heapam.h" #include "access/nbtree.h" #include "catalog/pg_am.h" #include "miscadmin.h" @@ -25,6 +26,7 @@ #include "storage/smgr.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "utils/relfilenodemap.h" #include "columnar/cstore.h" #include "columnar/cstore_version_compat.h" @@ -58,7 +60,7 @@ static StringInfo CopyStringInfo(StringInfo sourceString); * will be added. 
*/ TableWriteState * -CStoreBeginWrite(Relation relation, +CStoreBeginWrite(RelFileNode relfilenode, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, TupleDesc tupleDescriptor) @@ -101,11 +103,11 @@ CStoreBeginWrite(Relation relation, blockRowCount); TableWriteState *writeState = palloc0(sizeof(TableWriteState)); - writeState->relation = relation; + writeState->relfilenode = relfilenode; writeState->compressionType = compressionType; writeState->stripeMaxRowCount = stripeMaxRowCount; writeState->blockRowCount = blockRowCount; - writeState->tupleDescriptor = tupleDescriptor; + writeState->tupleDescriptor = CreateTupleDescCopy(tupleDescriptor); writeState->comparisonFunctionArray = comparisonFunctionArray; writeState->stripeBuffers = NULL; writeState->stripeSkipList = NULL; @@ -205,11 +207,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul stripeBuffers->rowCount++; if (stripeBuffers->rowCount >= writeState->stripeMaxRowCount) { - FlushStripe(writeState); - - /* set stripe data and skip list to NULL so they are recreated next time */ - writeState->stripeBuffers = NULL; - writeState->stripeSkipList = NULL; + CStoreFlushPendingWrites(writeState); } MemoryContextSwitchTo(oldContext); @@ -225,17 +223,7 @@ CStoreWriteRow(TableWriteState *writeState, Datum *columnValues, bool *columnNul void CStoreEndWrite(TableWriteState *writeState) { - StripeBuffers *stripeBuffers = writeState->stripeBuffers; - - if (stripeBuffers != NULL) - { - MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); - - FlushStripe(writeState); - MemoryContextReset(writeState->stripeWriteContext); - - MemoryContextSwitchTo(oldContext); - } + CStoreFlushPendingWrites(writeState); MemoryContextDelete(writeState->stripeWriteContext); pfree(writeState->comparisonFunctionArray); @@ -244,6 +232,25 @@ CStoreEndWrite(TableWriteState *writeState) } +void +CStoreFlushPendingWrites(TableWriteState *writeState) +{ + 
StripeBuffers *stripeBuffers = writeState->stripeBuffers; + if (stripeBuffers != NULL) + { + MemoryContext oldContext = MemoryContextSwitchTo(writeState->stripeWriteContext); + + FlushStripe(writeState); + + /* set stripe data and skip list to NULL so they are recreated next time */ + writeState->stripeBuffers = NULL; + writeState->stripeSkipList = NULL; + + MemoryContextSwitchTo(oldContext); + } +} + + /* * CreateEmptyStripeBuffers allocates an empty StripeBuffers structure with the given * column count. @@ -410,6 +417,10 @@ FlushStripe(TableWriteState *writeState) uint64 stripeSize = 0; uint64 stripeRowCount = 0; + Oid relationId = RelidByRelfilenode(writeState->relfilenode.spcNode, + writeState->relfilenode.relNode); + Relation relation = relation_open(relationId, NoLock); + /* * check if the last block needs serialization , the last block was not serialized * if it was not full yet, e.g. (rowCount > 0) @@ -459,7 +470,7 @@ FlushStripe(TableWriteState *writeState) stripeSkipList->blockSkipNodeArray[0][blockIndex].rowCount; } - stripeMetadata = ReserveStripe(writeState->relation, stripeSize, + stripeMetadata = ReserveStripe(relation, stripeSize, stripeRowCount, columnCount, blockCount, blockRowCount); @@ -486,7 +497,7 @@ FlushStripe(TableWriteState *writeState) columnBuffers->blockBuffersArray[blockIndex]; StringInfo existsBuffer = blockBuffers->existsBuffer; - WriteToSmgr(writeState->relation, currentFileOffset, + WriteToSmgr(relation, currentFileOffset, existsBuffer->data, existsBuffer->len); currentFileOffset += existsBuffer->len; } @@ -497,16 +508,18 @@ FlushStripe(TableWriteState *writeState) columnBuffers->blockBuffersArray[blockIndex]; StringInfo valueBuffer = blockBuffers->valueBuffer; - WriteToSmgr(writeState->relation, currentFileOffset, + WriteToSmgr(relation, currentFileOffset, valueBuffer->data, valueBuffer->len); currentFileOffset += valueBuffer->len; } } /* create skip list and footer buffers */ - 
SaveStripeSkipList(writeState->relation->rd_node.relNode, + SaveStripeSkipList(relation->rd_node.relNode, stripeMetadata.id, stripeSkipList, tupleDescriptor); + + relation_close(relation, NoLock); } @@ -747,3 +760,10 @@ CopyStringInfo(StringInfo sourceString) return targetString; } + + +bool +ContainsPendingWrites(TableWriteState *state) +{ + return state->stripeBuffers != NULL && state->stripeBuffers->rowCount != 0; +} diff --git a/src/backend/columnar/write_state_management.c b/src/backend/columnar/write_state_management.c new file mode 100644 index 000000000..067d2199c --- /dev/null +++ b/src/backend/columnar/write_state_management.c @@ -0,0 +1,384 @@ + +#include "citus_version.h" +#if HAS_TABLEAM + +#include "postgres.h" + +#include + +#include "miscadmin.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/multixact.h" +#include "access/rewriteheap.h" +#include "access/tsmapi.h" +#if PG_VERSION_NUM >= 130000 +#include "access/heaptoast.h" +#else +#include "access/tuptoaster.h" +#endif +#include "access/xact.h" +#include "catalog/catalog.h" +#include "catalog/index.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_am.h" +#include "catalog/pg_trigger.h" +#include "catalog/storage.h" +#include "catalog/storage_xlog.h" +#include "commands/progress.h" +#include "commands/vacuum.h" +#include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "optimizer/plancat.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/bufpage.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "storage/procarray.h" +#include "storage/smgr.h" +#include "tcop/utility.h" +#include "utils/builtins.h" +#include "utils/pg_rusage.h" +#include "utils/rel.h" +#include "utils/syscache.h" + +#include "columnar/cstore.h" +#include "columnar/cstore_customscan.h" +#include "columnar/cstore_tableam.h" +#include "columnar/cstore_version_compat.h" + + +/* + * Mapping from relfilenode to 
WriteStateMapEntry. This keeps write state for + * each relation. + */ +static HTAB *WriteStateMap = NULL; + +/* memory context for allocating WriteStateMap & all write states */ +static MemoryContext WriteStateContext = NULL; + +/* + * Each member of the writeStateStack in WriteStateMapEntry. This means that + * we did some inserts in the subtransaction subXid, and the state of those + * inserts is stored at writeState. Those writes can be flushed or unflushed. + */ +typedef struct SubXidWriteState +{ + SubTransactionId subXid; + TableWriteState *writeState; + + struct SubXidWriteState *next; +} SubXidWriteState; + + +/* + * An entry in WriteStateMap. + */ +typedef struct WriteStateMapEntry +{ + /* key of the entry */ + Oid relfilenode; + + /* + * If a table is dropped, we set dropped to true and set dropSubXid to the + * id of the subtransaction in which the drop happened. + */ + bool dropped; + SubTransactionId dropSubXid; + + /* + * Stack of SubXidWriteState where first element is top of the stack. When + * inserts happen, we look at top of the stack. If top of stack belongs to + * current subtransaction, we forward writes to its writeState. Otherwise, + * we create a new stack entry for current subtransaction and push it to + * the stack, and forward writes to that. + */ + SubXidWriteState *writeStateStack; +} WriteStateMapEntry; + + +/* + * Memory context reset callback so we reset WriteStateMap to NULL at the end + * of transaction. WriteStateMap is allocated in & WriteStateMap, so its + * leaked reference can cause memory issues. + */ +static MemoryContextCallback cleanupCallback; +static void +CleanupWriteStateMap(void *arg) +{ + WriteStateMap = NULL; + WriteStateContext = NULL; +} + + +TableWriteState * +cstore_init_write_state(RelFileNode relfilenode, TupleDesc tupdesc, + SubTransactionId currentSubXid) +{ + bool found; + + /* + * If this is the first call in current transaction, allocate the hash + * table. 
+ */ + if (WriteStateMap == NULL) + { + WriteStateContext = + AllocSetContextCreate( + TopTransactionContext, + "Column Store Write State Management Context", + ALLOCSET_DEFAULT_SIZES); + HASHCTL info; + uint32 hashFlags = (HASH_ELEM | HASH_CONTEXT); + memset(&info, 0, sizeof(info)); + info.keysize = sizeof(Oid); + info.entrysize = sizeof(WriteStateMapEntry); + info.hcxt = WriteStateContext; + + WriteStateMap = hash_create("column store write state map", + 64, &info, hashFlags); + + cleanupCallback.arg = NULL; + cleanupCallback.func = &CleanupWriteStateMap; + cleanupCallback.next = NULL; + MemoryContextRegisterResetCallback(WriteStateContext, &cleanupCallback); + } + + WriteStateMapEntry *hashEntry = hash_search(WriteStateMap, &relfilenode.relNode, + HASH_ENTER, &found); + if (!found) + { + hashEntry->writeStateStack = NULL; + hashEntry->dropped = false; + } + + Assert(!hashEntry->dropped); + + /* + * If top of stack belongs to the current subtransaction, return its + * writeState, ... + */ + if (hashEntry->writeStateStack != NULL) + { + SubXidWriteState *stackHead = hashEntry->writeStateStack; + + if (stackHead->subXid == currentSubXid) + { + return stackHead->writeState; + } + } + + /* + * ... otherwise we need to create a new stack entry for the current + * subtransaction. + */ + MemoryContext oldContext = MemoryContextSwitchTo(WriteStateContext); + + CStoreOptions *cstoreOptions = CStoreTableAMGetOptions(relfilenode.relNode); + SubXidWriteState *stackEntry = palloc0(sizeof(SubXidWriteState)); + stackEntry->writeState = CStoreBeginWrite(relfilenode, + cstoreOptions->compressionType, + cstoreOptions->stripeRowCount, + cstoreOptions->blockRowCount, + tupdesc); + stackEntry->subXid = currentSubXid; + stackEntry->next = hashEntry->writeStateStack; + hashEntry->writeStateStack = stackEntry; + + MemoryContextSwitchTo(oldContext); + + return stackEntry->writeState; +} + + +/* + * Flushes pending writes for given relfilenode in the given subtransaction. 
+ */ +void +FlushWriteStateForRelfilenode(Oid relfilenode, SubTransactionId currentSubXid) +{ + WriteStateMapEntry *entry; + bool found = false; + + if (WriteStateMap) + { + entry = hash_search(WriteStateMap, &relfilenode, HASH_FIND, &found); + } + + Assert(!found || !entry->dropped); + + if (found && entry->writeStateStack != NULL) + { + SubXidWriteState *stackEntry = entry->writeStateStack; + if (stackEntry->subXid == currentSubXid) + { + CStoreFlushPendingWrites(stackEntry->writeState); + } + } +} + + +/* + * Helper function for FlushWriteStateForAllRels and DiscardWriteStateForAllRels. + * Pops all of write states for current subtransaction, and depending on "commit" + * either flushes them or discards them. This also takes into account dropped + * tables, and either propagates the dropped flag to parent subtransaction or + * rolls back abort. + */ +static void +PopWriteStateForAllRels(SubTransactionId currentSubXid, SubTransactionId parentSubXid, + bool commit) +{ + HASH_SEQ_STATUS status; + WriteStateMapEntry *entry; + + if (WriteStateMap == NULL) + { + return; + } + + hash_seq_init(&status, WriteStateMap); + while ((entry = hash_seq_search(&status)) != 0) + { + if (entry->writeStateStack == NULL) + { + continue; + } + + /* + * If the table has been dropped in current subtransaction, either + * commit the drop or roll it back. + */ + if (entry->dropped) + { + if (entry->dropSubXid == currentSubXid) + { + if (commit) + { + /* elevate drop to the upper subtransaction */ + entry->dropSubXid = parentSubXid; + } + else + { + /* abort the drop */ + entry->dropped = false; + } + } + } + + /* + * Otherwise, commit or discard pending writes. + */ + else + { + SubXidWriteState *stackHead = entry->writeStateStack; + if (stackHead->subXid == currentSubXid) + { + if (commit) + { + CStoreEndWrite(stackHead->writeState); + } + + entry->writeStateStack = stackHead->next; + } + } + } +} + + +/* + * Called when current subtransaction is committed. 
+ */ +void +FlushWriteStateForAllRels(SubTransactionId currentSubXid, SubTransactionId parentSubXid) +{ + PopWriteStateForAllRels(currentSubXid, parentSubXid, true); +} + + +/* + * Called when current subtransaction is aborted. + */ +void +DiscardWriteStateForAllRels(SubTransactionId currentSubXid, SubTransactionId parentSubXid) +{ + PopWriteStateForAllRels(currentSubXid, parentSubXid, false); +} + + +/* + * Called when the given relfilenode is dropped. + */ +void +MarkRelfilenodeDropped(Oid relfilenode, SubTransactionId currentSubXid) +{ + bool found = false; + + if (WriteStateMap == NULL) + { + return; + } + + WriteStateMapEntry *entry = hash_search(WriteStateMap, &relfilenode, HASH_FIND, + &found); + if (!found || entry->dropped) + { + return; + } + + entry->dropped = true; + entry->dropSubXid = currentSubXid; +} + + +/* + * Called when the given relfilenode is dropped in non-transactional TRUNCATE. + */ +void +NonTransactionDropWriteState(Oid relfilenode) +{ + if (WriteStateMap) + { + hash_search(WriteStateMap, &relfilenode, HASH_REMOVE, false); + } +} + + +/* + * Returns true if there are any pending writes in upper transactions. 
+ */ +bool +PendingWritesInUpperTransactions(Oid relfilenode, SubTransactionId currentSubXid) +{ + WriteStateMapEntry *entry; + bool found = false; + + if (WriteStateMap) + { + entry = hash_search(WriteStateMap, &relfilenode, HASH_FIND, &found); + } + + if (found && entry->writeStateStack != NULL) + { + SubXidWriteState *stackEntry = entry->writeStateStack; + + while (stackEntry != NULL) + { + if (stackEntry->subXid != currentSubXid && + ContainsPendingWrites(stackEntry->writeState)) + { + return true; + } + + stackEntry = stackEntry->next; + } + } + + return false; +} + + +#endif diff --git a/src/include/columnar/cstore.h b/src/include/columnar/cstore.h index ff0051b95..edfe65efe 100644 --- a/src/include/columnar/cstore.h +++ b/src/include/columnar/cstore.h @@ -20,6 +20,7 @@ #include "nodes/parsenodes.h" #include "storage/bufpage.h" #include "storage/lockdefs.h" +#include "storage/relfilenode.h" #include "utils/relcache.h" #include "utils/snapmgr.h" @@ -224,7 +225,7 @@ typedef struct TableWriteState CompressionType compressionType; TupleDesc tupleDescriptor; FmgrInfo **comparisonFunctionArray; - Relation relation; + RelFileNode relfilenode; MemoryContext stripeWriteContext; StripeBuffers *stripeBuffers; @@ -251,14 +252,16 @@ extern void cstore_init(void); extern CompressionType ParseCompressionType(const char *compressionTypeString); /* Function declarations for writing to a cstore file */ -extern TableWriteState * CStoreBeginWrite(Relation relation, +extern TableWriteState * CStoreBeginWrite(RelFileNode relfilenode, CompressionType compressionType, uint64 stripeMaxRowCount, uint32 blockRowCount, TupleDesc tupleDescriptor); extern void CStoreWriteRow(TableWriteState *state, Datum *columnValues, bool *columnNulls); +extern void CStoreFlushPendingWrites(TableWriteState *state); extern void CStoreEndWrite(TableWriteState *state); +extern bool ContainsPendingWrites(TableWriteState *state); /* Function declarations for reading from a cstore file */ extern 
TableReadState * CStoreBeginRead(Relation relation, @@ -281,6 +284,7 @@ extern bool CompressBuffer(StringInfo inputBuffer, StringInfo outputBuffer, CompressionType compressionType); extern StringInfo DecompressBuffer(StringInfo buffer, CompressionType compressionType); extern char * CompressionTypeStr(CompressionType type); +extern CStoreOptions * CStoreTableAMGetOptions(Oid relfilenode); /* cstore_metadata_tables.c */ extern void DeleteDataFileMetadataRowIfExists(Oid relfilenode); @@ -300,6 +304,22 @@ extern StripeSkipList * ReadStripeSkipList(Oid relfilenode, uint64 stripe, TupleDesc tupleDescriptor, uint32 blockCount); + +/* write_state_management.c */ +extern TableWriteState * cstore_init_write_state(RelFileNode relfilenode, TupleDesc + tupdesc, + SubTransactionId currentSubXid); +extern void FlushWriteStateForRelfilenode(Oid relfilenode, SubTransactionId + currentSubXid); +extern void FlushWriteStateForAllRels(SubTransactionId currentSubXid, SubTransactionId + parentSubXid); +extern void DiscardWriteStateForAllRels(SubTransactionId currentSubXid, SubTransactionId + parentSubXid); +extern void MarkRelfilenodeDropped(Oid relfilenode, SubTransactionId currentSubXid); +extern void NonTransactionDropWriteState(Oid relfilenode); +extern bool PendingWritesInUpperTransactions(Oid relfilenode, + SubTransactionId currentSubXid); + typedef struct SmgrAddr { BlockNumber blockno; diff --git a/src/test/regress/columnar_am_schedule b/src/test/regress/columnar_am_schedule index 70ae97db3..3a4a46270 100644 --- a/src/test/regress/columnar_am_schedule +++ b/src/test/regress/columnar_am_schedule @@ -16,3 +16,5 @@ test: am_block_filtering test: am_join test: am_trigger test: am_tableoptions +test: am_recursive +test: am_transactions diff --git a/src/test/regress/expected/am_recursive.out b/src/test/regress/expected/am_recursive.out new file mode 100644 index 000000000..eb90dda47 --- /dev/null +++ b/src/test/regress/expected/am_recursive.out @@ -0,0 +1,252 @@ +CREATE TABLE t1(a 
int, b int) USING cstore_tableam; +CREATE TABLE t2(a int, b int) USING cstore_tableam; +CREATE FUNCTION f(x INT) RETURNS INT AS $$ + INSERT INTO t1 VALUES(x, x * 2) RETURNING b - 1; +$$ LANGUAGE SQL; +-- +-- Following query will start a write to t1 before finishing +-- write to t1, so it tests that we handle recursive writes +-- correctly. +-- +INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i; +-- there are no subtransactions, so above statement should batch +-- INSERTs inside the UDF and create on stripe per table. +SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2') +GROUP BY relname +ORDER BY relname; + relname | count +--------------------------------------------------------------------- + t1 | 1 + t2 | 1 +(2 rows) + +SELECT * FROM t1 ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +SELECT * FROM t2 ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 1 + 2 | 3 + 3 | 5 + 4 | 7 + 5 | 9 +(5 rows) + +TRUNCATE t1; +TRUNCATE t2; +DROP FUNCTION f(INT); +-- +-- Test the case when 2 writes are going on concurrently in the +-- same executor, and those 2 writes are dependent. +-- +WITH t AS ( + INSERT INTO t1 SELECT i, 2*i FROM generate_series(1, 5) i RETURNING * +) +INSERT INTO t2 SELECT t.a, t.a+1 FROM t; +SELECT * FROM t1; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +SELECT * FROM t2; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 3 + 3 | 4 + 4 | 5 + 5 | 6 +(5 rows) + +TRUNCATE t1; +TRUNCATE t2; +-- +-- Test the case when there are 2 independent inserts in a CTE. +-- Also tests the case where some of the tuple_inserts happen in +-- ExecutorFinish() instead of ExecutorRun(). 
+-- +WITH t AS ( + INSERT INTO t1 SELECT i, 2*i FROM generate_series(1, 5) i RETURNING * +) +INSERT INTO t2 SELECT i, (select count(*) from t1) FROM generate_series(1, 3) i; +SELECT * FROM t1; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +SELECT * FROM t2; + a | b +--------------------------------------------------------------------- + 1 | 0 + 2 | 0 + 3 | 0 +(3 rows) + +TRUNCATE t1; +TRUNCATE t2; +-- +-- double insert on the same relation +-- +WITH t AS ( + INSERT INTO t1 SELECT i, 2*i FROM generate_series(1, 5) i RETURNING * +) +INSERT INTO t1 SELECT t.a, t.a+1 FROM t; +SELECT * FROM t1 ORDER BY a, b; + a | b +--------------------------------------------------------------------- + 1 | 2 + 1 | 2 + 2 | 3 + 2 | 4 + 3 | 4 + 3 | 6 + 4 | 5 + 4 | 8 + 5 | 6 + 5 | 10 +(10 rows) + +TRUNCATE t1; +TRUNCATE t2; +-- +-- A test where result of a UDF call will depend on execution +-- of previous UDF calls. +-- +CREATE FUNCTION g(x INT) RETURNS INT AS $$ + INSERT INTO t1 VALUES(x, x * 2); + SELECT count(*)::int FROM t1; +$$ LANGUAGE SQL; +-- t3 and t4 are heap tables to help with cross-checking results +CREATE TABLE t3(a int, b int); +CREATE TABLE t4(a int, b int); +CREATE FUNCTION g2(x INT) RETURNS INT AS $$ + INSERT INTO t3 VALUES(x, x * 2); + SELECT count(*)::int FROM t3; +$$ LANGUAGE SQL; +INSERT INTO t2 SELECT i, g(i) FROM generate_series(1, 5) i; +INSERT INTO t4 SELECT i, g2(i) FROM generate_series(1, 5) i; +-- check that t1==t3 and t2==t4. 
+((table t1) except (table t3)) union ((table t3) except (table t1)); + a | b +--------------------------------------------------------------------- +(0 rows) + +((table t2) except (table t4)) union ((table t4) except (table t2)); + a | b +--------------------------------------------------------------------- +(0 rows) + +SELECT * FROM t2 ORDER BY a, b; + a | b +--------------------------------------------------------------------- + 1 | 1 + 2 | 2 + 3 | 3 + 4 | 4 + 5 | 5 +(5 rows) + +TRUNCATE t1, t2, t3, t4; +-- +-- INSERT into the same relation that was INSERTed into in the UDF +-- +INSERT INTO t1 SELECT i, g(i) FROM generate_series(1, 3) i; +INSERT INTO t3 SELECT i, g2(i) FROM generate_series(1, 3) i; +SELECT * FROM t1 ORDER BY a, b; + a | b +--------------------------------------------------------------------- + 1 | 1 + 1 | 2 + 2 | 3 + 2 | 4 + 3 | 5 + 3 | 6 +(6 rows) + +SELECT * FROM t3 ORDER BY a, b; + a | b +--------------------------------------------------------------------- + 1 | 1 + 1 | 2 + 2 | 3 + 2 | 4 + 3 | 5 + 3 | 6 +(6 rows) + +-- check that t1==t3 and t2==t4. +((table t1) except (table t3)) union ((table t3) except (table t1)); + a | b +--------------------------------------------------------------------- +(0 rows) + +((table t2) except (table t4)) union ((table t4) except (table t2)); + a | b +--------------------------------------------------------------------- +(0 rows) + +DROP FUNCTION g(int), g2(int); +TRUNCATE t1, t2, t3, t4; +-- +-- EXCEPTION in plpgsql, which is implemented internally using +-- subtransactions. plgpsql uses SPI to execute INSERT statements. 
+-- +CREATE FUNCTION f(a int) RETURNS VOID AS $$ +DECLARE + x int; +BEGIN + INSERT INTO t1 SELECT i, i + 1 FROM generate_series(a, a + 1) i; + x := 10 / a; + INSERT INTO t1 SELECT i, i * 2 FROM generate_series(a + 2, a + 3) i; +EXCEPTION WHEN division_by_zero THEN + INSERT INTO t1 SELECT i, i + 1 FROM generate_series(a + 2, a + 3) i; +END; +$$ LANGUAGE plpgsql; +SELECT f(10); + f +--------------------------------------------------------------------- + +(1 row) + +SELECT f(0), f(20); + f | f +--------------------------------------------------------------------- + | +(1 row) + +SELECT * FROM t1 ORDER BY a, b; + a | b +--------------------------------------------------------------------- + 2 | 3 + 3 | 4 + 10 | 11 + 11 | 12 + 12 | 24 + 13 | 26 + 20 | 21 + 21 | 22 + 22 | 44 + 23 | 46 +(10 rows) + +DROP FUNCTION f(int); +DROP TABLE t1, t2, t3, t4; diff --git a/src/test/regress/expected/am_rollback.out b/src/test/regress/expected/am_rollback.out index e58ef5135..a4c069377 100644 --- a/src/test/regress/expected/am_rollback.out +++ b/src/test/regress/expected/am_rollback.out @@ -37,6 +37,12 @@ WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; BEGIN; SAVEPOINT s0; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; -- force flush + count +--------------------------------------------------------------------- + 20 +(1 row) + SAVEPOINT s1; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; SELECT count(*) FROM t; diff --git a/src/test/regress/expected/am_transactions.out b/src/test/regress/expected/am_transactions.out new file mode 100644 index 000000000..1410272dd --- /dev/null +++ b/src/test/regress/expected/am_transactions.out @@ -0,0 +1,142 @@ +-- +-- Testing we handle transactions properly +-- +CREATE TABLE t(a int, b int) USING cstore_tableam; +INSERT INTO t SELECT i, 2 * i FROM generate_series(1, 3) i; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 
+ 3 | 6 +(3 rows) + +-- verify that table rewrites work properly +BEGIN; +ALTER TABLE t ALTER COLUMN b TYPE float4 USING (b + 0.5)::float4; +INSERT INTO t VALUES (4, 8.5); +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2.5 + 2 | 4.5 + 3 | 6.5 + 4 | 8.5 +(4 rows) + +ROLLBACK; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 +(3 rows) + +-- verify truncate rollback +BEGIN; +TRUNCATE t; +INSERT INTO t VALUES (4, 8); +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 4 | 8 +(1 row) + +SAVEPOINT s1; +TRUNCATE t; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- +(0 rows) + +ROLLBACK TO SAVEPOINT s1; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 4 | 8 +(1 row) + +ROLLBACK; +-- verify truncate with unflushed data in upper xacts +BEGIN; +INSERT INTO t VALUES (4, 8); +SAVEPOINT s1; +TRUNCATE t; +ROLLBACK TO SAVEPOINT s1; +COMMIT; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 +(4 rows) + +-- verify DROP TABLE rollback +BEGIN; +INSERT INTO t VALUES (5, 10); +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +SAVEPOINT s1; +DROP TABLE t; +SELECT * FROM t ORDER BY a; +ERROR: relation "t" does not exist +ROLLBACK TO SAVEPOINT s1; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +ROLLBACK; +-- verify DROP TABLE with unflushed data in upper xacts +BEGIN; +INSERT INTO t VALUES (5, 10); +SAVEPOINT s1; +DROP TABLE t; +SELECT * FROM t ORDER BY a; 
+ERROR: relation "t" does not exist +ROLLBACK TO SAVEPOINT s1; +COMMIT; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +-- verify SELECT when unflushed data in upper transactions errors. +BEGIN; +INSERT INTO t VALUES (6, 12); +SAVEPOINT s1; +SELECT * FROM t; +ERROR: cannot read from table when there is unflushed data in upper transactions +ROLLBACK; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +DROP TABLE t; diff --git a/src/test/regress/expected/am_trigger.out b/src/test/regress/expected/am_trigger.out index f289b7dad..cd85c96bc 100644 --- a/src/test/regress/expected/am_trigger.out +++ b/src/test/regress/expected/am_trigger.out @@ -74,4 +74,98 @@ NOTICE: (3) CONTEXT: PL/pgSQL function trs_after() line 14 at RAISE NOTICE: (4) CONTEXT: PL/pgSQL function trs_after() line 14 at RAISE +SELECT * FROM test_tr ORDER BY i; + i +--------------------------------------------------------------------- + 1 + 2 + 3 + 4 +(4 rows) + drop table test_tr; +create table test_tr(i int) using cstore_tableam; +-- we should be able to clean-up and continue gracefully if we +-- error out in AFTER STATEMENT triggers. +CREATE SEQUENCE counter START 100; +create or replace function trs_after_erroring() returns trigger language plpgsql as $$ +BEGIN + IF nextval('counter') % 2 = 0 THEN + RAISE EXCEPTION '%', 'error'; + END IF; + RETURN NULL; +END; +$$; +create trigger tr_after_stmt_erroring after insert on test_tr + referencing new table as new_table + for each statement execute procedure trs_after_erroring(); +-- +-- Once upon a time we didn't clean-up properly after erroring out. Here the first +-- statement errors, but the second succeeds. In old times, because of failure in +-- clean-up, both rows were visible. But only the 2nd one should be visible. 
+-- +insert into test_tr values(5); +ERROR: error +CONTEXT: PL/pgSQL function trs_after_erroring() line 4 at RAISE +insert into test_tr values(6); +SELECT * FROM test_tr ORDER BY i; + i +--------------------------------------------------------------------- + 6 +(1 row) + +drop table test_tr; +-- +-- https://github.com/citusdata/cstore2/issues/32 +-- +create table events( + user_id bigint, + event_id bigint, + event_time timestamp default now(), + value float default random()) + PARTITION BY RANGE (event_time); +create table events_p2020_11_04_102965 +PARTITION OF events FOR VALUES FROM ('2020-11-04 00:00:00+01') TO ('2020-11-05 00:00:00+01') +USING cstore_tableam; +create table events_trigger_target( + user_id bigint, + avg float, + __count__ bigint +) USING cstore_tableam; +CREATE OR REPLACE FUNCTION user_value_by_day() + RETURNS trigger + LANGUAGE plpgsql +AS $function$ +BEGIN + IF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN + EXECUTE format($exec_format$INSERT INTO %s AS __mat__ SELECT user_id, 0.1 AS avg, pg_catalog.count(*) AS __count__ FROM __ins__ events GROUP BY user_id; + $exec_format$, TG_ARGV[0]); + END IF; + IF (TG_OP = 'DELETE' OR TG_OP = 'UPDATE') THEN + RAISE EXCEPTION $ex$MATERIALIZED VIEW 'user_value_by_day' on table 'events' does not support UPDATE/DELETE$ex$; + END IF; + IF (TG_OP = 'TRUNCATE') THEN + EXECUTE format($exec_format$TRUNCATE TABLE %s; $exec_format$, TG_ARGV[0]); + END IF; + RETURN NULL; +END; +$function$; +create trigger "user_value_by_day_INSERT" AFTER INSERT ON events + REFERENCING NEW TABLE AS __ins__ + FOR EACH STATEMENT EXECUTE FUNCTION user_value_by_day('events_trigger_target'); +COPY events FROM STDIN WITH (FORMAT 'csv'); +SELECT * FROM events ORDER BY user_id; + user_id | event_id | event_time | value +--------------------------------------------------------------------- + 1 | 1 | Wed Nov 04 15:54:02.226999 2020 | 1.1 + 2 | 3 | Wed Nov 04 16:54:02.226999 2020 | 2.2 +(2 rows) + +SELECT * FROM events_trigger_target ORDER BY 
user_id; + user_id | avg | __count__ +--------------------------------------------------------------------- + 1 | 0.1 | 1 + 2 | 0.1 | 1 +(2 rows) + +DROP TABLE events; diff --git a/src/test/regress/expected/am_vacuum_vs_insert.out b/src/test/regress/expected/am_vacuum_vs_insert.out index f5ef08673..a4539a073 100644 --- a/src/test/regress/expected/am_vacuum_vs_insert.out +++ b/src/test/regress/expected/am_vacuum_vs_insert.out @@ -11,7 +11,7 @@ step s1-insert: INSERT INTO test_vacuum_vs_insert SELECT i, 2 * i FROM generate_series(1, 3) i; s2: INFO: statistics for "test_vacuum_vs_insert": -total file size: 24576, total data size: 26 +total file size: 16384, total data size: 26 total row count: 3, stripe count: 1, average rows per stripe: 3 block count: 2, containing data for dropped columns: 0, none compressed: 2, pglz compressed: 0 diff --git a/src/test/regress/sql/am_recursive.sql b/src/test/regress/sql/am_recursive.sql new file mode 100644 index 000000000..1e3d6dc06 --- /dev/null +++ b/src/test/regress/sql/am_recursive.sql @@ -0,0 +1,143 @@ + +CREATE TABLE t1(a int, b int) USING cstore_tableam; +CREATE TABLE t2(a int, b int) USING cstore_tableam; + +CREATE FUNCTION f(x INT) RETURNS INT AS $$ + INSERT INTO t1 VALUES(x, x * 2) RETURNING b - 1; +$$ LANGUAGE SQL; + +-- +-- Following query will start a write to t1 before finishing +-- write to t1, so it tests that we handle recursive writes +-- correctly. +-- +INSERT INTO t2 SELECT i, f(i) FROM generate_series(1, 5) i; + +-- there are no subtransactions, so above statement should batch +-- INSERTs inside the UDF and create on stripe per table. 
+SELECT relname, count(*) FROM cstore.cstore_stripes a, pg_class b +WHERE a.relfilenode=b.relfilenode AND relname IN ('t1', 't2') +GROUP BY relname +ORDER BY relname; + +SELECT * FROM t1 ORDER BY a; +SELECT * FROM t2 ORDER BY a; + +TRUNCATE t1; +TRUNCATE t2; +DROP FUNCTION f(INT); + +-- +-- Test the case when 2 writes are going on concurrently in the +-- same executor, and those 2 writes are dependent. +-- +WITH t AS ( + INSERT INTO t1 SELECT i, 2*i FROM generate_series(1, 5) i RETURNING * +) +INSERT INTO t2 SELECT t.a, t.a+1 FROM t; + +SELECT * FROM t1; +SELECT * FROM t2; + +TRUNCATE t1; +TRUNCATE t2; + +-- +-- Test the case when there are 2 independent inserts in a CTE. +-- Also tests the case where some of the tuple_inserts happen in +-- ExecutorFinish() instead of ExecutorRun(). +-- +WITH t AS ( + INSERT INTO t1 SELECT i, 2*i FROM generate_series(1, 5) i RETURNING * +) +INSERT INTO t2 SELECT i, (select count(*) from t1) FROM generate_series(1, 3) i; + +SELECT * FROM t1; +SELECT * FROM t2; + +TRUNCATE t1; +TRUNCATE t2; + +-- +-- double insert on the same relation +-- +WITH t AS ( + INSERT INTO t1 SELECT i, 2*i FROM generate_series(1, 5) i RETURNING * +) +INSERT INTO t1 SELECT t.a, t.a+1 FROM t; + +SELECT * FROM t1 ORDER BY a, b; + +TRUNCATE t1; +TRUNCATE t2; + +-- +-- A test where result of a UDF call will depend on execution +-- of previous UDF calls. +-- + +CREATE FUNCTION g(x INT) RETURNS INT AS $$ + INSERT INTO t1 VALUES(x, x * 2); + SELECT count(*)::int FROM t1; +$$ LANGUAGE SQL; + +-- t3 and t4 are heap tables to help with cross-checking results +CREATE TABLE t3(a int, b int); +CREATE TABLE t4(a int, b int); + +CREATE FUNCTION g2(x INT) RETURNS INT AS $$ + INSERT INTO t3 VALUES(x, x * 2); + SELECT count(*)::int FROM t3; +$$ LANGUAGE SQL; + +INSERT INTO t2 SELECT i, g(i) FROM generate_series(1, 5) i; +INSERT INTO t4 SELECT i, g2(i) FROM generate_series(1, 5) i; + +-- check that t1==t3 and t2==t4. 
+((table t1) except (table t3)) union ((table t3) except (table t1)); +((table t2) except (table t4)) union ((table t4) except (table t2)); + +SELECT * FROM t2 ORDER BY a, b; + +TRUNCATE t1, t2, t3, t4; + +-- +-- INSERT into the same relation that was INSERTed into in the UDF +-- +INSERT INTO t1 SELECT i, g(i) FROM generate_series(1, 3) i; +INSERT INTO t3 SELECT i, g2(i) FROM generate_series(1, 3) i; +SELECT * FROM t1 ORDER BY a, b; +SELECT * FROM t3 ORDER BY a, b; + +-- check that t1==t3 and t2==t4. +((table t1) except (table t3)) union ((table t3) except (table t1)); +((table t2) except (table t4)) union ((table t4) except (table t2)); + +DROP FUNCTION g(int), g2(int); +TRUNCATE t1, t2, t3, t4; + +-- +-- EXCEPTION in plpgsql, which is implemented internally using +-- subtransactions. plgpsql uses SPI to execute INSERT statements. +-- + +CREATE FUNCTION f(a int) RETURNS VOID AS $$ +DECLARE + x int; +BEGIN + INSERT INTO t1 SELECT i, i + 1 FROM generate_series(a, a + 1) i; + x := 10 / a; + INSERT INTO t1 SELECT i, i * 2 FROM generate_series(a + 2, a + 3) i; +EXCEPTION WHEN division_by_zero THEN + INSERT INTO t1 SELECT i, i + 1 FROM generate_series(a + 2, a + 3) i; +END; +$$ LANGUAGE plpgsql; + +SELECT f(10); +SELECT f(0), f(20); + +SELECT * FROM t1 ORDER BY a, b; + +DROP FUNCTION f(int); +DROP TABLE t1, t2, t3, t4; + diff --git a/src/test/regress/sql/am_rollback.sql b/src/test/regress/sql/am_rollback.sql index da1cc8ce4..bda02152e 100644 --- a/src/test/regress/sql/am_rollback.sql +++ b/src/test/regress/sql/am_rollback.sql @@ -23,6 +23,7 @@ WHERE a.relfilenode = b.relfilenode AND b.relname = 't'; BEGIN; SAVEPOINT s0; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; +SELECT count(*) FROM t; -- force flush SAVEPOINT s1; INSERT INTO t SELECT i, i+1 FROM generate_series(1, 10) i; SELECT count(*) FROM t; diff --git a/src/test/regress/sql/am_transactions.sql b/src/test/regress/sql/am_transactions.sql new file mode 100644 index 000000000..b84790fb5 --- /dev/null 
+++ b/src/test/regress/sql/am_transactions.sql @@ -0,0 +1,70 @@ +-- +-- Testing we handle transactions properly +-- + +CREATE TABLE t(a int, b int) USING cstore_tableam; + +INSERT INTO t SELECT i, 2 * i FROM generate_series(1, 3) i; +SELECT * FROM t ORDER BY a; + +-- verify that table rewrites work properly +BEGIN; +ALTER TABLE t ALTER COLUMN b TYPE float4 USING (b + 0.5)::float4; +INSERT INTO t VALUES (4, 8.5); +SELECT * FROM t ORDER BY a; +ROLLBACK; + +SELECT * FROM t ORDER BY a; + +-- verify truncate rollback +BEGIN; +TRUNCATE t; +INSERT INTO t VALUES (4, 8); +SELECT * FROM t ORDER BY a; +SAVEPOINT s1; +TRUNCATE t; +SELECT * FROM t ORDER BY a; +ROLLBACK TO SAVEPOINT s1; +SELECT * FROM t ORDER BY a; +ROLLBACK; + +-- verify truncate with unflushed data in upper xacts +BEGIN; +INSERT INTO t VALUES (4, 8); +SAVEPOINT s1; +TRUNCATE t; +ROLLBACK TO SAVEPOINT s1; +COMMIT; + +SELECT * FROM t ORDER BY a; + +-- verify DROP TABLE rollback +BEGIN; +INSERT INTO t VALUES (5, 10); +SELECT * FROM t ORDER BY a; +SAVEPOINT s1; +DROP TABLE t; +SELECT * FROM t ORDER BY a; +ROLLBACK TO SAVEPOINT s1; +SELECT * FROM t ORDER BY a; +ROLLBACK; + +-- verify DROP TABLE with unflushed data in upper xacts +BEGIN; +INSERT INTO t VALUES (5, 10); +SAVEPOINT s1; +DROP TABLE t; +SELECT * FROM t ORDER BY a; +ROLLBACK TO SAVEPOINT s1; +COMMIT; +SELECT * FROM t ORDER BY a; + +-- verify SELECT when unflushed data in upper transactions errors. 
+BEGIN; +INSERT INTO t VALUES (6, 12); +SAVEPOINT s1; +SELECT * FROM t; +ROLLBACK; +SELECT * FROM t ORDER BY a; + +DROP TABLE t; diff --git a/src/test/regress/sql/am_trigger.sql b/src/test/regress/sql/am_trigger.sql index b8a918cf4..3cdd53b44 100644 --- a/src/test/regress/sql/am_trigger.sql +++ b/src/test/regress/sql/am_trigger.sql @@ -58,4 +58,87 @@ create trigger tr_after_row after insert on test_tr insert into test_tr values(1); insert into test_tr values(2),(3),(4); +SELECT * FROM test_tr ORDER BY i; + drop table test_tr; +create table test_tr(i int) using cstore_tableam; + +-- we should be able to clean-up and continue gracefully if we +-- error out in AFTER STATEMENT triggers. +CREATE SEQUENCE counter START 100; +create or replace function trs_after_erroring() returns trigger language plpgsql as $$ +BEGIN + IF nextval('counter') % 2 = 0 THEN + RAISE EXCEPTION '%', 'error'; + END IF; + RETURN NULL; +END; +$$; + +create trigger tr_after_stmt_erroring after insert on test_tr + referencing new table as new_table + for each statement execute procedure trs_after_erroring(); + +-- +-- Once upon a time we didn't clean-up properly after erroring out. Here the first +-- statement errors, but the second succeeds. In old times, because of failure in +-- clean-up, both rows were visible. But only the 2nd one should be visible. 
+-- +insert into test_tr values(5); +insert into test_tr values(6); +SELECT * FROM test_tr ORDER BY i; + +drop table test_tr; + +-- +-- https://github.com/citusdata/cstore2/issues/32 +-- +create table events( + user_id bigint, + event_id bigint, + event_time timestamp default now(), + value float default random()) + PARTITION BY RANGE (event_time); + +create table events_p2020_11_04_102965 +PARTITION OF events FOR VALUES FROM ('2020-11-04 00:00:00+01') TO ('2020-11-05 00:00:00+01') +USING cstore_tableam; + +create table events_trigger_target( + user_id bigint, + avg float, + __count__ bigint +) USING cstore_tableam; + +CREATE OR REPLACE FUNCTION user_value_by_day() + RETURNS trigger + LANGUAGE plpgsql +AS $function$ +BEGIN + IF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN + EXECUTE format($exec_format$INSERT INTO %s AS __mat__ SELECT user_id, 0.1 AS avg, pg_catalog.count(*) AS __count__ FROM __ins__ events GROUP BY user_id; + $exec_format$, TG_ARGV[0]); + END IF; + IF (TG_OP = 'DELETE' OR TG_OP = 'UPDATE') THEN + RAISE EXCEPTION $ex$MATERIALIZED VIEW 'user_value_by_day' on table 'events' does not support UPDATE/DELETE$ex$; + END IF; + IF (TG_OP = 'TRUNCATE') THEN + EXECUTE format($exec_format$TRUNCATE TABLE %s; $exec_format$, TG_ARGV[0]); + END IF; + RETURN NULL; +END; +$function$; + +create trigger "user_value_by_day_INSERT" AFTER INSERT ON events + REFERENCING NEW TABLE AS __ins__ + FOR EACH STATEMENT EXECUTE FUNCTION user_value_by_day('events_trigger_target'); + +COPY events FROM STDIN WITH (FORMAT 'csv'); +1,1,"2020-11-04 15:54:02.226999-08",1.1 +2,3,"2020-11-04 16:54:02.226999-08",2.2 +\. 
+ +SELECT * FROM events ORDER BY user_id; +SELECT * FROM events_trigger_target ORDER BY user_id; + +DROP TABLE events; From 6711340ea695c865e0a0fbad5ff24c8f49e9f6bd Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Tue, 17 Nov 2020 20:00:57 -0800 Subject: [PATCH 122/124] Add prepared xact & stmt tests for columnar --- src/test/regress/expected/am_transactions.out | 102 ++++++++++++++++++ src/test/regress/sql/am_transactions.sql | 37 +++++++ 2 files changed, 139 insertions(+) diff --git a/src/test/regress/expected/am_transactions.out b/src/test/regress/expected/am_transactions.out index 1410272dd..d2490ab7d 100644 --- a/src/test/regress/expected/am_transactions.out +++ b/src/test/regress/expected/am_transactions.out @@ -139,4 +139,106 @@ SELECT * FROM t ORDER BY a; 5 | 10 (5 rows) +-- +-- Prepared transactions +-- +BEGIN; +INSERT INTO t VALUES (6, 12); +INSERT INTO t VALUES (7, 14); +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 + 6 | 12 + 7 | 14 +(7 rows) + +PREPARE TRANSACTION 'tx01'; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +ROLLBACK PREPARED 'tx01'; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +BEGIN; +INSERT INTO t VALUES (6, 13); +INSERT INTO t VALUES (7, 15); +PREPARE TRANSACTION 'tx02'; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 +(5 rows) + +COMMIT PREPARED 'tx02'; +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 + 6 | 13 + 7 | 15 +(7 rows) + +-- +-- Prepared statements +-- +PREPARE p1(int) AS INSERT INTO t VALUES (8, $1), 
(9, $1+2); +EXPLAIN (COSTS OFF) EXECUTE p1(16); + QUERY PLAN +--------------------------------------------------------------------- + Insert on t + -> Values Scan on "*VALUES*" +(2 rows) + +EXECUTE p1(16); +EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p1(20); + QUERY PLAN +--------------------------------------------------------------------- + Insert on t (actual rows=0 loops=1) + -> Values Scan on "*VALUES*" (actual rows=2 loops=1) +(2 rows) + +SELECT * FROM t ORDER BY a; + a | b +--------------------------------------------------------------------- + 1 | 2 + 2 | 4 + 3 | 6 + 4 | 8 + 5 | 10 + 6 | 13 + 7 | 15 + 8 | 16 + 8 | 20 + 9 | 18 + 9 | 22 +(11 rows) + DROP TABLE t; diff --git a/src/test/regress/sql/am_transactions.sql b/src/test/regress/sql/am_transactions.sql index b84790fb5..b99da0313 100644 --- a/src/test/regress/sql/am_transactions.sql +++ b/src/test/regress/sql/am_transactions.sql @@ -67,4 +67,41 @@ SELECT * FROM t; ROLLBACK; SELECT * FROM t ORDER BY a; +-- +-- Prepared transactions +-- + +BEGIN; +INSERT INTO t VALUES (6, 12); +INSERT INTO t VALUES (7, 14); +SELECT * FROM t ORDER BY a; +PREPARE TRANSACTION 'tx01'; + +SELECT * FROM t ORDER BY a; + +ROLLBACK PREPARED 'tx01'; + +SELECT * FROM t ORDER BY a; + +BEGIN; +INSERT INTO t VALUES (6, 13); +INSERT INTO t VALUES (7, 15); +PREPARE TRANSACTION 'tx02'; + +SELECT * FROM t ORDER BY a; + +COMMIT PREPARED 'tx02'; + +SELECT * FROM t ORDER BY a; + +-- +-- Prepared statements +-- + +PREPARE p1(int) AS INSERT INTO t VALUES (8, $1), (9, $1+2); +EXPLAIN (COSTS OFF) EXECUTE p1(16); +EXECUTE p1(16); +EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p1(20); +SELECT * FROM t ORDER BY a; + DROP TABLE t; From 2747fd80ff2b283a3431d082e808fbffd23e051c Mon Sep 17 00:00:00 2001 From: Hadi Moshayedi Date: Tue, 17 Nov 2020 20:13:20 -0800 Subject: [PATCH 123/124] Add prepared materialized view tests for columnar --- src/test/regress/columnar_am_schedule | 1 + 
src/test/regress/expected/am_matview.out | 77 ++++++++++++++++++++++++ src/test/regress/sql/am_matview.sql | 34 +++++++++++ 3 files changed, 112 insertions(+) create mode 100644 src/test/regress/expected/am_matview.out create mode 100644 src/test/regress/sql/am_matview.sql diff --git a/src/test/regress/columnar_am_schedule b/src/test/regress/columnar_am_schedule index 3a4a46270..2fddfd7cb 100644 --- a/src/test/regress/columnar_am_schedule +++ b/src/test/regress/columnar_am_schedule @@ -18,3 +18,4 @@ test: am_trigger test: am_tableoptions test: am_recursive test: am_transactions +test: am_matview diff --git a/src/test/regress/expected/am_matview.out b/src/test/regress/expected/am_matview.out new file mode 100644 index 000000000..79595b05d --- /dev/null +++ b/src/test/regress/expected/am_matview.out @@ -0,0 +1,77 @@ +-- +-- Testing we materialized views properly +-- +CREATE TABLE t(a int, b int) USING cstore_tableam; +INSERT INTO t SELECT floor(i / 4), 2 * i FROM generate_series(1, 10) i; +CREATE MATERIALIZED VIEW t_view(a, bsum, cnt) USING cstore_tableam AS + SELECT a, sum(b), count(*) FROM t GROUP BY a; +SELECT * FROM t_view a ORDER BY a; + a | bsum | cnt +--------------------------------------------------------------------- + 0 | 12 | 3 + 1 | 44 | 4 + 2 | 54 | 3 +(3 rows) + +INSERT INTO t SELECT floor(i / 4), 2 * i FROM generate_series(11, 20) i; +SELECT * FROM t_view a ORDER BY a; + a | bsum | cnt +--------------------------------------------------------------------- + 0 | 12 | 3 + 1 | 44 | 4 + 2 | 54 | 3 +(3 rows) + +REFRESH MATERIALIZED VIEW t_view; +SELECT * FROM t_view a ORDER BY a; + a | bsum | cnt +--------------------------------------------------------------------- + 0 | 12 | 3 + 1 | 44 | 4 + 2 | 76 | 4 + 3 | 108 | 4 + 4 | 140 | 4 + 5 | 40 | 1 +(6 rows) + +-- verify that we have created metadata entries for the materialized view +SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset +SELECT count(*) FROM cstore.cstore_data_files WHERE 
relfilenode=:relfilenode; + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; + count +--------------------------------------------------------------------- + 1 +(1 row) + +SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode; + count +--------------------------------------------------------------------- + 3 +(1 row) + +DROP TABLE t CASCADE; +NOTICE: drop cascades to materialized view t_view +-- dropping must remove metadata +SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; + count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode; + count +--------------------------------------------------------------------- + 0 +(1 row) + diff --git a/src/test/regress/sql/am_matview.sql b/src/test/regress/sql/am_matview.sql new file mode 100644 index 000000000..a87b68b43 --- /dev/null +++ b/src/test/regress/sql/am_matview.sql @@ -0,0 +1,34 @@ +-- +-- Testing we materialized views properly +-- + +CREATE TABLE t(a int, b int) USING cstore_tableam; + +INSERT INTO t SELECT floor(i / 4), 2 * i FROM generate_series(1, 10) i; + +CREATE MATERIALIZED VIEW t_view(a, bsum, cnt) USING cstore_tableam AS + SELECT a, sum(b), count(*) FROM t GROUP BY a; + +SELECT * FROM t_view a ORDER BY a; + +INSERT INTO t SELECT floor(i / 4), 2 * i FROM generate_series(11, 20) i; + +SELECT * FROM t_view a ORDER BY a; + +REFRESH MATERIALIZED VIEW t_view; + +SELECT * FROM t_view a ORDER BY a; + +-- verify that we have created metadata entries for the materialized view +SELECT relfilenode FROM pg_class WHERE relname='t_view' \gset + +SELECT count(*) FROM 
cstore.cstore_data_files WHERE relfilenode=:relfilenode; +SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; +SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode; + +DROP TABLE t CASCADE; + +-- dropping must remove metadata +SELECT count(*) FROM cstore.cstore_data_files WHERE relfilenode=:relfilenode; +SELECT count(*) FROM cstore.cstore_stripes WHERE relfilenode=:relfilenode; +SELECT count(*) FROM cstore.cstore_skipnodes WHERE relfilenode=:relfilenode; From 9c449112261c2e8f081514d3d5b5f972b26d64de Mon Sep 17 00:00:00 2001 From: SaitTalhaNisanci Date: Wed, 18 Nov 2020 17:16:06 +0300 Subject: [PATCH 124/124] Improve error messages in shard pruning (#4324) --- .../distributed/planner/shard_pruning.c | 7 +- .../regress/expected/multi_hash_pruning.out | 310 +++++++++--------- ...multi_repartition_join_task_assignment.out | 24 +- .../expected/multi_task_assignment_policy.out | 16 +- 4 files changed, 180 insertions(+), 177 deletions(-) diff --git a/src/backend/distributed/planner/shard_pruning.c b/src/backend/distributed/planner/shard_pruning.c index 1eebda1dc..9e720b235 100644 --- a/src/backend/distributed/planner/shard_pruning.c +++ b/src/backend/distributed/planner/shard_pruning.c @@ -486,6 +486,7 @@ PruneShards(Oid relationId, Index rangeTableId, List *whereClauseList, if (IsLoggableLevel(DEBUG3)) { + char *relationName = get_rel_name(relationId); if (foundRestriction && debugLoggedPruningInstances != NIL) { List *deparseCtx = deparse_context_for("unknown", relationId); @@ -497,10 +498,12 @@ PruneShards(Oid relationId, Index rangeTableId, List *whereClauseList, } else { - ereport(DEBUG3, (errmsg("no valid constraints found"))); + ereport(DEBUG3, (errmsg("no sharding pruning constraints on %s found", + relationName))); } - ereport(DEBUG3, (errmsg("shard count: %d", list_length(prunedList)))); + ereport(DEBUG3, (errmsg("shard count after pruning for %s: %d", relationName, + list_length(prunedList)))); } /* if requested, 
copy the partition value constant */ diff --git a/src/test/regress/expected/multi_hash_pruning.out b/src/test/regress/expected/multi_hash_pruning.out index 66ef3336f..ed7d935f8 100644 --- a/src/test/regress/expected/multi_hash_pruning.out +++ b/src/test/regress/expected/multi_hash_pruning.out @@ -239,12 +239,12 @@ SELECT count(*) FROM lineitem_hash_part DEBUG: constraint value: '1'::bigint DEBUG: constraint value: '2'::bigint DEBUG: constraint value: '3'::bigint -DEBUG: shard count: 3 +DEBUG: shard count after pruning for lineitem_hash_part: 3 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: '1'::bigint DEBUG: constraint value: '2'::bigint DEBUG: constraint value: '3'::bigint -DEBUG: shard count: 3 +DEBUG: shard count after pruning for lineitem_hash_part: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -258,12 +258,12 @@ SELECT count(*) FROM lineitem_hash_part DEBUG: constraint value: '1'::bigint DEBUG: constraint value: '2'::bigint DEBUG: constraint value: '3'::bigint -DEBUG: shard count: 3 +DEBUG: shard count after pruning for lineitem_hash_part: 3 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: '1'::bigint DEBUG: constraint value: '2'::bigint DEBUG: constraint value: '3'::bigint -DEBUG: shard count: 3 +DEBUG: shard count after pruning for lineitem_hash_part: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -283,11 +283,11 @@ DEBUG: Creating router plan SELECT count(*) FROM lineitem_hash_part WHERE l_orderkey = ANY (NULL); -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found 
-DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -299,11 +299,11 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM lineitem_hash_part WHERE l_orderkey IN (NULL) OR TRUE; -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -315,11 +315,11 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM lineitem_hash_part WHERE l_orderkey = ANY (NULL) OR TRUE; -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -331,15 +331,15 @@ DEBUG: assigned task to node localhost:xxxxx -- Check whether we support IN/ANY in subquery SELECT count(*) FROM lineitem_hash_part WHERE l_orderkey IN (SELECT l_orderkey FROM lineitem_hash_part); -DEBUG: no valid constraints found -DEBUG: shard count: 4 -DEBUG: 
no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -350,15 +350,15 @@ DEBUG: assigned task to node localhost:xxxxx (1 row) SELECT count(*) FROM lineitem_hash_part WHERE l_orderkey = ANY (SELECT l_orderkey FROM lineitem_hash_part); -DEBUG: no valid constraints found -DEBUG: shard count: 4 -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 +DEBUG: no sharding pruning constraints on lineitem_hash_part found +DEBUG: shard count after pruning for lineitem_hash_part: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node 
localhost:xxxxx @@ -374,7 +374,7 @@ SELECT count(*) FROM lineitem DEBUG: Router planner does not support append-partitioned tables. DEBUG: constraint (lteq) value: '3'::bigint DEBUG: constraint (gteq) value: '1'::bigint -DEBUG: shard count: 1 +DEBUG: shard count after pruning for lineitem: 1 DEBUG: assigned task to node localhost:xxxxx count --------------------------------------------------------------------- @@ -386,7 +386,7 @@ SELECT count(*) FROM lineitem DEBUG: Router planner does not support append-partitioned tables. DEBUG: constraint (lteq) value: '3'::bigint DEBUG: constraint (gteq) value: '1'::bigint -DEBUG: shard count: 1 +DEBUG: shard count after pruning for lineitem: 1 DEBUG: assigned task to node localhost:xxxxx count --------------------------------------------------------------------- @@ -400,7 +400,7 @@ DEBUG: Router planner does not support append-partitioned tables. DEBUG: constraint value: '1'::bigint DEBUG: constraint value: '2'::bigint DEBUG: constraint value: '3'::bigint -DEBUG: shard count: 1 +DEBUG: shard count after pruning for lineitem: 1 DEBUG: assigned task to node localhost:xxxxx count --------------------------------------------------------------------- @@ -413,7 +413,7 @@ DEBUG: Router planner does not support append-partitioned tables. DEBUG: constraint value: '1'::bigint DEBUG: constraint value: '2'::bigint DEBUG: constraint value: '3'::bigint -DEBUG: shard count: 1 +DEBUG: shard count after pruning for lineitem: 1 DEBUG: assigned task to node localhost:xxxxx count --------------------------------------------------------------------- @@ -423,8 +423,8 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM lineitem WHERE l_orderkey = ANY(NULL) OR TRUE; DEBUG: Router planner does not support append-partitioned tables. 
-DEBUG: no valid constraints found -DEBUG: shard count: 2 +DEBUG: no sharding pruning constraints on lineitem found +DEBUG: shard count after pruning for lineitem: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -437,7 +437,7 @@ SELECT count(*) FROM lineitem_range DEBUG: constraint value: '1'::bigint DEBUG: constraint value: '2'::bigint DEBUG: constraint value: '3'::bigint -DEBUG: shard count: 1 +DEBUG: shard count after pruning for lineitem_range: 1 DEBUG: Creating router plan count --------------------------------------------------------------------- @@ -449,7 +449,7 @@ SELECT count(*) FROM lineitem_range DEBUG: constraint value: '1'::bigint DEBUG: constraint value: '2'::bigint DEBUG: constraint value: '3'::bigint -DEBUG: shard count: 1 +DEBUG: shard count after pruning for lineitem_range: 1 DEBUG: Creating router plan count --------------------------------------------------------------------- @@ -458,11 +458,11 @@ DEBUG: Creating router plan SELECT count(*) FROM lineitem_range WHERE l_orderkey = ANY(NULL) OR TRUE; -DEBUG: no valid constraints found -DEBUG: shard count: 2 +DEBUG: no sharding pruning constraints on lineitem_range found +DEBUG: shard count after pruning for lineitem_range: 2 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 2 +DEBUG: no sharding pruning constraints on lineitem_range found +DEBUG: shard count after pruning for lineitem_range: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -474,11 +474,11 @@ DEBUG: assigned task to node localhost:xxxxx -- equality operator SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey < ALL ('{1,2,3}'); -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot 
handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -492,11 +492,11 @@ DEBUG: assigned task to node localhost:xxxxx -- columns are used with ANY/IN/ALL SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1 OR o_totalprice IN (20, 30); -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -508,11 +508,11 @@ DEBUG: assigned task to node localhost:xxxxx -- Check that we cannot prune for mutable functions. 
SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = (random() + 100); -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -524,11 +524,11 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = (random() + 100) OR o_orderkey = 1; -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -541,7 +541,7 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = (random() + 100) AND o_orderkey = 1; DEBUG: constraint value: 1 -DEBUG: shard count: 1 +DEBUG: shard count after pruning for orders_hash_partitioned: 1 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 count @@ -553,15 +553,15 @@ DEBUG: query has a single distribution column value: 1 SELECT count(*) FROM orders_hash_partitioned orders1, orders_hash_partitioned orders2 WHERE orders1.o_orderkey = 
orders2.o_orderkey; -DEBUG: no valid constraints found -DEBUG: shard count: 4 -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: join prunable for intervals [-2147483648,-1073741825] and [-1073741824,-1] DEBUG: join prunable for intervals [-2147483648,-1073741825] and [0,1073741823] DEBUG: join prunable for intervals [-2147483648,-1073741825] and [1073741824,2147483647] @@ -589,9 +589,9 @@ SELECT count(*) AND orders1.o_orderkey = 1 AND orders2.o_orderkey is NULL; DEBUG: constraint value: 1 -DEBUG: shard count: 1 +DEBUG: shard count after pruning for orders_hash_partitioned: 1 DEBUG: constraint value: 1 -DEBUG: shard count: 1 +DEBUG: shard count after pruning for orders_hash_partitioned: 1 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 count @@ -601,11 +601,11 @@ DEBUG: query has a single distribution column value: 1 -- All shards used without constraints SELECT count(*) FROM orders_hash_partitioned; -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: 
shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -619,7 +619,7 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1; DEBUG: constraint value: 1 -DEBUG: shard count: 1 +DEBUG: shard count after pruning for orders_hash_partitioned: 1 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 count @@ -631,7 +631,7 @@ DEBUG: query has a single distribution column value: 1 SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1 AND (o_custkey = 11 OR o_custkey = 22); DEBUG: constraint value: 1 -DEBUG: shard count: 1 +DEBUG: shard count after pruning for orders_hash_partitioned: 1 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 count @@ -644,11 +644,11 @@ SELECT count(*) FROM orders_hash_partitioned WHERE (o_orderkey = 1 OR o_orderkey = 2); DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -661,11 +661,11 @@ SELECT count(*) FROM orders_hash_partitioned WHERE (o_orderkey = 1 OR o_orderkey = 2) AND (o_custkey = 11 OR o_custkey = 22); DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: 
shard count after pruning for orders_hash_partitioned: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -680,13 +680,13 @@ DEBUG: constraint value: 1 DEBUG: constraint value: 1 DEBUG: constraint value: 2 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 1 DEBUG: constraint value: 2 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -699,11 +699,11 @@ SELECT count(*) FROM orders_hash_partitioned WHERE (o_orderkey IN (1,2)) AND (o_custkey = 11 OR o_custkey = 22 OR o_custkey = 33); DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -716,11 +716,11 @@ SELECT count(*) FROM orders_hash_partitioned WHERE (o_orderkey IN (1,2)) AND (o_totalprice < 11 OR o_totalprice > 19) AND o_shippriority > 100 AND (o_custkey = 11 OR o_custkey = 22); DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -735,13 +735,13 @@ DEBUG: 
constraint value: 1 DEBUG: constraint value: 2 DEBUG: constraint value: 2 DEBUG: constraint value: 3 -DEBUG: shard count: 3 +DEBUG: shard count after pruning for orders_hash_partitioned: 3 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 DEBUG: constraint value: 2 DEBUG: constraint value: 3 -DEBUG: shard count: 3 +DEBUG: shard count after pruning for orders_hash_partitioned: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -753,11 +753,11 @@ DEBUG: assigned task to node localhost:xxxxx -- All shards used with prunable expression ORed with unprunable expression SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey IN (1,2) OR o_custkey = 33; -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -773,12 +773,12 @@ SELECT count(*) FROM orders_hash_partitioned DEBUG: constraint value: 1 DEBUG: constraint value: 2 DEBUG: constraint value: 3 -DEBUG: shard count: 3 +DEBUG: shard count after pruning for orders_hash_partitioned: 3 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 DEBUG: constraint value: 3 -DEBUG: shard count: 3 +DEBUG: shard count after pruning for orders_hash_partitioned: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node 
localhost:xxxxx @@ -792,11 +792,11 @@ SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1 OR (o_orderkey = 2 AND (o_custkey = 11 OR (o_orderkey = 3 AND o_custkey = 44))); DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -809,11 +809,11 @@ SELECT count(*) FROM orders_hash_partitioned WHERE (o_orderkey IN (1,2)) AND (o_custkey = 11 OR o_custkey = 22 OR o_custkey = 33) AND o_totalprice <= 20; DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -826,11 +826,11 @@ SELECT count(*) FROM orders_hash_partitioned WHERE (o_orderkey IN (1,2)) AND (o_custkey = 11 OR o_custkey = 33) AND o_custkey = 22; DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -841,11 +841,11 @@ DEBUG: assigned task to node localhost:xxxxx -- All shards used with prunable SAO constraint ORed with unprunable nested expression 
SELECT count(*) FROM orders_hash_partitioned WHERE ((o_orderkey IN (1,2)) AND (o_custkey = 11 OR o_custkey = 22)) OR o_custkey = 33; -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -861,12 +861,12 @@ SELECT count(*) FROM orders_hash_partitioned DEBUG: constraint value: 3 DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 3 +DEBUG: shard count after pruning for orders_hash_partitioned: 3 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 3 DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 3 +DEBUG: shard count after pruning for orders_hash_partitioned: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -878,11 +878,11 @@ DEBUG: assigned task to node localhost:xxxxx -- All shards used with ORed top level unprunable expression SELECT count(*) FROM orders_hash_partitioned WHERE o_custkey = 11 OR (o_orderkey = 2 AND o_custkey = 22); -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for 
orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -896,7 +896,7 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1 OR (o_orderkey = 2 AND (o_orderkey = 3 OR (o_orderkey = 1 AND o_custkey = 11))); DEBUG: constraint value: 1 -DEBUG: shard count: 1 +DEBUG: shard count after pruning for orders_hash_partitioned: 1 DEBUG: Creating router plan count --------------------------------------------------------------------- @@ -906,8 +906,8 @@ DEBUG: Creating router plan -- Single shard used when top prunable expression is restrictive with nested ANDs SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1 AND ((o_orderkey = 2 OR o_orderkey = 3) AND (o_custkey = 11 OR o_custkey = 22)); -DEBUG: no valid constraints found -DEBUG: shard count: 0 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 0 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 count @@ -921,12 +921,12 @@ SELECT count(*) FROM orders_hash_partitioned DEBUG: constraint value: 1 DEBUG: constraint value: 2 DEBUG: constraint value: 3 -DEBUG: shard count: 3 +DEBUG: shard count after pruning for orders_hash_partitioned: 3 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 DEBUG: constraint value: 3 -DEBUG: shard count: 3 +DEBUG: shard count after pruning for orders_hash_partitioned: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -938,11 +938,11 @@ DEBUG: assigned task to node localhost:xxxxx -- Deeply nested non prunable expression uses all shards SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1 OR ((o_orderkey = 2 OR o_custkey = 11) AND 
(o_custkey = 22 OR o_custkey = 33)); -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -955,11 +955,11 @@ DEBUG: assigned task to node localhost:xxxxx -- a OR partkey != x Uses all shards SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1 OR o_orderkey != 2; -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -972,11 +972,11 @@ DEBUG: assigned task to node localhost:xxxxx -- a OR partkey IS NULL Uses all shards SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1 OR o_orderkey IS NULL; -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found 
+DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -989,11 +989,11 @@ DEBUG: assigned task to node localhost:xxxxx -- a OR partkey IS NOT NULL Uses all shards SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey = 1 OR o_orderkey IS NOT NULL; -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -1015,11 +1015,11 @@ DEBUG: Creating router plan -- Check that NOT is handled with EQs ORed SELECT count(*) FROM orders_hash_partitioned WHERE NOT (o_orderkey = 2 OR o_orderkey = 3); -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -1034,11 +1034,11 @@ SELECT count(*) FROM orders_hash_partitioned WHERE NOT (o_orderkey != 2 AND o_orderkey != 3); DEBUG: constraint value: 2 DEBUG: constraint value: 3 -DEBUG: shard count: 2 +DEBUG: shard count after pruning 
for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 2 DEBUG: constraint value: 3 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -1049,11 +1049,11 @@ DEBUG: assigned task to node localhost:xxxxx -- Check that NOT is handled with EQs ANDed SELECT count(*) FROM orders_hash_partitioned WHERE NOT (o_orderkey = 2 AND o_orderkey = 3); -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -1067,24 +1067,24 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey IN (1,2) AND o_custkey NOT IN (SELECT o_custkey FROM orders_hash_partitioned WHERE o_orderkey = 1); DEBUG: constraint value: 1 -DEBUG: shard count: 1 +DEBUG: shard count after pruning for orders_hash_partitioned: 1 DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 -DEBUG: shard count: 1 +DEBUG: shard count after pruning for orders_hash_partitioned: 1 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 1 DEBUG: generating subplan XXX_1 for subquery SELECT o_custkey FROM public.orders_hash_partitioned WHERE (o_orderkey 
OPERATOR(pg_catalog.=) 1) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.orders_hash_partitioned WHERE ((o_orderkey OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2])) AND (NOT (o_custkey OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.o_custkey FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(o_custkey integer))))) DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 1 DEBUG: constraint value: 2 -DEBUG: shard count: 2 +DEBUG: shard count after pruning for orders_hash_partitioned: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx count @@ -1096,21 +1096,21 @@ DEBUG: assigned task to node localhost:xxxxx SELECT count(*) FROM orders_hash_partitioned WHERE o_orderkey IN (1,2) OR o_custkey NOT IN (SELECT o_custkey FROM orders_hash_partitioned WHERE o_orderkey = 3); DEBUG: constraint value: 3 -DEBUG: shard count: 1 -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: shard count after pruning for orders_hash_partitioned: 1 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries DEBUG: constraint value: 3 -DEBUG: shard count: 1 +DEBUG: shard count after pruning for orders_hash_partitioned: 1 DEBUG: Creating router plan DEBUG: query has a single distribution column value: 3 DEBUG: generating subplan XXX_1 for subquery SELECT o_custkey FROM public.orders_hash_partitioned WHERE (o_orderkey OPERATOR(pg_catalog.=) 3) DEBUG: Plan XXX query after replacing subqueries and CTEs: SELECT count(*) AS count FROM public.orders_hash_partitioned WHERE ((o_orderkey OPERATOR(pg_catalog.=) ANY (ARRAY[1, 2])) OR (NOT (o_custkey 
OPERATOR(pg_catalog.=) ANY (SELECT intermediate_result.o_custkey FROM read_intermediate_result('XXX_1'::text, 'binary'::citus_copy_format) intermediate_result(o_custkey integer))))) -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: Router planner cannot handle multi-shard select queries -DEBUG: no valid constraints found -DEBUG: shard count: 4 +DEBUG: no sharding pruning constraints on orders_hash_partitioned found +DEBUG: shard count after pruning for orders_hash_partitioned: 4 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx diff --git a/src/test/regress/expected/multi_repartition_join_task_assignment.out b/src/test/regress/expected/multi_repartition_join_task_assignment.out index 7d00a08c1..77d0158bc 100644 --- a/src/test/regress/expected/multi_repartition_join_task_assignment.out +++ b/src/test/regress/expected/multi_repartition_join_task_assignment.out @@ -18,12 +18,12 @@ FROM WHERE o_custkey = c_custkey; DEBUG: Router planner does not support append-partitioned tables. -DEBUG: no valid constraints found -DEBUG: shard count: 2 +DEBUG: no sharding pruning constraints on orders found +DEBUG: shard count after pruning for orders: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx -DEBUG: no valid constraints found -DEBUG: shard count: 3 +DEBUG: no sharding pruning constraints on customer_append found +DEBUG: shard count after pruning for customer_append: 3 DEBUG: join prunable for intervals [1,1000] and [1001,2000] DEBUG: join prunable for intervals [1,1000] and [6001,7000] DEBUG: join prunable for intervals [1001,2000] and [1,1000] @@ -56,13 +56,13 @@ WHERE o_custkey = c_custkey AND o_orderkey = l_orderkey; DEBUG: Router planner does not support append-partitioned tables. 
-DEBUG: no valid constraints found -DEBUG: shard count: 3 +DEBUG: no sharding pruning constraints on customer_append found +DEBUG: shard count after pruning for customer_append: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx -DEBUG: no valid constraints found -DEBUG: shard count: 2 +DEBUG: no sharding pruning constraints on lineitem found +DEBUG: shard count after pruning for lineitem: 2 DEBUG: join prunable for intervals [1,5986] and [8997,14947] DEBUG: join prunable for intervals [8997,14947] and [1,5986] DEBUG: pruning merge fetch taskId 1 @@ -85,12 +85,12 @@ FROM WHERE l_partkey = c_nationkey; DEBUG: Router planner does not support append-partitioned tables. -DEBUG: no valid constraints found -DEBUG: shard count: 2 +DEBUG: no sharding pruning constraints on lineitem found +DEBUG: shard count after pruning for lineitem: 2 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx -DEBUG: no valid constraints found -DEBUG: shard count: 3 +DEBUG: no sharding pruning constraints on customer_append found +DEBUG: shard count after pruning for customer_append: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx diff --git a/src/test/regress/expected/multi_task_assignment_policy.out b/src/test/regress/expected/multi_task_assignment_policy.out index 4cb629fa0..4c52df355 100644 --- a/src/test/regress/expected/multi_task_assignment_policy.out +++ b/src/test/regress/expected/multi_task_assignment_policy.out @@ -70,8 +70,8 @@ SET client_min_messages TO DEBUG3; SET citus.task_assignment_policy TO 'greedy'; EXPLAIN (COSTS OFF) SELECT count(*) FROM task_assignment_test_table; DEBUG: Router planner does not support append-partitioned tables. 
-DEBUG: no valid constraints found -DEBUG: shard count: 3 +DEBUG: no sharding pruning constraints on task_assignment_test_table found +DEBUG: shard count after pruning for task_assignment_test_table: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -84,8 +84,8 @@ DEBUG: assigned task to node localhost:xxxxx EXPLAIN (COSTS OFF) SELECT count(*) FROM task_assignment_test_table; DEBUG: Router planner does not support append-partitioned tables. -DEBUG: no valid constraints found -DEBUG: shard count: 3 +DEBUG: no sharding pruning constraints on task_assignment_test_table found +DEBUG: shard count after pruning for task_assignment_test_table: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -100,8 +100,8 @@ DEBUG: assigned task to node localhost:xxxxx SET citus.task_assignment_policy TO 'first-replica'; EXPLAIN (COSTS OFF) SELECT count(*) FROM task_assignment_test_table; DEBUG: Router planner does not support append-partitioned tables. -DEBUG: no valid constraints found -DEBUG: shard count: 3 +DEBUG: no sharding pruning constraints on task_assignment_test_table found +DEBUG: shard count after pruning for task_assignment_test_table: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx @@ -114,8 +114,8 @@ DEBUG: assigned task to node localhost:xxxxx EXPLAIN (COSTS OFF) SELECT count(*) FROM task_assignment_test_table; DEBUG: Router planner does not support append-partitioned tables. -DEBUG: no valid constraints found -DEBUG: shard count: 3 +DEBUG: no sharding pruning constraints on task_assignment_test_table found +DEBUG: shard count after pruning for task_assignment_test_table: 3 DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx DEBUG: assigned task to node localhost:xxxxx