From fdf090758b2ddb8baf3a84a98b919e869f273bc8 Mon Sep 17 00:00:00 2001 From: Ying Xu <32597660+yxu2162@users.noreply.github.com> Date: Wed, 27 Jul 2022 11:06:49 -0700 Subject: [PATCH] Bugfix for IN clause to be considered during planner phase in Columnar (#6030) Issue #5803 reports that columnar does not consider IN clauses as pushdown candidates during planning, so they are never used as chunk group filters. This PR fixes that by handling ScalarArrayOpExpr in ExtractPushdownClause: the clause is now returned for pushdown unless it contains volatile functions, instead of being skipped. Also adds regression tests covering IN with and without a volatile function. --- src/backend/columnar/columnar_customscan.c | 12 ++++ .../expected/columnar_chunk_filtering.out | 64 +++++++++++++++++++ .../regress/sql/columnar_chunk_filtering.sql | 41 ++++++++++++ 3 files changed, 117 insertions(+) diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c index 4d4ba63f0..98c13e2a7 100644 --- a/src/backend/columnar/columnar_customscan.c +++ b/src/backend/columnar/columnar_customscan.c @@ -824,6 +824,18 @@ ExtractPushdownClause(PlannerInfo *root, RelOptInfo *rel, Node *node) } } + if (IsA(node, ScalarArrayOpExpr)) + { + if (!contain_volatile_functions(node)) + { + return (Expr *) node; + } + else + { + return NULL; + } + } + if (!IsA(node, OpExpr) || list_length(((OpExpr *) node)->args) != 2) { ereport(ColumnarPlannerDebugLevel, diff --git a/src/test/regress/expected/columnar_chunk_filtering.out b/src/test/regress/expected/columnar_chunk_filtering.out index 980b2454f..09688d7aa 100644 --- a/src/test/regress/expected/columnar_chunk_filtering.out +++ b/src/test/regress/expected/columnar_chunk_filtering.out @@ -1066,3 +1066,67 @@ RESET columnar.max_custom_scan_paths; RESET columnar.qual_pushdown_correlation_threshold; RESET columnar.planner_debug_level; DROP TABLE pushdown_test; +-- https://github.com/citusdata/citus/issues/5803 +CREATE TABLE pushdown_test(id int, country text) using columnar; +BEGIN; + INSERT INTO pushdown_test VALUES(1, 'AL'); + INSERT INTO pushdown_test VALUES(2, 'AU'); +END;
+BEGIN; + INSERT INTO pushdown_test VALUES(3, 'BR'); + INSERT INTO pushdown_test VALUES(4, 'BT'); +END; +BEGIN; + INSERT INTO pushdown_test VALUES(5, 'PK'); + INSERT INTO pushdown_test VALUES(6, 'PA'); +END; +BEGIN; + INSERT INTO pushdown_test VALUES(7, 'USA'); + INSERT INTO pushdown_test VALUES(8, 'ZW'); +END; +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW'); + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on pushdown_test (actual rows=3 loops=1) + Filter: (country = ANY ('{USA,BR,ZW}'::text[])) + Rows Removed by Filter: 1 + Columnar Projected Columns: id, country + Columnar Chunk Group Filters: (country = ANY ('{USA,BR,ZW}'::text[])) + Columnar Chunk Groups Removed by Filter: 2 +(6 rows) + +SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW'); + id +--------------------------------------------------------------------- + 3 + 7 + 8 +(3 rows) + +-- test for volatile functions with IN +CREATE FUNCTION volatileFunction() returns TEXT language plpgsql AS +$$ +BEGIN + return 'AL'; +END; +$$; +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM pushdown_test WHERE country IN ('USA', 'ZW', volatileFunction()); + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on pushdown_test (actual rows=3 loops=1) + Filter: (country = ANY (ARRAY['USA'::text, 'ZW'::text, volatilefunction()])) + Rows Removed by Filter: 5 + Columnar Projected Columns: id, country +(4 rows) + +SELECT * FROM pushdown_test WHERE country IN ('USA', 'ZW', volatileFunction()); + id | country +--------------------------------------------------------------------- + 1 | AL + 7 | USA + 8 | ZW +(3 rows) + +DROP TABLE pushdown_test; diff --git a/src/test/regress/sql/columnar_chunk_filtering.sql b/src/test/regress/sql/columnar_chunk_filtering.sql index a2d2d628e..9e7d43363 100644 --- 
a/src/test/regress/sql/columnar_chunk_filtering.sql +++ b/src/test/regress/sql/columnar_chunk_filtering.sql @@ -445,3 +445,44 @@ RESET columnar.max_custom_scan_paths; RESET columnar.qual_pushdown_correlation_threshold; RESET columnar.planner_debug_level; DROP TABLE pushdown_test; + +-- https://github.com/citusdata/citus/issues/5803 + +CREATE TABLE pushdown_test(id int, country text) using columnar; + +BEGIN; + INSERT INTO pushdown_test VALUES(1, 'AL'); + INSERT INTO pushdown_test VALUES(2, 'AU'); +END; + +BEGIN; + INSERT INTO pushdown_test VALUES(3, 'BR'); + INSERT INTO pushdown_test VALUES(4, 'BT'); +END; + +BEGIN; + INSERT INTO pushdown_test VALUES(5, 'PK'); + INSERT INTO pushdown_test VALUES(6, 'PA'); +END; +BEGIN; + INSERT INTO pushdown_test VALUES(7, 'USA'); + INSERT INTO pushdown_test VALUES(8, 'ZW'); +END; +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW'); + +SELECT id FROM pushdown_test WHERE country IN ('USA', 'BR', 'ZW'); + +-- test for volatile functions with IN +CREATE FUNCTION volatileFunction() returns TEXT language plpgsql AS +$$ +BEGIN + return 'AL'; +END; +$$; +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM pushdown_test WHERE country IN ('USA', 'ZW', volatileFunction()); + +SELECT * FROM pushdown_test WHERE country IN ('USA', 'ZW', volatileFunction()); + +DROP TABLE pushdown_test;