From 68335285b4220338366c4b97392966450f0b96f7 Mon Sep 17 00:00:00 2001 From: Onur Tirtir Date: Sat, 11 Sep 2021 17:13:06 +0300 Subject: [PATCH] Columnar CustomScan: Pushdown BoolExpr's as we do before --- src/backend/columnar/columnar_customscan.c | 176 ++++++++++-- src/test/regress/bin/normalize.sed | 3 + .../expected/columnar_chunk_filtering.out | 251 +++++++++++++++++- .../expected/columnar_chunk_filtering_0.out | 251 +++++++++++++++++- .../regress/sql/columnar_chunk_filtering.sql | 71 +++++ 5 files changed, 728 insertions(+), 24 deletions(-) diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c index 101f0223f..e2451f89d 100644 --- a/src/backend/columnar/columnar_customscan.c +++ b/src/backend/columnar/columnar_customscan.c @@ -605,10 +605,11 @@ RelationIdGetNumberOfAttributes(Oid relationId) /* * CheckVarStats() checks whether a qual involving this Var is likely to be * useful based on the correlation stats. If so, or if stats are unavailable, - * return true; otherwise return false. + * return true; otherwise return false and sets absVarCorrelation in case + * caller wants to use for logging purposes. */ static bool -CheckVarStats(PlannerInfo *root, Var *var, Oid sortop) +CheckVarStats(PlannerInfo *root, Var *var, Oid sortop, float4 *absVarCorrelation) { /* * Collect isunique, ndistinct, and varCorrelation. @@ -642,6 +643,14 @@ CheckVarStats(PlannerInfo *root, Var *var, Oid sortop) */ if (Abs(varCorrelation) < ColumnarQualPushdownCorrelationThreshold) { + if (absVarCorrelation) + { + /* + * Report absVarCorrelation if caller wants to know why given + * var is rejected. + */ + *absVarCorrelation = Abs(varCorrelation); + } return false; } @@ -674,7 +683,7 @@ ExprReferencesRelid(Expr *expr, Index relid) /* - * CheckPushdownClause tests to see if clause is a candidate for pushing down + * ExtractPushdownClause extracts an Expr node from given clause for pushing down * into the given rel (including join clauses). This test may not be exact in * all cases; it's used to reduce the search space for parameterization. * @@ -683,19 +692,134 @@ ExprReferencesRelid(Expr *expr, Index relid) * and that doesn't seem worth the effort. Here we just look for "Var op Expr" * or "Expr op Var", where Var references rel and Expr references other rels * (or no rels at all). + * + * Moreover, this function also looks into BoolExpr's to recursively extract + * pushdownable OpExpr's of them: + * i) AND_EXPR: + * Take pushdownable args of AND expressions by ignoring the other args. + * ii) OR_EXPR: + * Ignore the whole OR expression if we cannot exract a pushdownable Expr + * from one of its args. + * iii) NOT_EXPR: + * Simply ignore NOT expressions since we don't expect to see them before + * an expression that we can pushdown, see the comment in function. + * + * The reasoning for those three rules could also be summarized as such; + * for any expression that we cannot push-down, we must assume that it + * evaluates to true. + * + * For example, given following WHERE clause: + * ( + * (a > random() OR a < 30) + * AND + * a < 200 + * ) OR + * ( + * a = 300 + * OR + * a > 400 + * ); + * Even if we can pushdown (a < 30), we cannot pushdown (a > random() OR a < 30) + * due to (a > random()). However, we can pushdown (a < 200), so we extract + * (a < 200) from the lhs of the top level OR expression. + * + * For the rhs of the top level OR expression, since we can pushdown both (a = 300) + * and (a > 400), we take this part as is. + * + * Finally, since both sides of the top level OR expression yielded pushdownable + * expressions, we will pushdown the following: + * (a < 200) OR ((a = 300) OR (a > 400)) */ -static bool -CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause) +static Expr * +ExtractPushdownClause(PlannerInfo *root, RelOptInfo *rel, Node *node) { - if (!IsA(clause, OpExpr) || list_length(((OpExpr *) clause)->args) != 2) + CHECK_FOR_INTERRUPTS(); + check_stack_depth(); + + if (node == NULL) + { + return NULL; + } + + if (IsA(node, BoolExpr)) + { + BoolExpr *boolExpr = castNode(BoolExpr, node); + if (boolExpr->boolop == NOT_EXPR) + { + /* + * Standard planner should have already applied de-morgan rule to + * simple NOT expressions. If we encounter with such an expression + * here, then it can't be a pushdownable one, such as: + * WHERE id NOT IN (SELECT id FROM something). + */ + ereport(ColumnarPlannerDebugLevel, + (errmsg("columnar planner: cannot push down clause: " + "must not contain a subplan"))); + return NULL; + } + + List *pushdownableArgs = NIL; + + Node *boolExprArg = NULL; + foreach_ptr(boolExprArg, boolExpr->args) + { + Expr *pushdownableArg = ExtractPushdownClause(root, rel, + (Node *) boolExprArg); + if (pushdownableArg) + { + pushdownableArgs = lappend(pushdownableArgs, pushdownableArg); + } + else if (boolExpr->boolop == OR_EXPR) + { + ereport(ColumnarPlannerDebugLevel, + (errmsg("columnar planner: cannot push down clause: " + "all arguments of an OR expression must be " + "pushdownable but one of them was not, due " + "to the reason given above"))); + return NULL; + } + + /* simply skip AND args that we cannot pushdown */ + } + + int npushdownableArgs = list_length(pushdownableArgs); + if (npushdownableArgs == 0) + { + ereport(ColumnarPlannerDebugLevel, + (errmsg("columnar planner: cannot push down clause: " + "none of the arguments were pushdownable, " + "due to the reason(s) given above "))); + return NULL; + } + else if (npushdownableArgs == 1) + { + return (Expr *) linitial(pushdownableArgs); + } + + if (boolExpr->boolop == AND_EXPR) + { + return make_andclause(pushdownableArgs); + } + else if (boolExpr->boolop == OR_EXPR) + { + return make_orclause(pushdownableArgs); + } + else + { + /* already discarded NOT expr, so should not be reachable */ + return NULL; + } + } + + if (!IsA(node, OpExpr) || list_length(((OpExpr *) node)->args) != 2) { ereport(ColumnarPlannerDebugLevel, (errmsg("columnar planner: cannot push down clause: " "must be binary operator expression"))); - return false; + return NULL; } - OpExpr *opExpr = castNode(OpExpr, clause); + OpExpr *opExpr = castNode(OpExpr, node); Expr *lhs = list_nth(opExpr->args, 0); Expr *rhs = list_nth(opExpr->args, 1); @@ -721,15 +845,15 @@ CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause) "must match 'Var Expr' or 'Expr Var'"), errhint("Var must only reference this rel, " "and Expr must not reference this rel"))); - return false; + return NULL; } if (varSide->varattno <= 0) { ereport(ColumnarPlannerDebugLevel, (errmsg("columnar planner: cannot push down clause: " - "var is whole-row reference"))); - return false; + "var is whole-row reference or system column"))); + return NULL; } if (contain_volatile_functions((Node *) exprSide)) @@ -737,7 +861,7 @@ CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause) ereport(ColumnarPlannerDebugLevel, (errmsg("columnar planner: cannot push down clause: " "expr contains volatile functions"))); - return false; + return NULL; } /* only the default opclass is used for qual pushdown. */ @@ -753,7 +877,7 @@ CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause) (errmsg("columnar planner: cannot push down clause: " "cannot find default btree opclass and opfamily for type: %s", format_type_be(varSide->vartype)))); - return false; + return NULL; } if (!op_in_opfamily(opExpr->opno, varOpFamily)) @@ -762,7 +886,7 @@ CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause) (errmsg("columnar planner: cannot push down clause: " "operator %d not a member of opfamily %d", opExpr->opno, varOpFamily))); - return false; + return NULL; } Oid sortop = get_opfamily_member(varOpFamily, varOpcInType, @@ -773,15 +897,20 @@ CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause) * Check that statistics on the Var support the utility of this * clause. */ - if (!CheckVarStats(root, varSide, sortop)) + float4 absVarCorrelation = 0; + if (!CheckVarStats(root, varSide, sortop, &absVarCorrelation)) { ereport(ColumnarPlannerDebugLevel, (errmsg("columnar planner: cannot push down clause: " - "var attribute %d is uncorrelated", varSide->varattno))); - return false; + "absolute correlation (%.3f) of var attribute %d is " + "smaller than the value configured in " + "\"columnar.qual_pushdown_correlation_threshold\" " + "(%.3f)", absVarCorrelation, varSide->varattno, + ColumnarQualPushdownCorrelationThreshold))); + return NULL; } - return true; + return (Expr *) node; } @@ -806,12 +935,19 @@ FilterPushdownClauses(PlannerInfo *root, RelOptInfo *rel, List *inputClauses) * there's something we should do with pseudoconstants here. */ if (rinfo->pseudoconstant || - !bms_is_member(rel->relid, rinfo->required_relids) || - !CheckPushdownClause(root, rel, rinfo->clause)) + !bms_is_member(rel->relid, rinfo->required_relids)) { continue; } + Expr *pushdownableExpr = ExtractPushdownClause(root, rel, (Node *) rinfo->clause); + if (!pushdownableExpr) + { + continue; + } + + rinfo = copyObject(rinfo); + rinfo->clause = pushdownableExpr; filteredClauses = lappend(filteredClauses, rinfo); } diff --git a/src/test/regress/bin/normalize.sed b/src/test/regress/bin/normalize.sed index 24efe4866..b6e870786 100644 --- a/src/test/regress/bin/normalize.sed +++ b/src/test/regress/bin/normalize.sed @@ -246,3 +246,6 @@ s/TRIM\(BOTH FROM value\)/btrim\(value\)/g s/pg14\.idx.*/pg14\.xxxxx/g s/CREATE TABLESPACE test_tablespace LOCATION.*/CREATE TABLESPACE test_tablespace LOCATION XXXX/g + +# columnar log for var correlation +s/(.*absolute correlation \()([0,1]\.[0-9]+)(\) of var attribute [0-9]+ is smaller than.*)/\1X\.YZ\3/g diff --git a/src/test/regress/expected/columnar_chunk_filtering.out b/src/test/regress/expected/columnar_chunk_filtering.out index 90c201ae9..980b2454f 100644 --- a/src/test/regress/expected/columnar_chunk_filtering.out +++ b/src/test/regress/expected/columnar_chunk_filtering.out @@ -645,7 +645,7 @@ alter table coltest add column x5 int default (random()*20000)::int; analyze coltest; -- test that expressions on whole-row references are not pushed down select * from coltest where coltest = (1,1,1,1); -NOTICE: columnar planner: cannot push down clause: var is whole-row reference +NOTICE: columnar planner: cannot push down clause: var is whole-row reference or system column NOTICE: columnar planner: adding CustomScan path for coltest DETAIL: unparameterized; 0 clauses pushed down id | x1 | x2 | x3 | x5 @@ -655,7 +655,7 @@ DETAIL: unparameterized; 0 clauses pushed down -- test that expressions on uncorrelated attributes are not pushed down set columnar.qual_pushdown_correlation to default; select * from coltest where x5 = 23484; -NOTICE: columnar planner: cannot push down clause: var attribute 5 is uncorrelated +NOTICE: columnar planner: cannot push down clause: absolute correlation (X.YZ) of var attribute 5 is smaller than the value configured in "columnar.qual_pushdown_correlation_threshold" (0.900) NOTICE: columnar planner: adding CustomScan path for coltest DETAIL: unparameterized; 0 clauses pushed down id | x1 | x2 | x3 | x5 @@ -819,3 +819,250 @@ select * from numrange_test natural join numrange_test2 order by nr; DROP TABLE atest1, atest2, t1, t2, t3, numrange_test, numrange_test2; set default_table_access_method to default; +set columnar.planner_debug_level to notice; +BEGIN; + SET LOCAL columnar.stripe_row_limit = 2000; + SET LOCAL columnar.chunk_group_row_limit = 1000; + create table pushdown_test (a int, b int) using columnar; + insert into pushdown_test values (generate_series(1, 200000)); +COMMIT; +SET columnar.max_custom_scan_paths TO 50; +SET columnar.qual_pushdown_correlation_threshold TO 0.0; +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a = 104356 or a = 76556; +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=2 loops=1) + Filter: ((a = 204356) OR (a = 104356) OR (a = 76556)) + Rows Removed by Filter: 1998 + Columnar Projected Columns: a + Columnar Chunk Group Filters: ((a = 204356) OR (a = 104356) OR (a = 76556)) + Columnar Chunk Groups Removed by Filter: 198 +(7 rows) + +SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a = 104356 or a = 76556; +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + 180912 +(1 row) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test WHERE a = 194356 or a = 104356 or a = 76556; +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=3 loops=1) + Filter: ((a = 194356) OR (a = 104356) OR (a = 76556)) + Rows Removed by Filter: 2997 + Columnar Projected Columns: a + Columnar Chunk Group Filters: ((a = 194356) OR (a = 104356) OR (a = 76556)) + Columnar Chunk Groups Removed by Filter: 197 +(7 rows) + +SELECT sum(a) FROM pushdown_test WHERE a = 194356 or a = 104356 or a = 76556; +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + 375268 +(1 row) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a > a*-1 + b; +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 0 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=0 loops=1) + Filter: ((a = 204356) OR (a > ((a * '-1'::integer) + b))) + Rows Removed by Filter: 200000 + Columnar Projected Columns: a, b +(5 rows) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a > 1000 and a < 10000) or (a > 20000 and a < 50000); +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=38998 loops=1) + Filter: (((a > 1000) AND (a < 10000)) OR ((a > 20000) AND (a < 50000))) + Rows Removed by Filter: 2 + Columnar Projected Columns: a + Columnar Chunk Group Filters: (((a > 1000) AND (a < 10000)) OR ((a > 20000) AND (a < 50000))) + Columnar Chunk Groups Removed by Filter: 161 +(7 rows) + +SELECT sum(a) FROM pushdown_test where (a > 1000 and a < 10000) or (a > 20000 and a < 50000); +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + 1099459500 +(1 row) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a > random() and a < 2*a) or (a > 100); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: none of the arguments were pushdownable, due to the reason(s) given above +NOTICE: columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 0 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=200000 loops=1) + Filter: ((((a)::double precision > random()) AND (a < (2 * a))) OR (a > 100)) + Columnar Projected Columns: a +(4 rows) + +SELECT sum(a) FROM pushdown_test where (a > random() and a < 2*a) or (a > 100); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: none of the arguments were pushdownable, due to the reason(s) given above +NOTICE: columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 0 clauses pushed down + sum +--------------------------------------------------------------------- + 20000100000 +(1 row) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=3010 loops=1) + Filter: ((((a)::double precision > random()) AND (a <= 2000)) OR (a > 198990)) + Rows Removed by Filter: 990 + Columnar Projected Columns: a + Columnar Chunk Group Filters: ((a <= 2000) OR (a > 198990)) + Columnar Chunk Groups Removed by Filter: 196 +(7 rows) + +SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + 203491455 +(1 row) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where +( + a > random() + and + ( + (a < 200 and a not in (select a from pushdown_test)) or + (a > 1000 and a < 2000) + ) +) +or +(a > 200000-2010); +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must not contain a subplan +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=3009 loops=1) + Filter: ((((a)::double precision > random()) AND (((a < 200) AND (NOT (SubPlan 1))) OR ((a > 1000) AND (a < 2000)))) OR (a > 197990)) + Rows Removed by Filter: 1991 + Columnar Projected Columns: a + Columnar Chunk Group Filters: (((a < 200) OR ((a > 1000) AND (a < 2000))) OR (a > 197990)) + Columnar Chunk Groups Removed by Filter: 195 + SubPlan 1 + -> Materialize (actual rows=100 loops=199) + -> Custom Scan (ColumnarScan) on pushdown_test pushdown_test_1 (actual rows=199 loops=1) + Columnar Projected Columns: a +(11 rows) + +SELECT sum(a) FROM pushdown_test where +( + a > random() + and + ( + (a < 200 and a not in (select a from pushdown_test)) or + (a > 1000 and a < 2000) + ) +) +or +(a > 200000-2010); +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must not contain a subplan +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + 401479455 +(1 row) + +create function stable_1(arg int) returns int language plpgsql STRICT IMMUTABLE as +$$ BEGIN RETURN 1+arg; END; $$; +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000)); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=0 loops=1) + Filter: ((a < 6001) AND ((a)::double precision = random()) AND (a < stable_1(a))) + Rows Removed by Filter: 6000 + Columnar Projected Columns: a + Columnar Chunk Group Filters: (a < 6001) + Columnar Chunk Groups Removed by Filter: 194 +(7 rows) + +SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000)); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + +(1 row) + +RESET columnar.max_custom_scan_paths; +RESET columnar.qual_pushdown_correlation_threshold; +RESET columnar.planner_debug_level; +DROP TABLE pushdown_test; diff --git a/src/test/regress/expected/columnar_chunk_filtering_0.out b/src/test/regress/expected/columnar_chunk_filtering_0.out index fe507bc6c..b52581c0e 100644 --- a/src/test/regress/expected/columnar_chunk_filtering_0.out +++ b/src/test/regress/expected/columnar_chunk_filtering_0.out @@ -645,7 +645,7 @@ alter table coltest add column x5 int default (random()*20000)::int; analyze coltest; -- test that expressions on whole-row references are not pushed down select * from coltest where coltest = (1,1,1,1); -NOTICE: columnar planner: cannot push down clause: var is whole-row reference +NOTICE: columnar planner: cannot push down clause: var is whole-row reference or system column NOTICE: columnar planner: adding CustomScan path for coltest DETAIL: unparameterized; 0 clauses pushed down id | x1 | x2 | x3 | x5 @@ -655,7 +655,7 @@ DETAIL: unparameterized; 0 clauses pushed down -- test that expressions on uncorrelated attributes are not pushed down set columnar.qual_pushdown_correlation to default; select * from coltest where x5 = 23484; -NOTICE: columnar planner: cannot push down clause: var attribute 5 is uncorrelated +NOTICE: columnar planner: cannot push down clause: absolute correlation (X.YZ) of var attribute 5 is smaller than the value configured in "columnar.qual_pushdown_correlation_threshold" (0.900) NOTICE: columnar planner: adding CustomScan path for coltest DETAIL: unparameterized; 0 clauses pushed down id | x1 | x2 | x3 | x5 @@ -819,3 +819,250 @@ select * from numrange_test natural join numrange_test2 order by nr; DROP TABLE atest1, atest2, t1, t2, t3, numrange_test, numrange_test2; set default_table_access_method to default; +set columnar.planner_debug_level to notice; +BEGIN; + SET LOCAL columnar.stripe_row_limit = 2000; + SET LOCAL columnar.chunk_group_row_limit = 1000; + create table pushdown_test (a int, b int) using columnar; + insert into pushdown_test values (generate_series(1, 200000)); +COMMIT; +SET columnar.max_custom_scan_paths TO 50; +SET columnar.qual_pushdown_correlation_threshold TO 0.0; +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a = 104356 or a = 76556; +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=2 loops=1) + Filter: ((a = 204356) OR (a = 104356) OR (a = 76556)) + Rows Removed by Filter: 1998 + Columnar Projected Columns: a + Columnar Chunk Group Filters: ((a = 204356) OR (a = 104356) OR (a = 76556)) + Columnar Chunk Groups Removed by Filter: 198 +(7 rows) + +SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a = 104356 or a = 76556; +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + 180912 +(1 row) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test WHERE a = 194356 or a = 104356 or a = 76556; +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=3 loops=1) + Filter: ((a = 194356) OR (a = 104356) OR (a = 76556)) + Rows Removed by Filter: 2997 + Columnar Projected Columns: a + Columnar Chunk Group Filters: ((a = 194356) OR (a = 104356) OR (a = 76556)) + Columnar Chunk Groups Removed by Filter: 197 +(7 rows) + +SELECT sum(a) FROM pushdown_test WHERE a = 194356 or a = 104356 or a = 76556; +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + 375268 +(1 row) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a > a*-1 + b; +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 0 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=0 loops=1) + Filter: ((a = 204356) OR (a > ((a * '-1'::integer) + b))) + Rows Removed by Filter: 200000 + Columnar Projected Columns: a, b +(5 rows) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a > 1000 and a < 10000) or (a > 20000 and a < 50000); +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=38998 loops=1) + Filter: (((a > 1000) AND (a < 10000)) OR ((a > 20000) AND (a < 50000))) + Rows Removed by Filter: 2 + Columnar Projected Columns: a + Columnar Chunk Group Filters: (((a > 1000) AND (a < 10000)) OR ((a > 20000) AND (a < 50000))) + Columnar Chunk Groups Removed by Filter: 161 +(7 rows) + +SELECT sum(a) FROM pushdown_test where (a > 1000 and a < 10000) or (a > 20000 and a < 50000); +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + 1099459500 +(1 row) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a > random() and a < 2*a) or (a > 100); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: none of the arguments were pushdownable, due to the reason(s) given above +NOTICE: columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 0 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=200000 loops=1) + Filter: ((((a)::double precision > random()) AND (a < (2 * a))) OR (a > 100)) + Columnar Projected Columns: a +(4 rows) + +SELECT sum(a) FROM pushdown_test where (a > random() and a < 2*a) or (a > 100); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: none of the arguments were pushdownable, due to the reason(s) given above +NOTICE: columnar planner: cannot push down clause: all arguments of an OR expression must be pushdownable but one of them was not, due to the reason given above +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 0 clauses pushed down + sum +--------------------------------------------------------------------- + 20000100000 +(1 row) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=3010 loops=1) + Filter: ((((a)::double precision > random()) AND (a <= 2000)) OR (a > 198990)) + Rows Removed by Filter: 990 + Columnar Projected Columns: a + Columnar Chunk Group Filters: ((a <= 2000) OR (a > 198990)) + Columnar Chunk Groups Removed by Filter: 196 +(7 rows) + +SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + 203491455 +(1 row) + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where +( + a > random() + and + ( + (a < 200 and a not in (select a from pushdown_test)) or + (a > 1000 and a < 2000) + ) +) +or +(a > 200000-2010); +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must not contain a subplan +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=3009 loops=1) + Filter: ((((a)::double precision > random()) AND (((a < 200) AND (NOT (SubPlan 1))) OR ((a > 1000) AND (a < 2000)))) OR (a > 197990)) + Rows Removed by Filter: 1991 + Columnar Projected Columns: a + Columnar Chunk Group Filters: (((a < 200) OR ((a > 1000) AND (a < 2000))) OR (a > 197990)) + Columnar Chunk Groups Removed by Filter: 195 + SubPlan 1 + -> Materialize (actual rows=100 loops=199) + -> Custom Scan (ColumnarScan) on pushdown_test pushdown_test_1 (actual rows=199 loops=1) + Columnar Projected Columns: a +(11 rows) + +SELECT sum(a) FROM pushdown_test where +( + a > random() + and + ( + (a < 200 and a not in (select a from pushdown_test)) or + (a > 1000 and a < 2000) + ) +) +or +(a > 200000-2010); +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must not contain a subplan +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + 401479455 +(1 row) + +create function stable_1(arg int) returns int language plpgsql STRICT IMMUTABLE as +$$ BEGIN RETURN 1+arg; END; $$; +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000)); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on pushdown_test (actual rows=0 loops=1) + Filter: ((a < 6001) AND ((a)::double precision = random()) AND (a < stable_1(a))) + Rows Removed by Filter: 6000 + Columnar Projected Columns: a + Columnar Chunk Group Filters: (a < 6001) + Columnar Chunk Groups Removed by Filter: 194 +(7 rows) + +SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000)); +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: adding CustomScan path for pushdown_test +DETAIL: unparameterized; 1 clauses pushed down + sum +--------------------------------------------------------------------- + +(1 row) + +RESET columnar.max_custom_scan_paths; +RESET columnar.qual_pushdown_correlation_threshold; +RESET columnar.planner_debug_level; +DROP TABLE pushdown_test; diff --git a/src/test/regress/sql/columnar_chunk_filtering.sql b/src/test/regress/sql/columnar_chunk_filtering.sql index aea13839d..a2d2d628e 100644 --- a/src/test/regress/sql/columnar_chunk_filtering.sql +++ b/src/test/regress/sql/columnar_chunk_filtering.sql @@ -374,3 +374,74 @@ select * from numrange_test natural join numrange_test2 order by nr; DROP TABLE atest1, atest2, t1, t2, t3, numrange_test, numrange_test2; set default_table_access_method to default; + +set columnar.planner_debug_level to notice; + +BEGIN; + SET LOCAL columnar.stripe_row_limit = 2000; + SET LOCAL columnar.chunk_group_row_limit = 1000; + create table pushdown_test (a int, b int) using columnar; + insert into pushdown_test values (generate_series(1, 200000)); +COMMIT; + +SET columnar.max_custom_scan_paths TO 50; +SET columnar.qual_pushdown_correlation_threshold TO 0.0; + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a = 104356 or a = 76556; +SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a = 104356 or a = 76556; + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test WHERE a = 194356 or a = 104356 or a = 76556; +SELECT sum(a) FROM pushdown_test WHERE a = 194356 or a = 104356 or a = 76556; + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test WHERE a = 204356 or a > a*-1 + b; + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a > 1000 and a < 10000) or (a > 20000 and a < 50000); +SELECT sum(a) FROM pushdown_test where (a > 1000 and a < 10000) or (a > 20000 and a < 50000); + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a > random() and a < 2*a) or (a > 100); +SELECT sum(a) FROM pushdown_test where (a > random() and a < 2*a) or (a > 100); + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010); +SELECT sum(a) FROM pushdown_test where (a > random() and a <= 2000) or (a > 200000-1010); + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where +( + a > random() + and + ( + (a < 200 and a not in (select a from pushdown_test)) or + (a > 1000 and a < 2000) + ) +) +or +(a > 200000-2010); +SELECT sum(a) FROM pushdown_test where +( + a > random() + and + ( + (a < 200 and a not in (select a from pushdown_test)) or + (a > 1000 and a < 2000) + ) +) +or +(a > 200000-2010); + +create function stable_1(arg int) returns int language plpgsql STRICT IMMUTABLE as +$$ BEGIN RETURN 1+arg; END; $$; + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000)); +SELECT sum(a) FROM pushdown_test where (a = random() and a < stable_1(a) and a < stable_1(6000)); + +RESET columnar.max_custom_scan_paths; +RESET columnar.qual_pushdown_correlation_threshold; +RESET columnar.planner_debug_level; +DROP TABLE pushdown_test;