From 4718b6bcdfc6778a7ac00d9d26a7a4e24cf60acb Mon Sep 17 00:00:00 2001
From: jeff-davis
Date: Thu, 2 Sep 2021 22:22:48 -0700
Subject: [PATCH] Generate parameterized paths for columnar scans. (#5172)

Allow ColumnarScans to push down join quals by generating parameterized
paths. This significantly expands the utility of chunk group filtering,
making a ColumnarScan behave similarly to an index when it appears on
the inner side of a nested loop join.

Also, evaluate all parameters at beginscan/rescan time, which makes
qual pushdown work for external parameters as well.

Fixes #4488.
---
 src/backend/columnar/columnar_customscan.c    | 915 ++++++++++++++++--
 src/backend/columnar/columnar_reader.c        |   3 +-
 src/backend/columnar/columnar_tableam.c       |   6 +-
 src/include/columnar/columnar.h               |   2 +-
 .../expected/columnar_chunk_filtering.out     | 484 ++++++++-
 .../expected/columnar_chunk_filtering_0.out   | 748 ++++++++++++++
 src/test/regress/expected/columnar_cursor.out |  10 +-
 src/test/regress/expected/columnar_join.out   |  10 +-
 .../expected/columnar_partitioning.out        |   6 +-
 .../expected/columnar_partitioning_1.out      |   6 +-
 src/test/regress/expected/columnar_paths.out  |  51 +
 src/test/regress/expected/columnar_query.out  |   7 +-
 .../expected/columnar_transactions.out        |  32 +-
 .../regress/sql/columnar_chunk_filtering.sql  | 164 ++++
 src/test/regress/sql/columnar_paths.sql       |  33 +
 15 files changed, 2340 insertions(+), 137 deletions(-)
 create mode 100644 src/test/regress/expected/columnar_chunk_filtering_0.out

diff --git a/src/backend/columnar/columnar_customscan.c b/src/backend/columnar/columnar_customscan.c
index ddb78dc98..0eb2e8869 100644
--- a/src/backend/columnar/columnar_customscan.c
+++ b/src/backend/columnar/columnar_customscan.c
@@ -14,10 +14,17 @@
 #include "postgres.h"
 
+#include
+
 #include "access/amapi.h"
 #include "access/skey.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_statistic.h"
+#include "commands/defrem.h"
+#include "miscadmin.h"
 #include "nodes/extensible.h"
 #include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
 #include "nodes/pg_list.h"
 #include "nodes/plannodes.h"
 #include "optimizer/cost.h"
@@ -25,8 +32,11 @@
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/restrictinfo.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
 #include "utils/relcache.h"
 #include "utils/ruleutils.h"
+#include "utils/selfuncs.h"
 #include "utils/spccache.h"
 
 #include "columnar/columnar.h"
@@ -43,6 +53,7 @@ typedef struct ColumnarScanState
 {
 	CustomScanState custom_scanstate; /* must be first field */
 
+	ExprContext *css_RuntimeContext;
 	List *qual;
 } ColumnarScanState;
 
@@ -63,11 +74,19 @@ static Cost ColumnarIndexScanAddStartupCost(RelOptInfo *rel, Oid relationId,
 											IndexPath *indexPath);
 static Cost ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
 										  Oid relationId, IndexPath *indexPath);
-static void RecostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path);
+static void CostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path);
 static int RelationIdGetNumberOfAttributes(Oid relationId);
-static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
-									 RangeTblEntry *rte);
-static Cost ColumnarScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead);
+static void AddColumnarScanPaths(PlannerInfo *root, RelOptInfo *rel,
+								 RangeTblEntry *rte);
+static void AddColumnarScanPathsRec(PlannerInfo *root, RelOptInfo *rel,
+									RangeTblEntry *rte, Relids paramRelids,
+									Relids candidateRelids,
+									int depthLimit);
+static void AddColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
+
RangeTblEntry *rte, Relids required_relids); +static void CostColumnarScan(CustomPath *cpath, PlannerInfo *root, + RelOptInfo *rel, Oid relationId, + int numberOfColumnsRead, int nClauses); static Cost ColumnarPerStripeScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead); static uint64 ColumnarTableStripeCount(Oid relationId); @@ -80,23 +99,32 @@ static Plan * ColumnarScanPath_PlanCustomPath(PlannerInfo *root, static Node * ColumnarScan_CreateCustomScanState(CustomScan *cscan); -static void ColumnarScan_BeginCustomScan(CustomScanState *node, EState *estate, int - eflags); +static void ColumnarScan_BeginCustomScan(CustomScanState *node, EState *estate, + int eflags); static TupleTableSlot * ColumnarScan_ExecCustomScan(CustomScanState *node); static void ColumnarScan_EndCustomScan(CustomScanState *node); static void ColumnarScan_ReScanCustomScan(CustomScanState *node); static void ColumnarScan_ExplainCustomScan(CustomScanState *node, List *ancestors, ExplainState *es); -static const char * ColumnarScanProjectedColumnsStr(ColumnarScanState *columnarScanState, - List *ancestors, ExplainState *es); +static const char * ColumnarPushdownClausesStr(List *context, List *clauses); +static const char * ColumnarProjectedColumnsStr(List *context, + List *projectedColumns); static List * ColumnarVarNeeded(ColumnarScanState *columnarScanState); static Bitmapset * ColumnarAttrNeeded(ScanState *ss); +#if PG_VERSION_NUM >= 130000 +static List * set_deparse_context_planstate(List *dpcontext, Node *node, + List *ancestors); +#endif + /* saved hook value in case of unload */ static set_rel_pathlist_hook_type PreviousSetRelPathlistHook = NULL; static bool EnableColumnarCustomScan = true; static bool EnableColumnarQualPushdown = true; +static double ColumnarQualPushdownCorrelationThreshold = 0.9; +static int ColumnarMaxCustomScanPaths = 64; +static int ColumnarPlannerDebugLevel = DEBUG3; const struct CustomPathMethods ColumnarScanPathMethods = { @@ -120,6 +148,20 @@ const struct CustomExecMethods ColumnarScanExecuteMethods = { .ExplainCustomScan = ColumnarScan_ExplainCustomScan, }; +static const struct config_enum_entry debug_level_options[] = { + { "debug5", DEBUG5, false }, + { "debug4", DEBUG4, false }, + { "debug3", DEBUG3, false }, + { "debug2", DEBUG2, false }, + { "debug1", DEBUG1, false }, + { "debug", DEBUG2, true }, + { "info", INFO, false }, + { "notice", NOTICE, false }, + { "warning", WARNING, false }, + { "log", LOG, false }, + { NULL, 0, false } +}; + /* * columnar_customscan_init installs the hook required to intercept the postgres planner and @@ -152,6 +194,44 @@ columnar_customscan_init() PGC_USERSET, GUC_NO_SHOW_ALL, NULL, NULL, NULL); + DefineCustomRealVariable( + "columnar.qual_pushdown_correlation_threshold", + gettext_noop("Correlation threshold to attempt to push a qual " + "referencing the given column. 
A value of 0 means "
+					 "attempt to push down all quals, even if the column "
+					 "is uncorrelated."),
+		NULL,
+		&ColumnarQualPushdownCorrelationThreshold,
+		0.9,
+		0.0,
+		1.0,
+		PGC_USERSET,
+		GUC_NO_SHOW_ALL,
+		NULL, NULL, NULL);
+	DefineCustomIntVariable(
+		"columnar.max_custom_scan_paths",
+		gettext_noop("Maximum number of custom scan paths to generate "
+					 "for a columnar table when planning."),
+		NULL,
+		&ColumnarMaxCustomScanPaths,
+		64,
+		1,
+		1024,
+		PGC_USERSET,
+		GUC_NO_SHOW_ALL,
+		NULL, NULL, NULL);
+	DefineCustomEnumVariable(
+		"columnar.planner_debug_level",
+		"Message level for columnar planning information.",
+		NULL,
+		&ColumnarPlannerDebugLevel,
+		DEBUG3,
+		debug_level_options,
+		PGC_USERSET,
+		0,
+		NULL,
+		NULL,
+		NULL);
 
 	RegisterCustomScanMethods(&ColumnarScanScanMethods);
 }
@@ -208,8 +288,6 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 
 	if (EnableColumnarCustomScan)
 	{
-		Path *customPath = CreateColumnarScanPath(root, rel, rte);
-
 		ereport(DEBUG1, (errmsg("pathlist hook for columnar table am")));
 
 		/*
@@ -226,7 +304,7 @@ ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
 		 * SeqPath thinking that its cost would be equal to ColumnarCustomScan.
 		 */
 		RemovePathsByPredicate(rel, IsNotIndexPath);
-		add_path(rel, customPath);
+		AddColumnarScanPaths(root, rel, rte);
 	}
 }
 RelationClose(relation);
@@ -277,7 +355,7 @@ CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
 	Relids requiredOuter = rel->lateral_relids;
 	Path *path = create_seqscan_path(root, rel, requiredOuter, parallelWorkers);
 
-	RecostColumnarSeqPath(rel, relationId, path);
+	CostColumnarSeqPath(rel, relationId, path);
 
 	return path;
 }
@@ -305,7 +383,7 @@ RecostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
 		}
 		else if (path->pathtype == T_SeqScan)
 		{
-			RecostColumnarSeqPath(rel, relationId, path);
+			CostColumnarSeqPath(rel, relationId, path);
 		}
 	}
 }
@@ -457,11 +535,11 @@ ColumnarIndexScanAddTotalCost(PlannerInfo *root, RelOptInfo *rel,
 
 
 /*
- * RecostColumnarSeqPath re-costs given seq path for columnar table with
+ * CostColumnarSeqPath sets the costs of the given seq path for the columnar table with
  * relationId.
  */
 static void
-RecostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path)
+CostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path)
 {
 	if (!enable_seqscan)
 	{
@@ -469,17 +547,16 @@
 		return;
 	}
 
-	path->startup_cost = 0;
-
 	/*
-	 * Seq scan doesn't support projection pushdown, so we will read all the
-	 * columns.
-	 * Also note that seq scan doesn't support chunk group filtering too but
-	 * our costing model already doesn't consider chunk group filtering.
+	 * Seq scan doesn't support projection or qual pushdown, so we will read
+	 * all the stripes and all the columns.
 	 */
+	double stripesToRead = ColumnarTableStripeCount(relationId);
 	int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
-	path->total_cost = path->startup_cost +
-					   ColumnarScanCost(rel, relationId, numberOfColumnsRead);
+
+	path->startup_cost = 0;
+	path->total_cost = stripesToRead *
+					   ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
 }
 
 
@@ -497,15 +574,520 @@ RelationIdGetNumberOfAttributes(Oid relationId)
 }
 
 
-static Path *
-CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
+/*
+ * CheckVarStats() checks whether a qual involving this Var is likely to be
+ * useful based on the correlation stats. 
If so, or if stats are unavailable,
+ * return true; otherwise return false.
+ */
+static bool
+CheckVarStats(PlannerInfo *root, Var *var, Oid sortop)
+{
+	/*
+	 * Collect isunique, ndistinct, and varCorrelation.
+	 */
+	VariableStatData varStatData;
+	examine_variable(root, (Node *) var, var->varno, &varStatData);
+	if (varStatData.rel == NULL ||
+		!HeapTupleIsValid(varStatData.statsTuple))
+	{
+		return true;
+	}
+
+	AttStatsSlot sslot;
+	if (!get_attstatsslot(&sslot, varStatData.statsTuple,
+						  STATISTIC_KIND_CORRELATION, sortop,
+						  ATTSTATSSLOT_NUMBERS))
+	{
+		ReleaseVariableStats(varStatData);
+		return true;
+	}
+
+	Assert(sslot.nnumbers == 1);
+
+	float4 varCorrelation = sslot.numbers[0];
+
+	ReleaseVariableStats(varStatData);
+
+	/*
+	 * If the Var is not highly correlated, then the chunk's min/max bounds
+	 * will be nearly useless.
+	 */
+	if (Abs(varCorrelation) < ColumnarQualPushdownCorrelationThreshold)
+	{
+		return false;
+	}
+
+	return true;
+}
+
+
+/*
+ * ExprReferencesRelid returns true if any of the Expr's Vars refer to the
+ * given relid; false otherwise.
+ */
+static bool
+ExprReferencesRelid(Expr *expr, Index relid)
+{
+	List *exprVars = pull_var_clause(
+		(Node *) expr, PVC_RECURSE_AGGREGATES |
+		PVC_RECURSE_WINDOWFUNCS | PVC_RECURSE_PLACEHOLDERS);
+	ListCell *lc;
+	foreach(lc, exprVars)
+	{
+		Var *var = (Var *) lfirst(lc);
+		if (var->varno == relid)
+		{
+			return true;
+		}
+	}
+
+	return false;
+}
+
+
+/*
+ * CheckPushdownClause tests to see if clause is a candidate for pushing down
+ * into the given rel (including join clauses). This test may not be exact in
+ * all cases; it's used to reduce the search space for parameterization.
+ *
+ * Note that we don't try to handle cases like "Var + ExtParam = 3". That
+ * would require going through eval_const_expressions() after parameter
+ * binding, and that doesn't seem worth the effort. Here we just look for
+ * "Var op Expr" or "Expr op Var", where Var references rel and Expr
+ * references other rels (or no rels at all). 
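+ *
+ * For example, "x1 = 7" or "x1 = r1.n1 + 1" (where x1 belongs to this rel
+ * and r1 does not) have the expected shape, but "x1 + 1 = 7" does not,
+ * because the rel's Var must appear bare on one side of the operator.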
+ */ +static bool +CheckPushdownClause(PlannerInfo *root, RelOptInfo *rel, Expr *clause) +{ + if (!IsA(clause, OpExpr) || list_length(((OpExpr *) clause)->args) != 2) + { + ereport(ColumnarPlannerDebugLevel, + (errmsg("columnar planner: cannot push down clause: " + "must be binary operator expression"))); + return false; + } + + OpExpr *opExpr = castNode(OpExpr, clause); + Expr *lhs = list_nth(opExpr->args, 0); + Expr *rhs = list_nth(opExpr->args, 1); + + Var *varSide; + Expr *exprSide; + + if (IsA(lhs, Var) && ((Var *) lhs)->varno == rel->relid && + !ExprReferencesRelid((Expr *) rhs, rel->relid)) + { + varSide = castNode(Var, lhs); + exprSide = rhs; + } + else if (IsA(rhs, Var) && ((Var *) rhs)->varno == rel->relid && + !ExprReferencesRelid((Expr *) lhs, rel->relid)) + { + varSide = castNode(Var, rhs); + exprSide = lhs; + } + else + { + ereport(ColumnarPlannerDebugLevel, + (errmsg("columnar planner: cannot push down clause: " + "must match 'Var Expr' or 'Expr Var'"), + errhint("Var must only reference this rel, " + "and Expr must not reference this rel"))); + return false; + } + + if (varSide->varattno <= 0) + { + ereport(ColumnarPlannerDebugLevel, + (errmsg("columnar planner: cannot push down clause: " + "var is whole-row reference"))); + return false; + } + + if (contain_volatile_functions((Node *) exprSide)) + { + ereport(ColumnarPlannerDebugLevel, + (errmsg("columnar planner: cannot push down clause: " + "expr contains volatile functions"))); + return false; + } + + /* only the default opclass is used for qual pushdown. */ + Oid varOpClass = GetDefaultOpClass(varSide->vartype, BTREE_AM_OID); + Oid varOpFamily; + Oid varOpcInType; + + if (!OidIsValid(varOpClass) || + !get_opclass_opfamily_and_input_type(varOpClass, &varOpFamily, + &varOpcInType)) + { + ereport(ColumnarPlannerDebugLevel, + (errmsg("columnar planner: cannot push down clause: " + "cannot find default btree opclass and opfamily for type: %s", + format_type_be(varSide->vartype)))); + return false; + } + + if (!op_in_opfamily(opExpr->opno, varOpFamily)) + { + ereport(ColumnarPlannerDebugLevel, + (errmsg("columnar planner: cannot push down clause: " + "operator %d not a member of opfamily %d", + opExpr->opno, varOpFamily))); + return false; + } + + Oid sortop = get_opfamily_member(varOpFamily, varOpcInType, + varOpcInType, BTLessStrategyNumber); + Assert(OidIsValid(sortop)); + + /* + * Check that statistics on the Var support the utility of this + * clause. + */ + if (!CheckVarStats(root, varSide, sortop)) + { + ereport(ColumnarPlannerDebugLevel, + (errmsg("columnar planner: cannot push down clause: " + "var attribute %d is uncorrelated", varSide->varattno))); + return false; + } + + return true; +} + + +/* + * FilterPushdownClauses filters for clauses that are candidates for pushing + * down into rel. + */ +static List * +FilterPushdownClauses(PlannerInfo *root, RelOptInfo *rel, List *inputClauses) +{ + List *filteredClauses = NIL; + ListCell *lc; + foreach(lc, inputClauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + /* + * Ignore clauses that don't refer to this rel, and pseudoconstants. + * + * XXX: A pseudoconstant may be of use, but it doesn't make sense to + * push it down because it doesn't contain any Vars. Look into if + * there's something we should do with pseudoconstants here. 
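+		 *
+		 * (A pseudoconstant clause references no Vars of the current query
+		 * level and contains no volatile functions, e.g. "now() > '2021-01-01'";
+		 * the planner evaluates it once, as a gating qual, rather than once
+		 * per row.)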
+ */ + if (rinfo->pseudoconstant || + !bms_is_member(rel->relid, rinfo->required_relids) || + !CheckPushdownClause(root, rel, rinfo->clause)) + { + continue; + } + + filteredClauses = lappend(filteredClauses, rinfo); + } + + return filteredClauses; +} + + +/* + * PushdownJoinClauseMatches is a callback that returns true, indicating that + * we want all of the clauses from generate_implied_equalities_for_column(). + */ +static bool +PushdownJoinClauseMatches(PlannerInfo *root, RelOptInfo *rel, + EquivalenceClass *ec, EquivalenceMember *em, + void *arg) +{ + return true; +} + + +/* + * FindPushdownJoinClauses finds join clauses, including those implied by ECs, + * that may be pushed down. + */ +static List * +FindPushdownJoinClauses(PlannerInfo *root, RelOptInfo *rel) +{ + List *joinClauses = copyObject(rel->joininfo); + + /* + * Here we are generating the clauses just so we can later extract the + * interesting relids. This is somewhat wasteful, but it allows us to + * filter out joinclauses, reducing the number of relids we need to + * consider. + * + * XXX: also find additional clauses for joininfo that are implied by ECs? + */ + List *ecClauses = generate_implied_equalities_for_column( + root, rel, PushdownJoinClauseMatches, NULL, + rel->lateral_referencers); + List *allClauses = list_concat(joinClauses, ecClauses); + + return FilterPushdownClauses(root, rel, allClauses); +} + + +/* + * FindCandidateRelids identifies candidate rels for parameterization from the + * list of join clauses. + * + * Some rels cannot be considered for parameterization, such as a partitioned + * parent of the given rel. Other rels are just not useful because they don't + * appear in a join clause that could be pushed down. + */ +static Relids +FindCandidateRelids(PlannerInfo *root, RelOptInfo *rel, List *joinClauses) +{ + Relids candidateRelids = NULL; + ListCell *lc; + foreach(lc, joinClauses) + { + RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); + + candidateRelids = bms_add_members(candidateRelids, + rinfo->required_relids); + } + + candidateRelids = bms_del_members(candidateRelids, rel->relids); + candidateRelids = bms_del_members(candidateRelids, rel->lateral_relids); + return candidateRelids; +} + + +/* + * Combinations() calculates the number of combinations of n things taken k at + * a time. When the correct result is large, the calculation may produce a + * non-integer result, or overflow to inf, which caller should handle + * appropriately. + * + * Use the following two formulae from Knuth TAoCP, 1.2.6: + * (2) Combinations(n, k) = (n*(n-1)..(n-k+1)) / (k*(k-1)..1) + * (5) Combinations(n, k) = Combinations(n, n-k) + */ +static double +Combinations(int n, int k) +{ + double v = 1; + + /* + * If k is close to n, then both the numerator and the denominator are + * close to n!, and we may overflow even if the input is reasonable + * (e.g. Combinations(500, 500)). Use formula (5) to choose the smaller, + * but equivalent, k. + */ + k = Min(k, n - k); + + /* calculate numerator of formula (2) first */ + for (int i = n; i >= n - k + 1; i--) + { + v *= i; + } + + /* + * Divide by each factor in the denominator of formula (2), skipping + * division by 1. + */ + for (int i = k; i >= 2; i--) + { + v /= i; + } + + return v; +} + + +/* + * ChooseDepthLimit() calculates the depth limit for the parameterization + * search, given the number of candidate relations. + * + * The maximum number of paths generated for a given depthLimit is: + * + * Combinations(nCandidates, 0) + Combinations(nCandidates, 1) + ... 
+ + * Combinations(nCandidates, depthLimit) + * + * There's no closed formula for a partial sum of combinations, so just keep + * increasing the depth until the number of combinations exceeds the limit. + */ +static int +ChooseDepthLimit(int nCandidates) +{ + if (!EnableColumnarQualPushdown) + { + return 0; + } + + int depth = 0; + double numPaths = 1; + + while (depth < nCandidates) + { + numPaths += Combinations(nCandidates, depth + 1); + + if (numPaths > (double) ColumnarMaxCustomScanPaths) + { + break; + } + + depth++; + } + + return depth; +} + + +/* + * AddColumnarScanPaths is the entry point for recursively generating + * parameterized paths. See AddColumnarScanPathsRec() for discussion. + */ +static void +AddColumnarScanPaths(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) +{ + List *joinClauses = FindPushdownJoinClauses(root, rel); + Relids candidateRelids = FindCandidateRelids(root, rel, joinClauses); + + int depthLimit = ChooseDepthLimit(bms_num_members(candidateRelids)); + + /* must always parameterize by lateral refs */ + Relids paramRelids = bms_copy(rel->lateral_relids); + + AddColumnarScanPathsRec(root, rel, rte, paramRelids, candidateRelids, + depthLimit); +} + + +/* + * AddColumnarScanPathsRec is a recursive function to search the + * parameterization space and add CustomPaths for columnar scans. + * + * The set paramRelids is the parameterization at the current level, and + * candidateRelids is the set from which we draw to generate paths with + * greater parameterization. + * + * Columnar tables resemble indexes because of the ability to push down + * quals. Ordinary quals, such as x = 7, can be pushed down easily. But join + * quals of the form "x = y" (where "y" comes from another rel) require the + * proper parameterization. + * + * Paths that require more outer rels can push down more join clauses that + * depend on those outer rels. But requiring more outer rels gives the planner + * fewer options for the shape of the plan. That means there is a trade-off, + * and we should generate plans of various parameterizations, then let the + * planner choose. We always need to generate one minimally-parameterized path + * (parameterized only by lateral refs, if present) to make sure that at least + * one path can be chosen. Then, we generate as many parameterized paths as we + * reasonably can. + * + * The set of all possible parameterizations is the power set of + * candidateRelids. The power set has cardinality 2^N, where N is the + * cardinality of candidateRelids. To avoid creating a huge number of paths, + * limit the depth of the search; the depthLimit is equivalent to the maximum + * number of required outer rels (beyond the minimal parameterization) for the + * path. A depthLimit of zero means that only the minimally-parameterized path + * will be generated. + */ +static void +AddColumnarScanPathsRec(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte, + Relids paramRelids, Relids candidateRelids, + int depthLimit) +{ + CHECK_FOR_INTERRUPTS(); + check_stack_depth(); + + Assert(!bms_overlap(paramRelids, candidateRelids)); + AddColumnarScanPath(root, rel, rte, paramRelids); + + /* recurse for all candidateRelids, unless we hit the depth limit */ + Assert(depthLimit >= 0); + if (depthLimit-- == 0) + { + return; + } + + /* + * Iterate through parameter combinations depth-first. Deeper levels + * generate paths of greater parameterization (and hopefully lower + * cost). 
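+	 *
+	 * For example, with candidateRelids {A, B, C} and depthLimit 2 (and no
+	 * lateral refs), starting from the empty parameterization the recursion
+	 * generates {}, {A}, {A,B}, {A,C}, {B}, {B,C}, {C}: combinations rather
+	 * than permutations, so a set like {B,A} is never generated twice.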
+ */ + Relids tmpCandidateRelids = bms_copy(candidateRelids); + int relid = -1; + while ((relid = bms_next_member(candidateRelids, relid)) >= 0) + { + Relids tmpParamRelids = bms_add_member( + bms_copy(paramRelids), relid); + + /* + * Because we are generating combinations (not permutations), remove + * the relid from the set of candidates at this level as we descend to + * the next. + */ + tmpCandidateRelids = bms_del_member(tmpCandidateRelids, relid); + + AddColumnarScanPathsRec(root, rel, rte, tmpParamRelids, + tmpCandidateRelids, depthLimit); + } + + bms_free(tmpCandidateRelids); +} + + +/* + * ParameterizationAsString returns the string representation of the set of + * rels given in paramRelids. + * + * Takes a StringInfo so that it doesn't return palloc'd memory. This makes it + * easy to call this function as an argument to ereport(), such that it won't + * be evaluated unless the message is going to be output somewhere. + */ +static char * +ParameterizationAsString(PlannerInfo *root, Relids paramRelids, StringInfo buf) +{ + bool firstTime = true; + int relid = -1; + + if (bms_num_members(paramRelids) == 0) + { + return "unparameterized"; + } + + appendStringInfoString(buf, "parameterized by rels {"); + while ((relid = bms_next_member(paramRelids, relid)) >= 0) + { + RangeTblEntry *rte = root->simple_rte_array[relid]; + const char *relname = quote_identifier(rte->eref->aliasname); + + appendStringInfo(buf, "%s%s", firstTime ? "" : ", ", relname); + + if (relname != rte->eref->aliasname) + { + pfree((void *) relname); + } + + firstTime = false; + } + appendStringInfoString(buf, "}"); + return buf->data; +} + + +/* + * Create and add a path with the given parameterization paramRelids. + * + * XXX: Consider refactoring to be more like postgresGetForeignPaths(). The + * only differences are param_info and custom_private. + */ +static void +AddColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte, + Relids paramRelids) { /* * Must return a CustomPath, not a larger structure containing a * CustomPath as the first field. Otherwise, nodeToString() will fail to * output the additional fields. */ - CustomPath *cpath = (CustomPath *) newNode(sizeof(CustomPath), T_CustomPath); + CustomPath *cpath = makeNode(CustomPath); cpath->methods = &ColumnarScanPathMethods; @@ -520,38 +1102,79 @@ CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) /* columnar scans are not parallel-aware, but they are parallel-safe */ path->parallel_safe = rel->consider_parallel; - /* - * We don't support pushing join clauses into the quals of a seqscan, but - * it could still have required parameterization due to LATERAL refs in - * its tlist. - */ - path->param_info = get_baserel_parampathinfo(root, rel, - rel->lateral_relids); + path->param_info = get_baserel_parampathinfo(root, rel, paramRelids); /* - * Add cost estimates for a columnar table scan, row count is the rows estimated by - * postgres' planner. + * Now, baserestrictinfo contains the clauses referencing only this rel, + * and ppi_clauses (if present) represents the join clauses that reference + * this rel and rels contained in paramRelids (after accounting for + * ECs). Combine the two lists of clauses, extracting the actual clause + * from the rinfo, and filtering out pseudoconstants and SAOPs. 
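+	 * (A SAOP is a ScalarArrayOpExpr such as "x1 = ANY (ARRAY[...])"; it is
+	 * filtered out because CheckPushdownClause accepts only simple binary
+	 * OpExprs.)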
 */
-	path->rows = rel->rows;
-	path->startup_cost = 0;
+	List *allClauses = copyObject(rel->baserestrictinfo);
+	if (path->param_info != NULL)
+	{
+		allClauses = list_concat(allClauses, path->param_info->ppi_clauses);
+	}
+
+	/*
+	 * This is the set of clauses that can be pushed down for this
+	 * parameterization (with the given paramRelids), and will be used to
+	 * construct the CustomScan plan.
+	 */
+	List *pushdownClauses = FilterPushdownClauses(root, rel, allClauses);
+
+	if (EnableColumnarQualPushdown)
+	{
+		cpath->custom_private = pushdownClauses;
+	}
+
 	int numberOfColumnsRead = bms_num_members(rte->selectedCols);
-	path->total_cost = path->startup_cost +
-					   ColumnarScanCost(rel, rte->relid, numberOfColumnsRead);
+	int numberOfClausesPushed = list_length(cpath->custom_private);
 
-	return (Path *) cpath;
+	CostColumnarScan(cpath, root, rel, rte->relid, numberOfColumnsRead,
+					 numberOfClausesPushed);
+
+
+	StringInfoData buf;
+	initStringInfo(&buf);
+	ereport(ColumnarPlannerDebugLevel,
+			(errmsg("columnar planner: adding CustomScan path for %s",
+					rte->eref->aliasname),
+			 errdetail("%s; %d clauses pushed down",
+					   ParameterizationAsString(root, paramRelids, &buf),
+					   numberOfClausesPushed)));
+
+	add_path(rel, path);
 }
 
 
 /*
- * ColumnarScanCost calculates the cost of scanning the columnar table. The cost is estimated
- * by using all stripe metadata to estimate based on the columns to read how many pages
- * need to be read.
+ * CostColumnarScan calculates the cost of scanning the columnar table. The
+ * cost is estimated from the stripe metadata: based on the columns being
+ * read, it estimates how many pages need to be read.
  */
-static Cost
-ColumnarScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead)
+static void
+CostColumnarScan(CustomPath *cpath, PlannerInfo *root, RelOptInfo *rel,
+				 Oid relationId, int numberOfColumnsRead, int nClauses)
 {
-	return ColumnarTableStripeCount(relationId) *
-		   ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
+	Path *path = &cpath->path;
+
+	Selectivity clauseSel = clauselist_selectivity(
+		root, cpath->custom_private, rel->relid, JOIN_INNER, NULL);
+
+	/*
+	 * We already filtered out clauses where the overall selectivity would be
+	 * misleading, such as inequalities involving an uncorrelated column. So
+	 * we can apply the selectivity directly to the number of stripes.
+	 */
+	double stripesToRead = clauseSel * ColumnarTableStripeCount(relationId);
+	stripesToRead = Max(stripesToRead, 1.0);
+
+	path->rows = rel->rows;
+	path->startup_cost = 0;
+	path->total_cost = stripesToRead *
+					   ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
 }
 
 
@@ -637,15 +1260,32 @@ ColumnarScanPath_PlanCustomPath(PlannerInfo *root,
 	 * CustomScan as the first field. Otherwise, copyObject() will fail to
 	 * copy the additional fields.
 	 */
-	CustomScan *cscan = (CustomScan *) newNode(sizeof(CustomScan), T_CustomScan);
+	CustomScan *cscan = makeNode(CustomScan);
 
 	cscan->methods = &ColumnarScanScanMethods;
 
-	/* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
-	clauses = extract_actual_clauses(clauses, false);
+	/* XXX: also need to store projected column list for EXPLAIN */
+	if (EnableColumnarQualPushdown)
+	{
+		/*
+		 * List of pushed-down clauses. The Vars referencing other relations
+		 * will be changed into exec Params by create_customscan_plan().
+		 *
+		 * XXX: this list just determines what will be pushed into the
+		 * columnar reader code; some of these clauses may not be usable. 
We should fix this by + * passing down something more like ScanKeys, where we've already + * verified that the operators match the btree opclass of the chunk + * predicates. + */ + cscan->custom_exprs = copyObject( + extract_actual_clauses(best_path->custom_private, + false /* no pseudoconstants */)); + } + + cscan->scan.plan.qual = extract_actual_clauses( + clauses, false /* no pseudoconstants */); cscan->scan.plan.targetlist = list_copy(tlist); - cscan->scan.plan.qual = clauses; cscan->scan.scanrelid = best_path->path.parent->relid; return (Plan *) cscan; @@ -661,18 +1301,69 @@ ColumnarScan_CreateCustomScanState(CustomScan *cscan) CustomScanState *cscanstate = &columnarScanState->custom_scanstate; cscanstate->methods = &ColumnarScanExecuteMethods; - if (EnableColumnarQualPushdown) + return (Node *) cscanstate; +} + + +/* + * EvalParamsMutator evaluates Params in the expression and replaces them with + * Consts. + */ +static Node * +EvalParamsMutator(Node *node, ExprContext *econtext) +{ + if (node == NULL) { - columnarScanState->qual = cscan->scan.plan.qual; + return NULL; } - return (Node *) cscanstate; + if (IsA(node, Param)) + { + Param *param = (Param *) node; + int16 typLen; + bool typByVal; + bool isnull; + + get_typlenbyval(param->paramtype, &typLen, &typByVal); + + /* XXX: should save ExprState for efficiency */ + ExprState *exprState = ExecInitExprWithParams((Expr *) node, + econtext->ecxt_param_list_info); + Datum pval = ExecEvalExpr(exprState, econtext, &isnull); + + return (Node *) makeConst(param->paramtype, + param->paramtypmod, + param->paramcollid, + (int) typLen, + pval, + isnull, + typByVal); + } + + return expression_tree_mutator(node, EvalParamsMutator, (void *) econtext); } static void ColumnarScan_BeginCustomScan(CustomScanState *cscanstate, EState *estate, int eflags) { + CustomScan *cscan = (CustomScan *) cscanstate->ss.ps.plan; + ColumnarScanState *columnarScanState = (ColumnarScanState *) cscanstate; + ExprContext *stdecontext = cscanstate->ss.ps.ps_ExprContext; + + /* + * Make a new ExprContext just like the existing one, except that we don't + * reset it every tuple. 
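+	 *
+	 * Evaluating the Params in the pushed-down quals here (and again at each
+	 * rescan) rather than once per tuple means the Consts they become stay
+	 * valid for the whole scan, and the same mechanism covers both extern
+	 * and exec Params.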
+ */ + ExecAssignExprContext(estate, &cscanstate->ss.ps); + columnarScanState->css_RuntimeContext = cscanstate->ss.ps.ps_ExprContext; + cscanstate->ss.ps.ps_ExprContext = stdecontext; + + /* XXX: separate into runtime clauses and normal clauses */ + ResetExprContext(columnarScanState->css_RuntimeContext); + columnarScanState->qual = (List *) EvalParamsMutator( + (Node *) cscan->custom_exprs, columnarScanState->css_RuntimeContext); + /* scan slot is already initialized */ } @@ -821,10 +1512,21 @@ ColumnarScan_EndCustomScan(CustomScanState *node) static void ColumnarScan_ReScanCustomScan(CustomScanState *node) { + CustomScan *cscan = (CustomScan *) node->ss.ps.plan; + ColumnarScanState *columnarScanState = (ColumnarScanState *) node; + + ResetExprContext(columnarScanState->css_RuntimeContext); + columnarScanState->qual = (List *) EvalParamsMutator( + (Node *) cscan->custom_exprs, columnarScanState->css_RuntimeContext); + TableScanDesc scanDesc = node->ss.ss_currentScanDesc; + if (scanDesc != NULL) { - table_rescan(node->ss.ss_currentScanDesc, NULL); + /* XXX: hack to pass quals as scan keys */ + ScanKey scanKeys = (ScanKey) columnarScanState->qual; + table_rescan(node->ss.ss_currentScanDesc, + scanKeys); } } @@ -833,50 +1535,79 @@ static void ColumnarScan_ExplainCustomScan(CustomScanState *node, List *ancestors, ExplainState *es) { - ColumnarScanDesc columnarScanDesc = (ColumnarScanDesc) node->ss.ss_currentScanDesc; - - if (columnarScanDesc != NULL) - { - int64 chunkGroupsFiltered = ColumnarScanChunkGroupsFiltered(columnarScanDesc); - ExplainPropertyInteger("Columnar Chunk Groups Removed by Filter", NULL, - chunkGroupsFiltered, es); - } - ColumnarScanState *columnarScanState = (ColumnarScanState *) node; - const char *projectedColumnsStr = - ColumnarScanProjectedColumnsStr(columnarScanState, ancestors, es); - ExplainPropertyText("Columnar Projected Columns", projectedColumnsStr, es); + + List *context = set_deparse_context_planstate( + es->deparse_cxt, (Node *) &node->ss.ps, ancestors); + + List *projectedColumns = ColumnarVarNeeded(columnarScanState); + const char *projectedColumnsStr = ColumnarProjectedColumnsStr( + context, projectedColumns); + ExplainPropertyText("Columnar Projected Columns", + projectedColumnsStr, es); + + CustomScan *cscan = castNode(CustomScan, node->ss.ps.plan); + List *chunkGroupFilter = cscan->custom_exprs; + if (chunkGroupFilter != NULL) + { + const char *pushdownClausesStr = ColumnarPushdownClausesStr( + context, chunkGroupFilter); + ExplainPropertyText("Columnar Chunk Group Filters", + pushdownClausesStr, es); + + ColumnarScanDesc columnarScanDesc = + (ColumnarScanDesc) node->ss.ss_currentScanDesc; + if (columnarScanDesc != NULL) + { + ExplainPropertyInteger( + "Columnar Chunk Groups Removed by Filter", + NULL, ColumnarScanChunkGroupsFiltered(columnarScanDesc), es); + } + } } /* - * ColumnarScanProjectedColumnsStr generates projected column string for + * ColumnarPushdownClausesStr represents the clauses to push down as a string. 
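+ * Multiple clauses are combined with AND and deparsed as a single
+ * expression.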
+ */ +static const char * +ColumnarPushdownClausesStr(List *context, List *clauses) +{ + Expr *conjunction; + + Assert(list_length(clauses) > 0); + + if (list_length(clauses) == 1) + { + conjunction = (Expr *) linitial(clauses); + } + else + { + conjunction = make_andclause(clauses); + } + + bool useTableNamePrefix = false; + bool showImplicitCast = false; + return deparse_expression((Node *) conjunction, context, + useTableNamePrefix, showImplicitCast); +} + + +/* + * ColumnarProjectedColumnsStr generates projected column string for * explain output. */ static const char * -ColumnarScanProjectedColumnsStr(ColumnarScanState *columnarScanState, List *ancestors, - ExplainState *es) +ColumnarProjectedColumnsStr(List *context, List *projectedColumns) { - ScanState *scanState = &columnarScanState->custom_scanstate.ss; - - List *neededVarList = ColumnarVarNeeded(columnarScanState); - if (list_length(neededVarList) == 0) + if (list_length(projectedColumns) == 0) { return ""; } -#if PG_VERSION_NUM >= 130000 - List *context = - set_deparse_context_plan(es->deparse_cxt, scanState->ps.plan, ancestors); -#else - List *context = - set_deparse_context_planstate(es->deparse_cxt, (Node *) &scanState->ps, - ancestors); -#endif - bool useTableNamePrefix = false; bool showImplicitCast = false; - return deparse_expression((Node *) neededVarList, context, + return deparse_expression((Node *) projectedColumns, context, useTableNamePrefix, showImplicitCast); } @@ -945,3 +1676,19 @@ ColumnarVarNeeded(ColumnarScanState *columnarScanState) return varList; } + + +#if PG_VERSION_NUM >= 130000 + +/* + * set_deparse_context_planstate is a compatibility wrapper for versions 13+. + */ +static List * +set_deparse_context_planstate(List *dpcontext, Node *node, List *ancestors) +{ + PlanState *ps = (PlanState *) node; + return set_deparse_context_plan(dpcontext, ps->plan, ancestors); +} + + +#endif diff --git a/src/backend/columnar/columnar_reader.c b/src/backend/columnar/columnar_reader.c index 28381804d..c88dae8f3 100644 --- a/src/backend/columnar/columnar_reader.c +++ b/src/backend/columnar/columnar_reader.c @@ -467,7 +467,7 @@ HasUnreadStripe(ColumnarReadState *readState) * the beginning again */ void -ColumnarRescan(ColumnarReadState *readState) +ColumnarRescan(ColumnarReadState *readState, List *scanQual) { MemoryContext oldContext = MemoryContextSwitchTo(readState->scanContext); @@ -477,6 +477,7 @@ ColumnarRescan(ColumnarReadState *readState) readState->snapshot); readState->chunkGroupsFiltered = 0; + readState->whereClauseList = copyObject(scanQual); MemoryContextSwitchTo(oldContext); } diff --git a/src/backend/columnar/columnar_tableam.c b/src/backend/columnar/columnar_tableam.c index 4b908f244..65cbc8e8f 100644 --- a/src/backend/columnar/columnar_tableam.c +++ b/src/backend/columnar/columnar_tableam.c @@ -330,9 +330,13 @@ columnar_rescan(TableScanDesc sscan, ScanKey key, bool set_params, bool allow_strat, bool allow_sync, bool allow_pagemode) { ColumnarScanDesc scan = (ColumnarScanDesc) sscan; + + /* XXX: hack to pass in new quals that aren't actually scan keys */ + List *scanQual = (List *) key; + if (scan->cs_readState != NULL) { - ColumnarRescan(scan->cs_readState); + ColumnarRescan(scan->cs_readState, scanQual); } } diff --git a/src/include/columnar/columnar.h b/src/include/columnar/columnar.h index 799cd65ac..d439fd1fe 100644 --- a/src/include/columnar/columnar.h +++ b/src/include/columnar/columnar.h @@ -218,7 +218,7 @@ extern ColumnarReadState * ColumnarBeginRead(Relation relation, bool 
snapshotRegisteredByUs); extern bool ColumnarReadNextRow(ColumnarReadState *state, Datum *columnValues, bool *columnNulls, uint64 *rowNumber); -extern void ColumnarRescan(ColumnarReadState *readState); +extern void ColumnarRescan(ColumnarReadState *readState, List *scanQual); extern bool ColumnarReadRowByRowNumber(ColumnarReadState *readState, uint64 rowNumber, Datum *columnValues, bool *columnNulls); diff --git a/src/test/regress/expected/columnar_chunk_filtering.out b/src/test/regress/expected/columnar_chunk_filtering.out index 78a4852c3..3f110a64b 100644 --- a/src/test/regress/expected/columnar_chunk_filtering.out +++ b/src/test/regress/expected/columnar_chunk_filtering.out @@ -23,6 +23,7 @@ $$ RETURN result; END; $$ LANGUAGE PLPGSQL; +set columnar.qual_pushdown_correlation = 0.0; -- Create and load data -- chunk_group_row_limit '1000', stripe_row_limit '2000' set columnar.stripe_row_limit = 2000; @@ -127,9 +128,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=111111 loops=1) Filter: (i > 123456) Rows Removed by Filter: 3457 - Columnar Chunk Groups Removed by Filter: 12 Columnar Projected Columns: i -(5 rows) + Columnar Chunk Group Filters: (i > 123456) + Columnar Chunk Groups Removed by Filter: 12 +(6 rows) SET columnar.enable_qual_pushdown = false; EXPLAIN (analyze on, costs off, timing off, summary off) @@ -139,9 +141,8 @@ EXPLAIN (analyze on, costs off, timing off, summary off) Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=111111 loops=1) Filter: (i > 123456) Rows Removed by Filter: 123457 - Columnar Chunk Groups Removed by Filter: 0 Columnar Projected Columns: i -(5 rows) +(4 rows) SET columnar.enable_qual_pushdown TO DEFAULT; -- https://github.com/citusdata/citus/issues/4555 @@ -155,9 +156,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=20000 loops=1) Filter: (i > 180000) Rows Removed by Filter: 1 - Columnar Chunk Groups Removed by Filter: 18 Columnar Projected Columns: i -(5 rows) + Columnar Chunk Group Filters: (i > 180000) + Columnar Chunk Groups Removed by Filter: 18 +(6 rows) DROP TABLE simple_chunk_filtering; CREATE TABLE multi_column_chunk_filtering(a int, b int) USING columnar; @@ -170,9 +172,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) -> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=184567 loops=1) Filter: (a > 50000) Rows Removed by Filter: 1 - Columnar Chunk Groups Removed by Filter: 5 Columnar Projected Columns: a -(6 rows) + Columnar Chunk Group Filters: (a > 50000) + Columnar Chunk Groups Removed by Filter: 5 +(7 rows) EXPLAIN (analyze on, costs off, timing off, summary off) SELECT count(*) FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000; @@ -182,9 +185,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) -> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=184567 loops=1) Filter: ((a > 50000) AND (b > 50000)) Rows Removed by Filter: 1 - Columnar Chunk Groups Removed by Filter: 5 Columnar Projected Columns: a, b -(6 rows) + Columnar Chunk Group Filters: ((a > 50000) AND (b > 50000)) + Columnar Chunk Groups Removed by Filter: 5 +(7 rows) -- make next tests faster TRUNCATE multi_column_chunk_filtering; @@ -195,9 +199,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) --------------------------------------------------------------------- Custom Scan (ColumnarScan) on 
multi_column_chunk_filtering (actual rows=0 loops=1) Filter: ((a > 50000) AND (b > 50000)) - Columnar Chunk Groups Removed by Filter: 1 Columnar Projected Columns: a, b -(4 rows) + Columnar Chunk Group Filters: ((a > 50000) AND (b > 50000)) + Columnar Chunk Groups Removed by Filter: 1 +(5 rows) EXPLAIN (analyze on, costs off, timing off, summary off) SELECT b, a FROM multi_column_chunk_filtering WHERE b > 50000; @@ -206,9 +211,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1) Filter: (b > 50000) Rows Removed by Filter: 6 - Columnar Chunk Groups Removed by Filter: 0 Columnar Projected Columns: a, b -(5 rows) + Columnar Chunk Group Filters: (b > 50000) + Columnar Chunk Groups Removed by Filter: 0 +(6 rows) EXPLAIN (analyze on, costs off, timing off, summary off) SELECT FROM multi_column_chunk_filtering WHERE a > 50000; @@ -216,18 +222,18 @@ EXPLAIN (analyze on, costs off, timing off, summary off) --------------------------------------------------------------------- Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1) Filter: (a > 50000) - Columnar Chunk Groups Removed by Filter: 1 Columnar Projected Columns: a -(4 rows) + Columnar Chunk Group Filters: (a > 50000) + Columnar Chunk Groups Removed by Filter: 1 +(5 rows) EXPLAIN (analyze on, costs off, timing off, summary off) SELECT FROM multi_column_chunk_filtering; QUERY PLAN --------------------------------------------------------------------- Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=1) - Columnar Chunk Groups Removed by Filter: 0 Columnar Projected Columns: -(3 rows) +(2 rows) BEGIN; ALTER TABLE multi_column_chunk_filtering DROP COLUMN a; @@ -237,9 +243,8 @@ BEGIN; QUERY PLAN --------------------------------------------------------------------- Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=1) - Columnar Chunk Groups Removed by Filter: 0 Columnar Projected Columns: -(3 rows) +(2 rows) ROLLBACK; CREATE TABLE another_columnar_table(x int, y int) USING columnar; @@ -252,10 +257,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) -> Custom Scan (ColumnarScan) on another_columnar_table (actual rows=4 loops=1) Filter: (x > 1) Rows Removed by Filter: 2 - Columnar Chunk Groups Removed by Filter: 0 Columnar Projected Columns: x, y - -> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=4) + Columnar Chunk Group Filters: (x > 1) Columnar Chunk Groups Removed by Filter: 0 + -> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=4) Columnar Projected Columns: a (9 rows) @@ -306,3 +311,438 @@ select filtered_row_count('select count(*) from part_table where id > 75000'); (1 row) drop table part_table; +-- test join parameterization +set columnar.stripe_row_limit = 2000; +set columnar.chunk_group_row_limit = 1000; +create table r1(id1 int, n1 int); -- row +create table r2(id2 int, n2 int); -- row +create table r3(id3 int, n3 int); -- row +create table r4(id4 int, n4 int); -- row +create table r5(id5 int, n5 int); -- row +create table r6(id6 int, n6 int); -- row +create table r7(id7 int, n7 int); -- row +create table coltest(id int, x1 int, x2 int, x3 int) using columnar; +create table coltest_part(id int, x1 int, x2 int, x3 int) + partition by range (id); +create table coltest_part0 + partition of coltest_part for values from (0) to (10000) + using columnar; +create table coltest_part1 + 
partition of coltest_part for values from (10000) to (20000); -- row +set columnar.stripe_row_limit to default; +set columnar.chunk_group_row_limit to default; +insert into r1 values(1234, 12350); +insert into r1 values(4567, 45000); +insert into r1 values(9101, 176000); +insert into r1 values(14202, 7); +insert into r1 values(18942, 189430); +insert into r2 values(1234, 123502); +insert into r2 values(4567, 450002); +insert into r2 values(9101, 1760002); +insert into r2 values(14202, 72); +insert into r2 values(18942, 1894302); +insert into r3 values(1234, 1235075); +insert into r3 values(4567, 4500075); +insert into r3 values(9101, 17600075); +insert into r3 values(14202, 775); +insert into r3 values(18942, 18943075); +insert into r4 values(1234, -1); +insert into r5 values(1234, -1); +insert into r6 values(1234, -1); +insert into r7 values(1234, -1); +insert into coltest + select g, g*10, g*100, g*1000 from generate_series(0, 19999) g; +insert into coltest_part + select g, g*10, g*100, g*1000 from generate_series(0, 19999) g; +ANALYZE r1, r2, r3, coltest, coltest_part; +-- force nested loop +set enable_mergejoin=false; +set enable_hashjoin=false; +set enable_material=false; +-- test different kinds of expressions +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, coltest WHERE + id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0; + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop (actual rows=3 loops=1) + -> Seq Scan on r1 (actual rows=4 loops=1) + Filter: ((n1 % 10) = 0) + Rows Removed by Filter: 1 + -> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=4) + Filter: ((x1 > 15000) AND (r1.id1 = id) AND ((x1)::text > '000000'::text)) + Rows Removed by Filter: 999 + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: ((x1 > 15000) AND (r1.id1 = id)) + Columnar Chunk Groups Removed by Filter: 19 +(10 rows) + +SELECT * FROM r1, coltest WHERE + id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0; + id1 | n1 | id | x1 | x2 | x3 +--------------------------------------------------------------------- + 4567 | 45000 | 4567 | 45670 | 456700 | 4567000 + 9101 | 176000 | 9101 | 91010 | 910100 | 9101000 + 18942 | 189430 | 18942 | 189420 | 1894200 | 18942000 +(3 rows) + +-- test equivalence classes +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE + id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND + id4 = id5 AND id5 = id6 AND id6 = id7; + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop (actual rows=1 loops=1) + Join Filter: (coltest.id = r7.id7) + -> Nested Loop (actual rows=1 loops=1) + Join Filter: (coltest.id = r6.id6) + -> Nested Loop (actual rows=1 loops=1) + Join Filter: (coltest.id = r5.id5) + -> Nested Loop (actual rows=1 loops=1) + Join Filter: (coltest.id = r4.id4) + Rows Removed by Join Filter: 4 + -> Nested Loop (actual rows=5 loops=1) + -> Nested Loop (actual rows=5 loops=1) + Join Filter: (r1.id1 = r3.id3) + Rows Removed by Join Filter: 20 + -> Nested Loop (actual rows=5 loops=1) + Join Filter: (r1.id1 = r2.id2) + Rows Removed by Join Filter: 20 + -> Seq Scan on r1 (actual rows=5 loops=1) + -> Seq Scan on r2 (actual rows=5 loops=5) + -> Seq Scan on r3 (actual rows=5 loops=5) + -> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=5) + Filter: (r1.id1 = id) + Rows Removed by Filter: 999 + Columnar Projected Columns: id, x1, x2, x3 + 
Columnar Chunk Group Filters: (r1.id1 = id) + Columnar Chunk Groups Removed by Filter: 19 + -> Seq Scan on r4 (actual rows=1 loops=5) + -> Seq Scan on r5 (actual rows=1 loops=1) + -> Seq Scan on r6 (actual rows=1 loops=1) + -> Seq Scan on r7 (actual rows=1 loops=1) +(29 rows) + +SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE + id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND + id4 = id5 AND id5 = id6 AND id6 = id7; + id1 | n1 | id2 | n2 | id3 | n3 | id4 | n4 | id5 | n5 | id6 | n6 | id7 | n7 | id | x1 | x2 | x3 +--------------------------------------------------------------------- + 1234 | 12350 | 1234 | 123502 | 1234 | 1235075 | 1234 | -1 | 1234 | -1 | 1234 | -1 | 1234 | -1 | 1234 | 12340 | 123400 | 1234000 +(1 row) + +-- test path generation with different thresholds +set columnar.planner_debug_level = 'notice'; +set columnar.max_custom_scan_paths to 10; +EXPLAIN (costs off, timing off, summary off) + SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE + c1.id = c2.id and c1.id = c3.id and c1.id = c4.id; +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c2}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c2, c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c2, c3, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c2, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c3, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c1}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c1, c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c1, c3, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c1, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c3, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c1}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c1, c2}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c1, c2, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c1, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan 
path for c3 +DETAIL: parameterized by rels {c2}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c2, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c1}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c1, c2}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c1, c2, c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c1, c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c2}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c2, c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c3}; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Nested Loop + -> Custom Scan (ColumnarScan) on coltest c1 + Columnar Projected Columns: id, x1, x2, x3 + -> Custom Scan (ColumnarScan) on coltest c2 + Filter: (c1.id = id) + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: (c1.id = id) + -> Custom Scan (ColumnarScan) on coltest c3 + Filter: (c1.id = id) + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: (c1.id = id) + -> Custom Scan (ColumnarScan) on coltest c4 + Filter: (c1.id = id) + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: (c1.id = id) +(17 rows) + +set columnar.max_custom_scan_paths to 2; +EXPLAIN (costs off, timing off, summary off) + SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE + c1.id = c2.id and c1.id = c3.id and c1.id = c4.id; +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: unparameterized; 0 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop + Join Filter: (c1.id = c4.id) + -> Nested Loop + Join Filter: (c1.id = c3.id) + -> Nested Loop + Join Filter: (c1.id = c2.id) + -> Custom Scan (ColumnarScan) on coltest c1 + Columnar Projected Columns: id, x1, x2, x3 + -> Custom Scan (ColumnarScan) on coltest c2 + Columnar Projected Columns: id, x1, x2, x3 + -> Custom Scan (ColumnarScan) on coltest c3 + Columnar Projected Columns: id, x1, x2, x3 + -> Custom Scan (ColumnarScan) on coltest c4 + Columnar Projected Columns: id, x1, x2, x3 +(14 rows) + +set columnar.max_custom_scan_paths to default; +set columnar.planner_debug_level to default; +-- test more complex parameterization +set columnar.planner_debug_level = 'notice'; +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, r2, r3, coltest WHERE + id1 = id2 AND id2 = id3 AND id3 = id AND + n1 > x1 AND n2 > x2 AND n3 > x3; +NOTICE: 
columnar planner: adding CustomScan path for coltest +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r1}; 2 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r1, r2}; 3 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r1, r2, r3}; 4 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r1, r3}; 3 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r2}; 2 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r2, r3}; 3 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r3}; 2 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop (actual rows=3 loops=1) + Join Filter: ((r3.n3 > coltest.x3) AND (r1.id1 = r3.id3)) + Rows Removed by Join Filter: 12 + -> Nested Loop (actual rows=3 loops=1) + Join Filter: ((r2.n2 > coltest.x2) AND (r1.id1 = r2.id2)) + Rows Removed by Join Filter: 12 + -> Nested Loop (actual rows=3 loops=1) + -> Seq Scan on r1 (actual rows=5 loops=1) + -> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=5) + Filter: ((r1.n1 > x1) AND (r1.id1 = id)) + Rows Removed by Filter: 799 + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: ((r1.n1 > x1) AND (r1.id1 = id)) + Columnar Chunk Groups Removed by Filter: 19 + -> Seq Scan on r2 (actual rows=5 loops=3) + -> Seq Scan on r3 (actual rows=5 loops=3) +(16 rows) + +set columnar.planner_debug_level to default; +SELECT * FROM r1, r2, r3, coltest WHERE + id1 = id2 AND id2 = id3 AND id3 = id AND + n1 > x1 AND n2 > x2 AND n3 > x3; + id1 | n1 | id2 | n2 | id3 | n3 | id | x1 | x2 | x3 +--------------------------------------------------------------------- + 1234 | 12350 | 1234 | 123502 | 1234 | 1235075 | 1234 | 12340 | 123400 | 1234000 + 9101 | 176000 | 9101 | 1760002 | 9101 | 17600075 | 9101 | 91010 | 910100 | 9101000 + 18942 | 189430 | 18942 | 1894302 | 18942 | 18943075 | 18942 | 189420 | 1894200 | 18942000 +(3 rows) + +-- test partitioning parameterization +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, coltest_part WHERE + id1 = id AND n1 > x1; + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop (actual rows=3 loops=1) + -> Seq Scan on r1 (actual rows=5 loops=1) + -> Append (actual rows=1 loops=5) + -> Custom Scan (ColumnarScan) on coltest_part0 coltest_part_1 (actual rows=1 loops=3) + Filter: ((r1.n1 > x1) AND (r1.id1 = id)) + Rows Removed by Filter: 999 + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: ((r1.n1 > x1) AND (r1.id1 = id)) + Columnar Chunk Groups Removed by Filter: 9 + -> Seq Scan on coltest_part1 coltest_part_2 (actual rows=0 loops=2) + Filter: ((r1.n1 > x1) AND (r1.id1 = id)) + Rows Removed by Filter: 10000 +(12 rows) + +SELECT * FROM r1, coltest_part WHERE + id1 = id AND n1 > x1; + id1 | n1 | id | x1 | x2 | x3 +--------------------------------------------------------------------- + 1234 | 12350 | 1234 | 12340 | 123400 | 1234000 + 9101 | 176000 | 9101 | 91010 | 910100 | 9101000 + 18942 | 189430 | 18942 | 189420 | 1894200 | 18942000 +(3 rows) + +set enable_mergejoin to 
default; +set enable_hashjoin to default; +set enable_material to default; +set columnar.planner_debug_level = 'notice'; +alter table coltest add column x5 int default (random()*20000)::int; +analyze coltest; +-- test that expressions on whole-row references are not pushed down +select * from coltest where coltest = (1,1,1,1); +NOTICE: columnar planner: cannot push down clause: var is whole-row reference +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: unparameterized; 0 clauses pushed down + id | x1 | x2 | x3 | x5 +--------------------------------------------------------------------- +(0 rows) + +-- test that expressions on uncorrelated attributes are not pushed down +set columnar.qual_pushdown_correlation to default; +select * from coltest where x5 = 23484; +NOTICE: columnar planner: cannot push down clause: var attribute 5 is uncorrelated +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: unparameterized; 0 clauses pushed down + id | x1 | x2 | x3 | x5 +--------------------------------------------------------------------- +(0 rows) + +-- test that expressions on volatile functions are not pushed down +create function vol() returns int language plpgsql as $$ +BEGIN + RETURN 1; +END; +$$; +select * from coltest where x3 = vol(); +NOTICE: columnar planner: cannot push down clause: expr contains volatile functions +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: unparameterized; 0 clauses pushed down + id | x1 | x2 | x3 | x5 +--------------------------------------------------------------------- +(0 rows) + +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT * FROM coltest c1 WHERE ceil(x1) > 4222; +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: unparameterized; 0 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on coltest c1 (actual rows=19577 loops=1) + Filter: (ceil((x1)::double precision) > '4222'::double precision) + Rows Removed by Filter: 423 + Columnar Projected Columns: id, x1, x2, x3, x5 +(4 rows) + +set columnar.planner_debug_level to default; +-- +-- https://github.com/citusdata/citus/issues/4488 +-- +create table columnar_prepared_stmt (x int, y int) using columnar; +insert into columnar_prepared_stmt select s, s from generate_series(1,5000000) s; +prepare foo (int) as select x from columnar_prepared_stmt where x = $1; +execute foo(3); + x +--------------------------------------------------------------------- + 3 +(1 row) + +execute foo(3); + x +--------------------------------------------------------------------- + 3 +(1 row) + +execute foo(3); + x +--------------------------------------------------------------------- + 3 +(1 row) + +execute foo(3); + x +--------------------------------------------------------------------- + 3 +(1 row) + +select filtered_row_count('execute foo(3)'); + filtered_row_count +--------------------------------------------------------------------- + 9999 +(1 row) + +select filtered_row_count('execute foo(3)'); + filtered_row_count +--------------------------------------------------------------------- + 9999 +(1 row) + +select filtered_row_count('execute foo(3)'); + filtered_row_count +--------------------------------------------------------------------- + 9999 +(1 row) + +select filtered_row_count('execute 
foo(3)'); + filtered_row_count +--------------------------------------------------------------------- + 9999 +(1 row) + +drop table columnar_prepared_stmt; diff --git a/src/test/regress/expected/columnar_chunk_filtering_0.out b/src/test/regress/expected/columnar_chunk_filtering_0.out new file mode 100644 index 000000000..18e32a0cc --- /dev/null +++ b/src/test/regress/expected/columnar_chunk_filtering_0.out @@ -0,0 +1,748 @@ +-- +-- Test chunk filtering in columnar using min/max values in stripe skip lists. +-- +-- +-- filtered_row_count returns the number of rows filtered by the WHERE clause. +-- If chunks get filtered by columnar, fewer rows are passed to the WHERE +-- clause, so this function should return a lower number. +-- +CREATE OR REPLACE FUNCTION filtered_row_count (query text) RETURNS bigint AS +$$ + DECLARE + result bigint; + rec text; + BEGIN + result := 0; + + FOR rec IN EXECUTE 'EXPLAIN ANALYZE ' || query LOOP + IF rec ~ '^\s+Rows Removed by Filter' then + result := regexp_replace(rec, '[^0-9]*', '', 'g'); + END IF; + END LOOP; + + RETURN result; + END; +$$ LANGUAGE PLPGSQL; +set columnar.qual_pushdown_correlation = 0.0; +-- Create and load data +-- chunk_group_row_limit '1000', stripe_row_limit '2000' +set columnar.stripe_row_limit = 2000; +set columnar.chunk_group_row_limit = 1000; +CREATE TABLE test_chunk_filtering (a int) + USING columnar; +INSERT INTO test_chunk_filtering SELECT generate_series(1,10000); +-- Verify that filtered_row_count is less than 1000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering'); + filtered_row_count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 200'); + filtered_row_count +--------------------------------------------------------------------- + 801 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a > 200'); + filtered_row_count +--------------------------------------------------------------------- + 200 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 9900'); + filtered_row_count +--------------------------------------------------------------------- + 101 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a > 9900'); + filtered_row_count +--------------------------------------------------------------------- + 900 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 0'); + filtered_row_count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Verify that filtered_row_count is less than 2000 for the following queries +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN 1 AND 10'); + filtered_row_count +--------------------------------------------------------------------- + 990 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +--------------------------------------------------------------------- + 1979 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN -10 AND 0'); + filtered_row_count +--------------------------------------------------------------------- + 0 +(1 row) + +-- Load data for a second time and verify that filtered_row_count is exactly twice what it was before +INSERT INTO test_chunk_filtering SELECT generate_series(1,10000); +SELECT 
filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 200'); + filtered_row_count +--------------------------------------------------------------------- + 1602 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a < 0'); + filtered_row_count +--------------------------------------------------------------------- + 0 +(1 row) + +SELECT filtered_row_count('SELECT count(*) FROM test_chunk_filtering WHERE a BETWEEN 990 AND 2010'); + filtered_row_count +--------------------------------------------------------------------- + 3958 +(1 row) + +set columnar.stripe_row_limit to default; +set columnar.chunk_group_row_limit to default; +-- Verify that we are fine with collations which use a different alphabet order +CREATE TABLE collation_chunk_filtering_test(A text collate "da_DK") + USING columnar; +COPY collation_chunk_filtering_test FROM STDIN; +SELECT * FROM collation_chunk_filtering_test WHERE A > 'B'; + a +--------------------------------------------------------------------- + Å +(1 row) + +CREATE TABLE simple_chunk_filtering(i int) USING COLUMNAR; +INSERT INTO simple_chunk_filtering SELECT generate_series(0,234567); +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT * FROM simple_chunk_filtering WHERE i > 123456; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=111111 loops=1) + Filter: (i > 123456) + Rows Removed by Filter: 3457 + Columnar Projected Columns: i + Columnar Chunk Group Filters: (i > 123456) + Columnar Chunk Groups Removed by Filter: 12 +(6 rows) + +SET columnar.enable_qual_pushdown = false; +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT * FROM simple_chunk_filtering WHERE i > 123456; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=111111 loops=1) + Filter: (i > 123456) + Rows Removed by Filter: 123457 + Columnar Projected Columns: i +(4 rows) + +SET columnar.enable_qual_pushdown TO DEFAULT; +-- https://github.com/citusdata/citus/issues/4555 +TRUNCATE simple_chunk_filtering; +INSERT INTO simple_chunk_filtering SELECT generate_series(0,200000); +COPY (SELECT * FROM simple_chunk_filtering WHERE i > 180000) TO '/dev/null'; +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT * FROM simple_chunk_filtering WHERE i > 180000; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on simple_chunk_filtering (actual rows=20000 loops=1) + Filter: (i > 180000) + Rows Removed by Filter: 1 + Columnar Projected Columns: i + Columnar Chunk Group Filters: (i > 180000) + Columnar Chunk Groups Removed by Filter: 18 +(6 rows) + +DROP TABLE simple_chunk_filtering; +CREATE TABLE multi_column_chunk_filtering(a int, b int) USING columnar; +INSERT INTO multi_column_chunk_filtering SELECT i,i+1 FROM generate_series(0,234567) i; +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT count(*) FROM multi_column_chunk_filtering WHERE a > 50000; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=184567 loops=1) + Filter: (a > 50000) + Rows Removed by Filter: 1 + Columnar Projected Columns: a + Columnar Chunk Group Filters: (a > 50000) + Columnar Chunk Groups Removed by Filter: 5 +(7 rows) + 
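+-- For reference, a sketch of why 5 chunk groups are skipped above: with the
+-- default chunk_group_row_limit of 10000 and values inserted in order, chunk
+-- group g of column a covers roughly [g*10000, g*10000 + 9999], so a > 50000
+-- rules out groups 0 through 4 entirely and leaves a single boundary row
+-- (a = 50000) for the filter to remove. The grouping expression below is an
+-- illustrative assumption based on that insertion order, not a columnar API:
+-- SELECT a / 10000 AS chunk_group, min(a), max(a)
+--   FROM multi_column_chunk_filtering GROUP BY 1 ORDER BY 1;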
+EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT count(*) FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000; + QUERY PLAN +--------------------------------------------------------------------- + Aggregate (actual rows=1 loops=1) + -> Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=184567 loops=1) + Filter: ((a > 50000) AND (b > 50000)) + Rows Removed by Filter: 1 + Columnar Projected Columns: a, b + Columnar Chunk Group Filters: ((a > 50000) AND (b > 50000)) + Columnar Chunk Groups Removed by Filter: 5 +(7 rows) + +-- make next tests faster +TRUNCATE multi_column_chunk_filtering; +INSERT INTO multi_column_chunk_filtering SELECT generate_series(0,5); +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT b FROM multi_column_chunk_filtering WHERE a > 50000 AND b > 50000; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1) + Filter: ((a > 50000) AND (b > 50000)) + Columnar Projected Columns: a, b + Columnar Chunk Group Filters: ((a > 50000) AND (b > 50000)) + Columnar Chunk Groups Removed by Filter: 1 +(5 rows) + +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT b, a FROM multi_column_chunk_filtering WHERE b > 50000; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1) + Filter: (b > 50000) + Rows Removed by Filter: 6 + Columnar Projected Columns: a, b + Columnar Chunk Group Filters: (b > 50000) + Columnar Chunk Groups Removed by Filter: 0 +(6 rows) + +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT FROM multi_column_chunk_filtering WHERE a > 50000; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=0 loops=1) + Filter: (a > 50000) + Columnar Projected Columns: a + Columnar Chunk Group Filters: (a > 50000) + Columnar Chunk Groups Removed by Filter: 1 +(5 rows) + +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT FROM multi_column_chunk_filtering; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=1) + Columnar Projected Columns: +(2 rows) + +BEGIN; + ALTER TABLE multi_column_chunk_filtering DROP COLUMN a; + ALTER TABLE multi_column_chunk_filtering DROP COLUMN b; + EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT * FROM multi_column_chunk_filtering; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=1) + Columnar Projected Columns: +(2 rows) + +ROLLBACK; +CREATE TABLE another_columnar_table(x int, y int) USING columnar; +INSERT INTO another_columnar_table SELECT generate_series(0,5); +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT a, y FROM multi_column_chunk_filtering, another_columnar_table WHERE x > 1; + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop (actual rows=24 loops=1) + -> Custom Scan (ColumnarScan) on another_columnar_table (actual rows=4 loops=1) + Filter: (x > 1) + Rows Removed by Filter: 2 + Columnar Projected Columns: x, y + Columnar Chunk Group Filters: (x > 1) + Columnar Chunk Groups Removed by Filter: 0 + -> Custom 
Scan (ColumnarScan) on multi_column_chunk_filtering (actual rows=6 loops=4) + Columnar Projected Columns: a +(9 rows) + +EXPLAIN (costs off, timing off, summary off) + SELECT y, * FROM another_columnar_table; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on another_columnar_table + Columnar Projected Columns: x, y +(2 rows) + +EXPLAIN (costs off, timing off, summary off) + SELECT *, x FROM another_columnar_table; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on another_columnar_table + Columnar Projected Columns: x, y +(2 rows) + +EXPLAIN (costs off, timing off, summary off) + SELECT y, another_columnar_table FROM another_columnar_table; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on another_columnar_table + Columnar Projected Columns: x, y +(2 rows) + +EXPLAIN (costs off, timing off, summary off) + SELECT another_columnar_table, x FROM another_columnar_table; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on another_columnar_table + Columnar Projected Columns: x, y +(2 rows) + +DROP TABLE multi_column_chunk_filtering, another_columnar_table; +-- +-- https://github.com/citusdata/citus/issues/4780 +-- +create table part_table (id int) partition by range (id); +create table part_1_row partition of part_table for values from (150000) to (160000); +create table part_2_columnar partition of part_table for values from (0) to (150000) using columnar; +insert into part_table select generate_series(1,159999); +select filtered_row_count('select count(*) from part_table where id > 75000'); + filtered_row_count +--------------------------------------------------------------------- + 5000 +(1 row) + +drop table part_table; +-- test join parameterization +set columnar.stripe_row_limit = 2000; +set columnar.chunk_group_row_limit = 1000; +create table r1(id1 int, n1 int); -- row +create table r2(id2 int, n2 int); -- row +create table r3(id3 int, n3 int); -- row +create table r4(id4 int, n4 int); -- row +create table r5(id5 int, n5 int); -- row +create table r6(id6 int, n6 int); -- row +create table r7(id7 int, n7 int); -- row +create table coltest(id int, x1 int, x2 int, x3 int) using columnar; +create table coltest_part(id int, x1 int, x2 int, x3 int) + partition by range (id); +create table coltest_part0 + partition of coltest_part for values from (0) to (10000) + using columnar; +create table coltest_part1 + partition of coltest_part for values from (10000) to (20000); -- row +set columnar.stripe_row_limit to default; +set columnar.chunk_group_row_limit to default; +insert into r1 values(1234, 12350); +insert into r1 values(4567, 45000); +insert into r1 values(9101, 176000); +insert into r1 values(14202, 7); +insert into r1 values(18942, 189430); +insert into r2 values(1234, 123502); +insert into r2 values(4567, 450002); +insert into r2 values(9101, 1760002); +insert into r2 values(14202, 72); +insert into r2 values(18942, 1894302); +insert into r3 values(1234, 1235075); +insert into r3 values(4567, 4500075); +insert into r3 values(9101, 17600075); +insert into r3 values(14202, 775); +insert into r3 values(18942, 18943075); +insert into r4 values(1234, -1); +insert into r5 values(1234, -1); +insert into r6 values(1234, -1); +insert into r7 values(1234, -1); +insert into coltest + select g, g*10, g*100, g*1000 from generate_series(0, 
19999) g; +insert into coltest_part + select g, g*10, g*100, g*1000 from generate_series(0, 19999) g; +ANALYZE r1, r2, r3, coltest, coltest_part; +-- force nested loop +set enable_mergejoin=false; +set enable_hashjoin=false; +set enable_material=false; +-- test different kinds of expressions +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, coltest WHERE + id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0; + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop (actual rows=3 loops=1) + -> Seq Scan on r1 (actual rows=4 loops=1) + Filter: ((n1 % 10) = 0) + Rows Removed by Filter: 1 + -> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=4) + Filter: ((x1 > 15000) AND (r1.id1 = id) AND ((x1)::text > '000000'::text)) + Rows Removed by Filter: 999 + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: ((x1 > 15000) AND (r1.id1 = id)) + Columnar Chunk Groups Removed by Filter: 19 +(10 rows) + +SELECT * FROM r1, coltest WHERE + id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0; + id1 | n1 | id | x1 | x2 | x3 +--------------------------------------------------------------------- + 4567 | 45000 | 4567 | 45670 | 456700 | 4567000 + 9101 | 176000 | 9101 | 91010 | 910100 | 9101000 + 18942 | 189430 | 18942 | 189420 | 1894200 | 18942000 +(3 rows) + +-- test equivalence classes +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE + id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND + id4 = id5 AND id5 = id6 AND id6 = id7; + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop (actual rows=1 loops=1) + Join Filter: (coltest.id = r7.id7) + -> Nested Loop (actual rows=1 loops=1) + Join Filter: (coltest.id = r6.id6) + -> Nested Loop (actual rows=1 loops=1) + Join Filter: (coltest.id = r5.id5) + -> Nested Loop (actual rows=1 loops=1) + Join Filter: (coltest.id = r4.id4) + Rows Removed by Join Filter: 4 + -> Nested Loop (actual rows=5 loops=1) + -> Nested Loop (actual rows=5 loops=1) + Join Filter: (r1.id1 = r3.id3) + Rows Removed by Join Filter: 20 + -> Nested Loop (actual rows=5 loops=1) + Join Filter: (r1.id1 = r2.id2) + Rows Removed by Join Filter: 20 + -> Seq Scan on r1 (actual rows=5 loops=1) + -> Seq Scan on r2 (actual rows=5 loops=5) + -> Seq Scan on r3 (actual rows=5 loops=5) + -> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=5) + Filter: (r1.id1 = id) + Rows Removed by Filter: 999 + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: (r1.id1 = id) + Columnar Chunk Groups Removed by Filter: 19 + -> Seq Scan on r4 (actual rows=1 loops=5) + -> Seq Scan on r5 (actual rows=1 loops=1) + -> Seq Scan on r6 (actual rows=1 loops=1) + -> Seq Scan on r7 (actual rows=1 loops=1) +(29 rows) + +SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE + id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND + id4 = id5 AND id5 = id6 AND id6 = id7; + id1 | n1 | id2 | n2 | id3 | n3 | id4 | n4 | id5 | n5 | id6 | n6 | id7 | n7 | id | x1 | x2 | x3 +--------------------------------------------------------------------- + 1234 | 12350 | 1234 | 123502 | 1234 | 1235075 | 1234 | -1 | 1234 | -1 | 1234 | -1 | 1234 | -1 | 1234 | 12340 | 123400 | 1234000 +(1 row) + +-- test path generation with different thresholds +set columnar.planner_debug_level = 'notice'; +set columnar.max_custom_scan_paths to 10; +EXPLAIN (costs off, timing off, summary off) + SELECT * FROM 
coltest c1, coltest c2, coltest c3, coltest c4 WHERE + c1.id = c2.id and c1.id = c3.id and c1.id = c4.id; +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c2}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c2, c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c2, c3, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c2, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c3, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: parameterized by rels {c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c1}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c1, c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c1, c3, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c1, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c3, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: parameterized by rels {c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c1}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c1, c2}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c1, c2, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c1, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c2}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c2, c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: parameterized by rels {c4}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c1}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c1, c2}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c1, c2, c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c1, c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path 
for c4 +DETAIL: parameterized by rels {c2}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c2, c3}; 1 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: parameterized by rels {c3}; 1 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Nested Loop + -> Custom Scan (ColumnarScan) on coltest c1 + Columnar Projected Columns: id, x1, x2, x3 + -> Custom Scan (ColumnarScan) on coltest c2 + Filter: (c1.id = id) + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: (c1.id = id) + -> Custom Scan (ColumnarScan) on coltest c3 + Filter: (c1.id = id) + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: (c1.id = id) + -> Custom Scan (ColumnarScan) on coltest c4 + Filter: (c1.id = id) + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: (c1.id = id) +(17 rows) + +set columnar.max_custom_scan_paths to 2; +EXPLAIN (costs off, timing off, summary off) + SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE + c1.id = c2.id and c1.id = c3.id and c1.id = c4.id; +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c2 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c3 +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for c4 +DETAIL: unparameterized; 0 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop + Join Filter: (c1.id = c4.id) + -> Nested Loop + Join Filter: (c1.id = c3.id) + -> Nested Loop + Join Filter: (c1.id = c2.id) + -> Custom Scan (ColumnarScan) on coltest c1 + Columnar Projected Columns: id, x1, x2, x3 + -> Custom Scan (ColumnarScan) on coltest c2 + Columnar Projected Columns: id, x1, x2, x3 + -> Custom Scan (ColumnarScan) on coltest c3 + Columnar Projected Columns: id, x1, x2, x3 + -> Custom Scan (ColumnarScan) on coltest c4 + Columnar Projected Columns: id, x1, x2, x3 +(14 rows) + +set columnar.max_custom_scan_paths to default; +set columnar.planner_debug_level to default; +-- test more complex parameterization +set columnar.planner_debug_level = 'notice'; +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, r2, r3, coltest WHERE + id1 = id2 AND id2 = id3 AND id3 = id AND + n1 > x1 AND n2 > x2 AND n3 > x3; +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: unparameterized; 0 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r1}; 2 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r1, r2}; 3 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r1, r2, r3}; 4 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r1, r3}; 3 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r2}; 2 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels {r2, r3}; 3 clauses pushed down +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: parameterized by rels 
{r3}; 2 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop (actual rows=3 loops=1) + Join Filter: ((r3.n3 > coltest.x3) AND (r1.id1 = r3.id3)) + Rows Removed by Join Filter: 12 + -> Nested Loop (actual rows=3 loops=1) + Join Filter: ((r2.n2 > coltest.x2) AND (r1.id1 = r2.id2)) + Rows Removed by Join Filter: 12 + -> Nested Loop (actual rows=3 loops=1) + -> Seq Scan on r1 (actual rows=5 loops=1) + -> Custom Scan (ColumnarScan) on coltest (actual rows=1 loops=5) + Filter: ((r1.n1 > x1) AND (r1.id1 = id)) + Rows Removed by Filter: 799 + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: ((r1.n1 > x1) AND (r1.id1 = id)) + Columnar Chunk Groups Removed by Filter: 19 + -> Seq Scan on r2 (actual rows=5 loops=3) + -> Seq Scan on r3 (actual rows=5 loops=3) +(16 rows) + +set columnar.planner_debug_level to default; +SELECT * FROM r1, r2, r3, coltest WHERE + id1 = id2 AND id2 = id3 AND id3 = id AND + n1 > x1 AND n2 > x2 AND n3 > x3; + id1 | n1 | id2 | n2 | id3 | n3 | id | x1 | x2 | x3 +--------------------------------------------------------------------- + 1234 | 12350 | 1234 | 123502 | 1234 | 1235075 | 1234 | 12340 | 123400 | 1234000 + 9101 | 176000 | 9101 | 1760002 | 9101 | 17600075 | 9101 | 91010 | 910100 | 9101000 + 18942 | 189430 | 18942 | 1894302 | 18942 | 18943075 | 18942 | 189420 | 1894200 | 18942000 +(3 rows) + +-- test partitioning parameterization +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, coltest_part WHERE + id1 = id AND n1 > x1; + QUERY PLAN +--------------------------------------------------------------------- + Nested Loop (actual rows=3 loops=1) + -> Seq Scan on r1 (actual rows=5 loops=1) + -> Append (actual rows=1 loops=5) + -> Custom Scan (ColumnarScan) on coltest_part0 (actual rows=1 loops=3) + Filter: ((r1.n1 > x1) AND (r1.id1 = id)) + Rows Removed by Filter: 999 + Columnar Projected Columns: id, x1, x2, x3 + Columnar Chunk Group Filters: ((r1.n1 > x1) AND (r1.id1 = id)) + Columnar Chunk Groups Removed by Filter: 9 + -> Seq Scan on coltest_part1 (actual rows=0 loops=2) + Filter: ((r1.n1 > x1) AND (r1.id1 = id)) + Rows Removed by Filter: 10000 +(12 rows) + +SELECT * FROM r1, coltest_part WHERE + id1 = id AND n1 > x1; + id1 | n1 | id | x1 | x2 | x3 +--------------------------------------------------------------------- + 1234 | 12350 | 1234 | 12340 | 123400 | 1234000 + 9101 | 176000 | 9101 | 91010 | 910100 | 9101000 + 18942 | 189430 | 18942 | 189420 | 1894200 | 18942000 +(3 rows) + +set enable_mergejoin to default; +set enable_hashjoin to default; +set enable_material to default; +set columnar.planner_debug_level = 'notice'; +alter table coltest add column x5 int default (random()*20000)::int; +analyze coltest; +-- test that expressions on whole-row references are not pushed down +select * from coltest where coltest = (1,1,1,1); +NOTICE: columnar planner: cannot push down clause: var is whole-row reference +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: unparameterized; 0 clauses pushed down + id | x1 | x2 | x3 | x5 +--------------------------------------------------------------------- +(0 rows) + +-- test that expressions on uncorrelated attributes are not pushed down +set columnar.qual_pushdown_correlation to default; +select * from coltest where x5 = 23484; +NOTICE: columnar planner: cannot push down clause: var attribute 5 is uncorrelated +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: unparameterized; 0 clauses 
pushed down + id | x1 | x2 | x3 | x5 +--------------------------------------------------------------------- +(0 rows) + +-- test that expressions on volatile functions are not pushed down +create function vol() returns int language plpgsql as $$ +BEGIN + RETURN 1; +END; +$$; +select * from coltest where x3 = vol(); +NOTICE: columnar planner: cannot push down clause: expr contains volatile functions +NOTICE: columnar planner: adding CustomScan path for coltest +DETAIL: unparameterized; 0 clauses pushed down + id | x1 | x2 | x3 | x5 +--------------------------------------------------------------------- +(0 rows) + +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT * FROM coltest c1 WHERE ceil(x1) > 4222; +NOTICE: columnar planner: cannot push down clause: must match 'Var Expr' or 'Expr Var' +HINT: Var must only reference this rel, and Expr must not reference this rel +NOTICE: columnar planner: adding CustomScan path for c1 +DETAIL: unparameterized; 0 clauses pushed down + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on coltest c1 (actual rows=19577 loops=1) + Filter: (ceil((x1)::double precision) > '4222'::double precision) + Rows Removed by Filter: 423 + Columnar Projected Columns: id, x1, x2, x3, x5 +(4 rows) + +set columnar.planner_debug_level to default; +-- +-- https://github.com/citusdata/citus/issues/4488 +-- +create table columnar_prepared_stmt (x int, y int) using columnar; +insert into columnar_prepared_stmt select s, s from generate_series(1,5000000) s; +prepare foo (int) as select x from columnar_prepared_stmt where x = $1; +execute foo(3); + x +--------------------------------------------------------------------- + 3 +(1 row) + +execute foo(3); + x +--------------------------------------------------------------------- + 3 +(1 row) + +execute foo(3); + x +--------------------------------------------------------------------- + 3 +(1 row) + +execute foo(3); + x +--------------------------------------------------------------------- + 3 +(1 row) + +select filtered_row_count('execute foo(3)'); + filtered_row_count +--------------------------------------------------------------------- + 9999 +(1 row) + +select filtered_row_count('execute foo(3)'); + filtered_row_count +--------------------------------------------------------------------- + 9999 +(1 row) + +select filtered_row_count('execute foo(3)'); + filtered_row_count +--------------------------------------------------------------------- + 9999 +(1 row) + +select filtered_row_count('execute foo(3)'); + filtered_row_count +--------------------------------------------------------------------- + 9999 +(1 row) + +drop table columnar_prepared_stmt; diff --git a/src/test/regress/expected/columnar_cursor.out b/src/test/regress/expected/columnar_cursor.out index 887b7fc29..4e821cdea 100644 --- a/src/test/regress/expected/columnar_cursor.out +++ b/src/test/regress/expected/columnar_cursor.out @@ -10,9 +10,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) SELECT * FROM test_curs Custom Scan (ColumnarScan) on test_cursor (actual rows=101 loops=1) Filter: (a = 25) Rows Removed by Filter: 9899 - Columnar Chunk Groups Removed by Filter: 1 Columnar Projected Columns: a, b -(5 rows) + Columnar Chunk Group Filters: (a = 25) + Columnar Chunk Groups Removed by Filter: 1 +(6 rows) BEGIN; DECLARE a_25 SCROLL CURSOR @@ -112,9 +113,10 @@ EXPLAIN (analyze on, costs off, timing off, summary off) SELECT * FROM test_curs Custom Scan (ColumnarScan) on test_cursor 
(actual rows=7575 loops=1) Filter: (a > 25) Rows Removed by Filter: 2626 - Columnar Chunk Groups Removed by Filter: 0 Columnar Projected Columns: a, b -(5 rows) + Columnar Chunk Group Filters: (a > 25) + Columnar Chunk Groups Removed by Filter: 0 +(6 rows) BEGIN; DECLARE a_25 SCROLL CURSOR diff --git a/src/test/regress/expected/columnar_join.out b/src/test/regress/expected/columnar_join.out index 878b62ba6..04ffae31b 100644 --- a/src/test/regress/expected/columnar_join.out +++ b/src/test/regress/expected/columnar_join.out @@ -23,7 +23,7 @@ SELECT count(*) FROM users JOIN things ON (users.id = things.user_id) WHERE things.id > 299990; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Aggregate -> Nested Loop @@ -31,9 +31,10 @@ WHERE things.id > 299990; -> Custom Scan (ColumnarScan) on things Filter: (id > 299990) Columnar Projected Columns: id, user_id + Columnar Chunk Group Filters: (id > 299990) -> Custom Scan (ColumnarScan) on users Columnar Projected Columns: id -(8 rows) +(9 rows) EXPLAIN (COSTS OFF) SELECT u1.id, u2.id, COUNT(u2.*) @@ -41,7 +42,7 @@ FROM users u1 JOIN users u2 ON (u1.id::text = u2.name) WHERE u2.id > 299990 GROUP BY u1.id, u2.id; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- HashAggregate Group Key: u1.id, u2.id @@ -50,9 +51,10 @@ GROUP BY u1.id, u2.id; -> Custom Scan (ColumnarScan) on users u2 Filter: (id > 299990) Columnar Projected Columns: id, name + Columnar Chunk Group Filters: (id > 299990) -> Custom Scan (ColumnarScan) on users u1 Columnar Projected Columns: id -(9 rows) +(10 rows) SET client_min_messages TO warning; DROP SCHEMA am_columnar_join CASCADE; diff --git a/src/test/regress/expected/columnar_partitioning.out b/src/test/regress/expected/columnar_partitioning.out index fdba44b0a..cd91f665f 100644 --- a/src/test/regress/expected/columnar_partitioning.out +++ b/src/test/regress/expected/columnar_partitioning.out @@ -132,20 +132,22 @@ ANALYZE parent; -- scan on heap partition EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent WHERE ts > '2020-02-20' AND n < 5; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Aggregate -> Append -> Custom Scan (ColumnarScan) on p1 parent_1 Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) Columnar Projected Columns: ts, i, n + Columnar Chunk Group Filters: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) -> Index Scan using p2_n_idx on p2 parent_2 Index Cond: (n < '5'::numeric) Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) -> Custom Scan (ColumnarScan) on p3 parent_3 Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) Columnar Projected Columns: ts, i, n -(11 rows) + Columnar Chunk Group Filters: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) +(13 rows) BEGIN; SET LOCAL columnar.enable_custom_scan TO 'OFF'; diff --git a/src/test/regress/expected/columnar_partitioning_1.out b/src/test/regress/expected/columnar_partitioning_1.out index e873bec84..a3da26c42 100644 --- a/src/test/regress/expected/columnar_partitioning_1.out +++ b/src/test/regress/expected/columnar_partitioning_1.out @@ -132,20 +132,22 @@ ANALYZE parent; -- scan on heap partition EXPLAIN (costs off) SELECT count(*), sum(i), min(i), max(i) FROM parent WHERE ts > '2020-02-20' AND n < 5; - QUERY PLAN + 
QUERY PLAN --------------------------------------------------------------------- Aggregate -> Append -> Custom Scan (ColumnarScan) on p1 Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) Columnar Projected Columns: ts, i, n + Columnar Chunk Group Filters: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) -> Index Scan using p2_n_idx on p2 Index Cond: (n < '5'::numeric) Filter: (ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) -> Custom Scan (ColumnarScan) on p3 Filter: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) Columnar Projected Columns: ts, i, n -(11 rows) + Columnar Chunk Group Filters: ((ts > 'Thu Feb 20 00:00:00 2020 PST'::timestamp with time zone) AND (n < '5'::numeric)) +(13 rows) BEGIN; SET LOCAL columnar.enable_custom_scan TO 'OFF'; diff --git a/src/test/regress/expected/columnar_paths.out b/src/test/regress/expected/columnar_paths.out index f1511ab4b..96ee7c3c5 100644 --- a/src/test/regress/expected/columnar_paths.out +++ b/src/test/regress/expected/columnar_paths.out @@ -4,6 +4,8 @@ CREATE TABLE full_correlated (a int, b text, c int, d int) USING columnar; INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i; CREATE INDEX full_correlated_btree ON full_correlated (a); ANALYZE full_correlated; +-- Prevent qual pushdown from competing with index scans. +SET columnar.enable_qual_pushdown = false; SELECT columnar_test_helpers.uses_index_scan ( $$ SELECT a FROM full_correlated WHERE a=200; @@ -572,5 +574,54 @@ BEGIN; (1 row) ROLLBACK; +SET columnar.enable_qual_pushdown TO DEFAULT; +BEGIN; +SET LOCAL columnar.stripe_row_limit = 2000; +SET LOCAL columnar.chunk_group_row_limit = 1000; +CREATE TABLE correlated(x int) using columnar; +INSERT INTO correlated + SELECT g FROM generate_series(1,100000) g; +CREATE TABLE uncorrelated(x int) using columnar; +INSERT INTO uncorrelated + SELECT (g * 19) % 100000 FROM generate_series(1,100000) g; +COMMIT; +CREATE INDEX correlated_idx ON correlated(x); +CREATE INDEX uncorrelated_idx ON uncorrelated(x); +ANALYZE correlated, uncorrelated; +-- should choose chunk group filtering; selective and correlated +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM correlated WHERE x = 78910; + QUERY PLAN +--------------------------------------------------------------------- + Custom Scan (ColumnarScan) on correlated (actual rows=1 loops=1) + Filter: (x = 78910) + Rows Removed by Filter: 999 + Columnar Projected Columns: x + Columnar Chunk Group Filters: (x = 78910) + Columnar Chunk Groups Removed by Filter: 99 +(6 rows) + +SELECT * FROM correlated WHERE x = 78910; + x +--------------------------------------------------------------------- + 78910 +(1 row) + +-- should choose index scan; selective but uncorrelated +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM uncorrelated WHERE x = 78910; + QUERY PLAN +--------------------------------------------------------------------- + Index Only Scan using uncorrelated_idx on uncorrelated (actual rows=1 loops=1) + Index Cond: (x = 78910) + Heap Fetches: 1 +(3 rows) + +SELECT * FROM uncorrelated WHERE x = 78910; + x +--------------------------------------------------------------------- + 78910 +(1 row) + SET client_min_messages TO WARNING; DROP SCHEMA columnar_paths CASCADE; diff --git a/src/test/regress/expected/columnar_query.out b/src/test/regress/expected/columnar_query.out index 5e9cdea11..0934e332e 
100644 --- a/src/test/regress/expected/columnar_query.out +++ b/src/test/regress/expected/columnar_query.out @@ -139,12 +139,13 @@ explain (costs off, summary off) select * from -> Custom Scan (ColumnarScan) on int8_tbl_columnar a Columnar Projected Columns: q1, q2 -> Nested Loop + -> Custom Scan (ColumnarScan) on int8_tbl_columnar c + Columnar Projected Columns: q1 -> Custom Scan (ColumnarScan) on int8_tbl_columnar b Filter: (a.q2 = q1) Columnar Projected Columns: q1 - -> Custom Scan (ColumnarScan) on int8_tbl_columnar c - Columnar Projected Columns: q1 -(9 rows) + Columnar Chunk Group Filters: (a.q2 = q1) +(10 rows) explain (costs off, summary off) SELECT COUNT(*) FROM INT8_TBL_columnar t1 JOIN diff --git a/src/test/regress/expected/columnar_transactions.out b/src/test/regress/expected/columnar_transactions.out index 0731f06c7..afe277151 100644 --- a/src/test/regress/expected/columnar_transactions.out +++ b/src/test/regress/expected/columnar_transactions.out @@ -260,7 +260,7 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p1(20); (2 rows) SELECT * FROM t ORDER BY a; - a | b + a | b --------------------------------------------------------------------- 1 | 2 2 | 4 @@ -327,12 +327,13 @@ SELECT * FROM t ORDER BY a; -- SELECT with 0 params PREPARE p3 AS SELECT * FROM t WHERE a = 8; EXPLAIN (COSTS OFF) EXECUTE p3; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Custom Scan (ColumnarScan) on t Filter: (a = 8) Columnar Projected Columns: a, b -(3 rows) + Columnar Chunk Group Filters: (a = 8) +(4 rows) EXECUTE p3; a | b @@ -347,9 +348,10 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p3; Custom Scan (ColumnarScan) on t (actual rows=2 loops=1) Filter: (a = 8) Rows Removed by Filter: 2 - Columnar Chunk Groups Removed by Filter: 8 Columnar Projected Columns: a, b -(5 rows) + Columnar Chunk Group Filters: (a = 8) + Columnar Chunk Groups Removed by Filter: 8 +(6 rows) SELECT * FROM t ORDER BY a; a | b @@ -382,12 +384,13 @@ SELECT * FROM t ORDER BY a; -- SELECT with 1 param PREPARE p5(int) AS SELECT * FROM t WHERE a = $1; EXPLAIN (COSTS OFF) EXECUTE p5(16); - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Custom Scan (ColumnarScan) on t Filter: (a = 16) Columnar Projected Columns: a, b -(3 rows) + Columnar Chunk Group Filters: (a = 16) +(4 rows) EXECUTE p5(16); a | b @@ -400,9 +403,10 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p5(9); Custom Scan (ColumnarScan) on t (actual rows=2 loops=1) Filter: (a = 9) Rows Removed by Filter: 2 - Columnar Chunk Groups Removed by Filter: 8 Columnar Projected Columns: a, b -(5 rows) + Columnar Chunk Group Filters: (a = 9) + Columnar Chunk Groups Removed by Filter: 8 +(6 rows) SELECT * FROM t ORDER BY a; a | b @@ -435,12 +439,13 @@ SELECT * FROM t ORDER BY a; -- SELECT with >1 params PREPARE p6(int, int) AS SELECT * FROM t WHERE a = $1+1 AND b = $2+1; EXPLAIN (COSTS OFF) EXECUTE p6(30, 40); - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------- Custom Scan (ColumnarScan) on t Filter: ((a = 31) AND (b = 41)) Columnar Projected Columns: a, b -(3 rows) + Columnar Chunk Group Filters: ((a = 31) AND (b = 41)) +(4 rows) EXECUTE p6(30, 40); a | b @@ -454,9 +459,10 @@ EXPLAIN (ANALYZE true, COSTS off, TIMING off, SUMMARY off) EXECUTE p6(50, 60); Custom Scan (ColumnarScan) on t (actual rows=1 loops=1) Filter: ((a = 51) AND (b = 61)) Rows Removed by Filter: 3 - Columnar Chunk 
Groups Removed by Filter: 9 Columnar Projected Columns: a, b -(5 rows) + Columnar Chunk Group Filters: ((a = 51) AND (b = 61)) + Columnar Chunk Groups Removed by Filter: 9 +(6 rows) SELECT * FROM t ORDER BY a; a | b diff --git a/src/test/regress/sql/columnar_chunk_filtering.sql b/src/test/regress/sql/columnar_chunk_filtering.sql index 81c845de9..3c3ed03b9 100644 --- a/src/test/regress/sql/columnar_chunk_filtering.sql +++ b/src/test/regress/sql/columnar_chunk_filtering.sql @@ -26,6 +26,7 @@ $$ END; $$ LANGUAGE PLPGSQL; +set columnar.qual_pushdown_correlation = 0.0; -- Create and load data -- chunk_group_row_limit '1000', stripe_row_limit '2000' @@ -152,3 +153,166 @@ create table part_2_columnar partition of part_table for values from (0) to (150 insert into part_table select generate_series(1,159999); select filtered_row_count('select count(*) from part_table where id > 75000'); drop table part_table; + +-- test join parameterization + +set columnar.stripe_row_limit = 2000; +set columnar.chunk_group_row_limit = 1000; + +create table r1(id1 int, n1 int); -- row +create table r2(id2 int, n2 int); -- row +create table r3(id3 int, n3 int); -- row +create table r4(id4 int, n4 int); -- row +create table r5(id5 int, n5 int); -- row +create table r6(id6 int, n6 int); -- row +create table r7(id7 int, n7 int); -- row + +create table coltest(id int, x1 int, x2 int, x3 int) using columnar; +create table coltest_part(id int, x1 int, x2 int, x3 int) + partition by range (id); +create table coltest_part0 + partition of coltest_part for values from (0) to (10000) + using columnar; +create table coltest_part1 + partition of coltest_part for values from (10000) to (20000); -- row + +set columnar.stripe_row_limit to default; +set columnar.chunk_group_row_limit to default; + +insert into r1 values(1234, 12350); +insert into r1 values(4567, 45000); +insert into r1 values(9101, 176000); +insert into r1 values(14202, 7); +insert into r1 values(18942, 189430); + +insert into r2 values(1234, 123502); +insert into r2 values(4567, 450002); +insert into r2 values(9101, 1760002); +insert into r2 values(14202, 72); +insert into r2 values(18942, 1894302); + +insert into r3 values(1234, 1235075); +insert into r3 values(4567, 4500075); +insert into r3 values(9101, 17600075); +insert into r3 values(14202, 775); +insert into r3 values(18942, 18943075); + +insert into r4 values(1234, -1); +insert into r5 values(1234, -1); +insert into r6 values(1234, -1); +insert into r7 values(1234, -1); + +insert into coltest + select g, g*10, g*100, g*1000 from generate_series(0, 19999) g; +insert into coltest_part + select g, g*10, g*100, g*1000 from generate_series(0, 19999) g; + +ANALYZE r1, r2, r3, coltest, coltest_part; + +-- force nested loop +set enable_mergejoin=false; +set enable_hashjoin=false; +set enable_material=false; + +-- test different kinds of expressions +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, coltest WHERE + id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0; +SELECT * FROM r1, coltest WHERE + id1 = id AND x1 > 15000 AND x1::text > '000000' AND n1 % 10 = 0; + +-- test equivalence classes + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE + id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND + id4 = id5 AND id5 = id6 AND id6 = id7; +SELECT * FROM r1, r2, r3, r4, r5, r6, r7, coltest WHERE + id = id1 AND id1 = id2 AND id2 = id3 AND id3 = id4 AND + id4 = id5 AND id5 = id6 AND id6 = id7; + +-- test path 
generation with different thresholds + +set columnar.planner_debug_level = 'notice'; +set columnar.max_custom_scan_paths to 10; + +EXPLAIN (costs off, timing off, summary off) + SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE + c1.id = c2.id and c1.id = c3.id and c1.id = c4.id; + +set columnar.max_custom_scan_paths to 2; + +EXPLAIN (costs off, timing off, summary off) + SELECT * FROM coltest c1, coltest c2, coltest c3, coltest c4 WHERE + c1.id = c2.id and c1.id = c3.id and c1.id = c4.id; + +set columnar.max_custom_scan_paths to default; + +set columnar.planner_debug_level to default; + +-- test more complex parameterization + +set columnar.planner_debug_level = 'notice'; + +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, r2, r3, coltest WHERE + id1 = id2 AND id2 = id3 AND id3 = id AND + n1 > x1 AND n2 > x2 AND n3 > x3; + +set columnar.planner_debug_level to default; + +SELECT * FROM r1, r2, r3, coltest WHERE + id1 = id2 AND id2 = id3 AND id3 = id AND + n1 > x1 AND n2 > x2 AND n3 > x3; + +-- test partitioning parameterization +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM r1, coltest_part WHERE + id1 = id AND n1 > x1; +SELECT * FROM r1, coltest_part WHERE + id1 = id AND n1 > x1; + +set enable_mergejoin to default; +set enable_hashjoin to default; +set enable_material to default; + +set columnar.planner_debug_level = 'notice'; + +alter table coltest add column x5 int default (random()*20000)::int; +analyze coltest; + +-- test that expressions on whole-row references are not pushed down +select * from coltest where coltest = (1,1,1,1); + +-- test that expressions on uncorrelated attributes are not pushed down +set columnar.qual_pushdown_correlation to default; +select * from coltest where x5 = 23484; + +-- test that expressions on volatile functions are not pushed down +create function vol() returns int language plpgsql as $$ +BEGIN + RETURN 1; +END; +$$; +select * from coltest where x3 = vol(); + +EXPLAIN (analyze on, costs off, timing off, summary off) + SELECT * FROM coltest c1 WHERE ceil(x1) > 4222; + +set columnar.planner_debug_level to default; + +-- +-- https://github.com/citusdata/citus/issues/4488 +-- +create table columnar_prepared_stmt (x int, y int) using columnar; +insert into columnar_prepared_stmt select s, s from generate_series(1,5000000) s; +prepare foo (int) as select x from columnar_prepared_stmt where x = $1; +execute foo(3); +execute foo(3); +execute foo(3); +execute foo(3); +select filtered_row_count('execute foo(3)'); +select filtered_row_count('execute foo(3)'); +select filtered_row_count('execute foo(3)'); +select filtered_row_count('execute foo(3)'); +drop table columnar_prepared_stmt; diff --git a/src/test/regress/sql/columnar_paths.sql b/src/test/regress/sql/columnar_paths.sql index e851f91f3..cdc70e88d 100644 --- a/src/test/regress/sql/columnar_paths.sql +++ b/src/test/regress/sql/columnar_paths.sql @@ -6,6 +6,9 @@ INSERT INTO full_correlated SELECT i, i::text FROM generate_series(1, 1000000) i CREATE INDEX full_correlated_btree ON full_correlated (a); ANALYZE full_correlated; +-- Prevent qual pushdown from competing with index scans. 
+SET columnar.enable_qual_pushdown = false; + SELECT columnar_test_helpers.uses_index_scan ( $$ SELECT a FROM full_correlated WHERE a=200; @@ -337,5 +340,35 @@ BEGIN; ); ROLLBACK; +SET columnar.enable_qual_pushdown TO DEFAULT; + +BEGIN; +SET LOCAL columnar.stripe_row_limit = 2000; +SET LOCAL columnar.chunk_group_row_limit = 1000; + +CREATE TABLE correlated(x int) using columnar; +INSERT INTO correlated + SELECT g FROM generate_series(1,100000) g; + +CREATE TABLE uncorrelated(x int) using columnar; +INSERT INTO uncorrelated + SELECT (g * 19) % 100000 FROM generate_series(1,100000) g; + +COMMIT; + +CREATE INDEX correlated_idx ON correlated(x); +CREATE INDEX uncorrelated_idx ON uncorrelated(x); +ANALYZE correlated, uncorrelated; + +-- should choose chunk group filtering; selective and correlated +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM correlated WHERE x = 78910; +SELECT * FROM correlated WHERE x = 78910; + +-- should choose index scan; selective but uncorrelated +EXPLAIN (analyze on, costs off, timing off, summary off) +SELECT * FROM uncorrelated WHERE x = 78910; +SELECT * FROM uncorrelated WHERE x = 78910; + SET client_min_messages TO WARNING; DROP SCHEMA columnar_paths CASCADE;
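+-- For reference, a sketch of the statistic driving the choice above: after
+-- ANALYZE, the physical-order correlation that columnar's qual pushdown
+-- threshold is compared against can be inspected through the standard
+-- pg_stats view (run this before the schema is dropped):
+-- SELECT tablename, attname, correlation
+--   FROM pg_stats
+--   WHERE tablename IN ('correlated', 'uncorrelated');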