mirror of https://github.com/citusdata/citus.git
2133 lines
61 KiB
C
2133 lines
61 KiB
C
/*-------------------------------------------------------------------------
 *
 * columnar_customscan.c
 *
 * This file contains the implementation of a postgres custom scan that
 * we use to push down the projections into the table access methods.
 *
 * $Id$
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include <math.h>
|
|
|
|
#include "postgres.h"
|
|
|
|
#include "miscadmin.h"
|
|
|
|
#include "access/amapi.h"
|
|
#include "access/skey.h"
|
|
#include "catalog/pg_am.h"
|
|
#include "catalog/pg_statistic.h"
|
|
#include "commands/defrem.h"
|
|
#include "nodes/extensible.h"
|
|
#include "nodes/makefuncs.h"
|
|
#include "nodes/nodeFuncs.h"
|
|
#include "nodes/pg_list.h"
|
|
#include "nodes/plannodes.h"
|
|
#include "optimizer/cost.h"
|
|
#include "optimizer/optimizer.h"
|
|
#include "optimizer/pathnode.h"
|
|
#include "optimizer/paths.h"
|
|
#include "optimizer/plancat.h"
|
|
#include "optimizer/restrictinfo.h"
|
|
|
|
#include "citus_version.h"
|
|
#if PG_VERSION_NUM >= PG_VERSION_16
|
|
#include "parser/parse_relation.h"
|
|
#include "parser/parsetree.h"
|
|
#endif
|
|
#include "utils/builtins.h"
|
|
#include "utils/lsyscache.h"
|
|
#include "utils/relcache.h"
|
|
#include "utils/ruleutils.h"
|
|
#include "utils/selfuncs.h"
|
|
#include "utils/spccache.h"
|
|
|
|
#include "citus_version.h"
|
|
|
|
#include "columnar/columnar.h"
|
|
#include "columnar/columnar_customscan.h"
|
|
#include "columnar/columnar_metadata.h"
|
|
#include "columnar/columnar_tableam.h"
|
|
|
|
#include "distributed/listutils.h"
|
|
|
|
/*
|
|
* ColumnarScanState represents the state for a columnar scan. It's a
|
|
* CustomScanState with additional fields specific to columnar scans.
|
|
*/
|
|
typedef struct ColumnarScanState
|
|
{
|
|
CustomScanState custom_scanstate; /* must be first field */
|
|
|
|
ExprContext *css_RuntimeContext;
|
|
List *qual;
|
|
} ColumnarScanState;
|
|
|
|
|
|
/* PathPredicate is a callback that answers a yes/no question about a Path. */
typedef bool (*PathPredicate)(Path *candidatePath);
|
|
|
|
|
|
/* functions to cost paths in-place */
|
|
static void CostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId);
|
|
static void CostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
|
|
IndexPath *indexPath);
|
|
static void CostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path);
|
|
static void CostColumnarScan(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
|
|
CustomPath *cpath, int numberOfColumnsRead,
|
|
int nClauses);
|
|
|
|
/* functions to add new paths */
|
|
static void AddColumnarScanPaths(PlannerInfo *root, RelOptInfo *rel,
|
|
RangeTblEntry *rte);
|
|
static void AddColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
|
|
RangeTblEntry *rte, Relids required_relids);
|
|
|
|
/* helper functions to be used when costing paths or altering them */
|
|
static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate);
|
|
static bool IsNotIndexPath(Path *path);
|
|
static Cost ColumnarIndexScanAdditionalCost(PlannerInfo *root, RelOptInfo *rel,
|
|
Oid relationId, IndexPath *indexPath);
|
|
static int RelationIdGetNumberOfAttributes(Oid relationId);
|
|
static Cost ColumnarPerStripeScanCost(RelOptInfo *rel, Oid relationId,
|
|
int numberOfColumnsRead);
|
|
static uint64 ColumnarTableStripeCount(Oid relationId);
|
|
static Path * CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel,
|
|
Oid relationId);
|
|
static void AddColumnarScanPathsRec(PlannerInfo *root, RelOptInfo *rel,
|
|
RangeTblEntry *rte, Relids paramRelids,
|
|
Relids candidateRelids,
|
|
int depthLimit);
|
|
|
|
/* hooks and callbacks */
|
|
static void ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
|
|
RangeTblEntry *rte);
|
|
static void ColumnarGetRelationInfoHook(PlannerInfo *root, Oid relationObjectId,
|
|
bool inhparent, RelOptInfo *rel);
|
|
static Plan * ColumnarScanPath_PlanCustomPath(PlannerInfo *root,
|
|
RelOptInfo *rel,
|
|
struct CustomPath *best_path,
|
|
List *tlist,
|
|
List *clauses,
|
|
List *custom_plans);
|
|
static List * ColumnarScanPath_ReparameterizeCustomPathByChild(PlannerInfo *root,
|
|
List *custom_private,
|
|
RelOptInfo *child_rel);
|
|
static Node * ColumnarScan_CreateCustomScanState(CustomScan *cscan);
|
|
|
|
static void ColumnarScan_BeginCustomScan(CustomScanState *node, EState *estate,
|
|
int eflags);
|
|
static TupleTableSlot * ColumnarScan_ExecCustomScan(CustomScanState *node);
|
|
static void ColumnarScan_EndCustomScan(CustomScanState *node);
|
|
static void ColumnarScan_ReScanCustomScan(CustomScanState *node);
|
|
static void ColumnarScan_ExplainCustomScan(CustomScanState *node, List *ancestors,
|
|
ExplainState *es);
|
|
|
|
/* helper functions to build strings for EXPLAIN */
|
|
static const char * ColumnarPushdownClausesStr(List *context, List *clauses);
|
|
static const char * ColumnarProjectedColumnsStr(List *context,
|
|
List *projectedColumns);
|
|
static List * set_deparse_context_planstate(List *dpcontext, Node *node,
|
|
List *ancestors);
|
|
|
|
/* other helpers */
|
|
static List * ColumnarVarNeeded(ColumnarScanState *columnarScanState);
|
|
static Bitmapset * ColumnarAttrNeeded(ScanState *ss);
|
|
#if PG_VERSION_NUM >= PG_VERSION_16
|
|
static Bitmapset * fixup_inherited_columns(Oid parentId, Oid childId, Bitmapset *columns);
|
|
#endif
|
|
|
|
/* saved hook value in case of unload */
|
|
static set_rel_pathlist_hook_type PreviousSetRelPathlistHook = NULL;
|
|
static get_relation_info_hook_type PreviousGetRelationInfoHook = NULL;
|
|
|
|
static bool EnableColumnarCustomScan = true;
|
|
static bool EnableColumnarQualPushdown = true;
|
|
static double ColumnarQualPushdownCorrelationThreshold = 0.9;
|
|
static int ColumnarMaxCustomScanPaths = 64;
|
|
static int ColumnarPlannerDebugLevel = DEBUG3;
|
|
|
|
|
|
const struct CustomPathMethods ColumnarScanPathMethods = {
|
|
.CustomName = "ColumnarScan",
|
|
.PlanCustomPath = ColumnarScanPath_PlanCustomPath,
|
|
.ReparameterizeCustomPathByChild = ColumnarScanPath_ReparameterizeCustomPathByChild,
|
|
};
|
|
|
|
const struct CustomScanMethods ColumnarScanScanMethods = {
|
|
.CustomName = "ColumnarScan",
|
|
.CreateCustomScanState = ColumnarScan_CreateCustomScanState,
|
|
};
|
|
|
|
const struct CustomExecMethods ColumnarScanExecuteMethods = {
|
|
.CustomName = "ColumnarScan",
|
|
|
|
.BeginCustomScan = ColumnarScan_BeginCustomScan,
|
|
.ExecCustomScan = ColumnarScan_ExecCustomScan,
|
|
.EndCustomScan = ColumnarScan_EndCustomScan,
|
|
.ReScanCustomScan = ColumnarScan_ReScanCustomScan,
|
|
|
|
.ExplainCustomScan = ColumnarScan_ExplainCustomScan,
|
|
};
|
|
|
|
static const struct config_enum_entry debug_level_options[] = {
|
|
{ "debug5", DEBUG5, false },
|
|
{ "debug4", DEBUG4, false },
|
|
{ "debug3", DEBUG3, false },
|
|
{ "debug2", DEBUG2, false },
|
|
{ "debug1", DEBUG1, false },
|
|
{ "debug", DEBUG2, true },
|
|
{ "info", INFO, false },
|
|
{ "notice", NOTICE, false },
|
|
{ "warning", WARNING, false },
|
|
{ "log", LOG, false },
|
|
{ NULL, 0, false }
|
|
};
|
|
|
|
|
|
/*
|
|
* columnar_customscan_init installs the hook required to intercept the postgres planner and
|
|
* provide extra paths for columnar tables
|
|
*/
|
|
void
|
|
columnar_customscan_init()
|
|
{
|
|
PreviousSetRelPathlistHook = set_rel_pathlist_hook;
|
|
set_rel_pathlist_hook = ColumnarSetRelPathlistHook;
|
|
|
|
PreviousGetRelationInfoHook = get_relation_info_hook;
|
|
get_relation_info_hook = ColumnarGetRelationInfoHook;
|
|
|
|
/* register customscan specific GUC's */
|
|
DefineCustomBoolVariable(
|
|
"columnar.enable_custom_scan",
|
|
gettext_noop("Enables the use of a custom scan to push projections and quals "
|
|
"into the storage layer."),
|
|
NULL,
|
|
&EnableColumnarCustomScan,
|
|
true,
|
|
PGC_USERSET,
|
|
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
|
|
NULL, NULL, NULL);
|
|
DefineCustomBoolVariable(
|
|
"columnar.enable_qual_pushdown",
|
|
gettext_noop("Enables qual pushdown into columnar. This has no effect unless "
|
|
"columnar.enable_custom_scan is true."),
|
|
NULL,
|
|
&EnableColumnarQualPushdown,
|
|
true,
|
|
PGC_USERSET,
|
|
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
|
|
NULL, NULL, NULL);
|
|
DefineCustomRealVariable(
|
|
"columnar.qual_pushdown_correlation_threshold",
|
|
gettext_noop("Correlation threshold to attempt to push a qual "
|
|
"referencing the given column. A value of 0 means "
|
|
"attempt to push down all quals, even if the column "
|
|
"is uncorrelated."),
|
|
NULL,
|
|
&ColumnarQualPushdownCorrelationThreshold,
|
|
0.9,
|
|
0.0,
|
|
1.0,
|
|
PGC_USERSET,
|
|
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
|
|
NULL, NULL, NULL);
|
|
DefineCustomIntVariable(
|
|
"columnar.max_custom_scan_paths",
|
|
gettext_noop("Maximum number of custom scan paths to generate "
|
|
"for a columnar table when planning."),
|
|
NULL,
|
|
&ColumnarMaxCustomScanPaths,
|
|
64,
|
|
1,
|
|
1024,
|
|
PGC_USERSET,
|
|
GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE,
|
|
NULL, NULL, NULL);
|
|
DefineCustomEnumVariable(
|
|
"columnar.planner_debug_level",
|
|
"Message level for columnar planning information.",
|
|
NULL,
|
|
&ColumnarPlannerDebugLevel,
|
|
DEBUG3,
|
|
debug_level_options,
|
|
PGC_USERSET,
|
|
0,
|
|
NULL,
|
|
NULL,
|
|
NULL);
|
|
|
|
RegisterCustomScanMethods(&ColumnarScanScanMethods);
|
|
}
|
|
|
|
|
|
static void
|
|
ColumnarSetRelPathlistHook(PlannerInfo *root, RelOptInfo *rel, Index rti,
|
|
RangeTblEntry *rte)
|
|
{
|
|
/* call into previous hook if assigned */
|
|
if (PreviousSetRelPathlistHook)
|
|
{
|
|
PreviousSetRelPathlistHook(root, rel, rti, rte);
|
|
}
|
|
|
|
if (!OidIsValid(rte->relid) || rte->rtekind != RTE_RELATION || rte->inh)
|
|
{
|
|
/* some calls to the pathlist hook don't have a valid relation set. Do nothing */
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Here we want to inspect if this relation pathlist hook is accessing a columnar table.
|
|
* If that is the case we want to insert an extra path that pushes down the projection
|
|
* into the scan of the table to minimize the data read.
|
|
*/
|
|
Relation relation = RelationIdGetRelation(rte->relid);
|
|
if (!RelationIsValid(relation))
|
|
{
|
|
ereport(ERROR, (errmsg("could not open relation with OID %u", rte->relid)));
|
|
}
|
|
|
|
if (relation->rd_tableam == GetColumnarTableAmRoutine())
|
|
{
|
|
if (rte->tablesample != NULL)
|
|
{
|
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("sample scans not supported on columnar tables")));
|
|
}
|
|
|
|
if (list_length(rel->partial_pathlist) != 0)
|
|
{
|
|
/*
|
|
* Parallel scans on columnar tables are already discardad by
|
|
* ColumnarGetRelationInfoHook but be on the safe side.
|
|
*/
|
|
elog(ERROR, "parallel scans on columnar are not supported");
|
|
}
|
|
|
|
/*
|
|
* There are cases where IndexPath is normally more preferrable over
|
|
* SeqPath for heapAM but not for columnarAM. In such cases, an
|
|
* IndexPath could wrongly dominate a SeqPath based on the costs
|
|
* estimated by postgres earlier. For this reason, here we manually
|
|
* create a SeqPath, estimate the cost based on columnarAM and append
|
|
* to pathlist.
|
|
*
|
|
* Before doing that, we first re-cost all the existing paths so that
|
|
* add_path makes correct cost comparisons when appending our SeqPath.
|
|
*/
|
|
CostColumnarPaths(root, rel, rte->relid);
|
|
|
|
Path *seqPath = CreateColumnarSeqScanPath(root, rel, rte->relid);
|
|
add_path(rel, seqPath);
|
|
|
|
if (EnableColumnarCustomScan)
|
|
{
|
|
ereport(DEBUG1, (errmsg("pathlist hook for columnar table am")));
|
|
|
|
/*
|
|
* When columnar custom scan is enabled (columnar.enable_custom_scan),
|
|
* we only consider ColumnarScanPath's & IndexPath's. For this reason,
|
|
* we remove other paths and re-estimate IndexPath costs to make accurate
|
|
* comparisons between them.
|
|
*
|
|
* Even more, we might calculate an equal cost for a
|
|
* ColumnarCustomScan and a SeqPath if we are reading all columns
|
|
* of given table since we don't consider chunk group filtering
|
|
* when costing ColumnarCustomScan.
|
|
* In that case, if we don't remove SeqPath's, we might wrongly choose
|
|
* SeqPath thinking that its cost would be equal to ColumnarCustomScan.
|
|
*/
|
|
RemovePathsByPredicate(rel, IsNotIndexPath);
|
|
AddColumnarScanPaths(root, rel, rte);
|
|
}
|
|
}
|
|
RelationClose(relation);
|
|
}
|
|
|
|
|
|
static void
|
|
ColumnarGetRelationInfoHook(PlannerInfo *root, Oid relationObjectId,
|
|
bool inhparent, RelOptInfo *rel)
|
|
{
|
|
if (PreviousGetRelationInfoHook)
|
|
{
|
|
PreviousGetRelationInfoHook(root, relationObjectId, inhparent, rel);
|
|
}
|
|
|
|
if (IsColumnarTableAmTable(relationObjectId))
|
|
{
|
|
/* disable parallel query */
|
|
rel->rel_parallel_workers = 0;
|
|
|
|
/* disable index-only scan */
|
|
IndexOptInfo *indexOptInfo = NULL;
|
|
foreach_declared_ptr(indexOptInfo, rel->indexlist)
|
|
{
|
|
memset(indexOptInfo->canreturn, false, indexOptInfo->ncolumns * sizeof(bool));
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* RemovePathsByPredicate removes the paths that removePathPredicate
|
|
* evaluates to true from pathlist of given rel.
|
|
*/
|
|
static void
|
|
RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPredicate)
|
|
{
|
|
List *filteredPathList = NIL;
|
|
|
|
Path *path = NULL;
|
|
foreach_declared_ptr(path, rel->pathlist)
|
|
{
|
|
if (!removePathPredicate(path))
|
|
{
|
|
filteredPathList = lappend(filteredPathList, path);
|
|
}
|
|
}
|
|
|
|
rel->pathlist = filteredPathList;
|
|
}
|
|
|
|
|
|
/*
|
|
* IsNotIndexPath returns true if given path is not an IndexPath.
|
|
*/
|
|
static bool
|
|
IsNotIndexPath(Path *path)
|
|
{
|
|
return !IsA(path, IndexPath);
|
|
}
|
|
|
|
|
|
/*
|
|
* CreateColumnarSeqScanPath returns Path for sequential scan on columnar
|
|
* table with relationId.
|
|
*/
|
|
static Path *
|
|
CreateColumnarSeqScanPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
|
|
{
|
|
/* columnar doesn't support parallel scan */
|
|
int parallelWorkers = 0;
|
|
|
|
Relids requiredOuter = rel->lateral_relids;
|
|
Path *path = create_seqscan_path(root, rel, requiredOuter, parallelWorkers);
|
|
CostColumnarSeqPath(rel, relationId, path);
|
|
return path;
|
|
}
|
|
|
|
|
|
/*
|
|
* CostColumnarPaths re-costs paths of given RelOptInfo for
|
|
* columnar table with relationId.
|
|
*/
|
|
static void
|
|
CostColumnarPaths(PlannerInfo *root, RelOptInfo *rel, Oid relationId)
|
|
{
|
|
Path *path = NULL;
|
|
foreach_declared_ptr(path, rel->pathlist)
|
|
{
|
|
if (IsA(path, IndexPath))
|
|
{
|
|
/*
|
|
* Since we don't provide implementations for scan_bitmap_next_block
|
|
* & scan_bitmap_next_tuple, postgres doesn't generate bitmap index
|
|
* scan paths for columnar tables already (see related comments in
|
|
* TableAmRoutine). For this reason, we only consider IndexPath's
|
|
* here.
|
|
*/
|
|
CostColumnarIndexPath(root, rel, relationId, (IndexPath *) path);
|
|
}
|
|
else if (path->pathtype == T_SeqScan)
|
|
{
|
|
CostColumnarSeqPath(rel, relationId, path);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* CostColumnarIndexPath re-costs given index path for columnar table with
|
|
* relationId.
|
|
*/
|
|
static void
|
|
CostColumnarIndexPath(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
|
|
IndexPath *indexPath)
|
|
{
|
|
if (!enable_indexscan)
|
|
{
|
|
/* costs are already set to disable_cost, don't adjust them */
|
|
return;
|
|
}
|
|
|
|
ereport(DEBUG4, (errmsg("columnar table index scan costs estimated by "
|
|
"indexAM: startup cost = %.10f, total cost = "
|
|
"%.10f", indexPath->path.startup_cost,
|
|
indexPath->path.total_cost)));
|
|
|
|
/*
|
|
* We estimate the cost for columnar table read during index scan. Also,
|
|
* instead of overwriting total cost, we "add" ours to the cost estimated
|
|
* by indexAM since we should consider index traversal related costs too.
|
|
*/
|
|
Cost columnarIndexScanCost = ColumnarIndexScanAdditionalCost(root, rel, relationId,
|
|
indexPath);
|
|
indexPath->path.total_cost += columnarIndexScanCost;
|
|
|
|
ereport(DEBUG4, (errmsg("columnar table index scan costs re-estimated "
|
|
"by columnarAM (including indexAM costs): "
|
|
"startup cost = %.10f, total cost = %.10f",
|
|
indexPath->path.startup_cost,
|
|
indexPath->path.total_cost)));
|
|
}
|
|
|
|
|
|
/*
|
|
* ColumnarIndexScanAdditionalCost returns additional cost estimated for
|
|
* index scan described by IndexPath for columnar table with relationId.
|
|
*/
|
|
static Cost
|
|
ColumnarIndexScanAdditionalCost(PlannerInfo *root, RelOptInfo *rel,
|
|
Oid relationId, IndexPath *indexPath)
|
|
{
|
|
int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
|
|
Cost perStripeCost = ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
|
|
|
|
/*
|
|
* We don't need to pass correct loop count to amcostestimate since we
|
|
* will only use index correlation & index selectivity, and loop count
|
|
* doesn't have any effect on those two.
|
|
*/
|
|
double fakeLoopCount = 1;
|
|
Cost fakeIndexStartupCost;
|
|
Cost fakeIndexTotalCost;
|
|
double fakeIndexPages;
|
|
Selectivity indexSelectivity;
|
|
double indexCorrelation;
|
|
amcostestimate_function amcostestimate = indexPath->indexinfo->amcostestimate;
|
|
amcostestimate(root, indexPath, fakeLoopCount, &fakeIndexStartupCost,
|
|
&fakeIndexTotalCost, &indexSelectivity,
|
|
&indexCorrelation, &fakeIndexPages);
|
|
|
|
Relation relation = RelationIdGetRelation(relationId);
|
|
if (!RelationIsValid(relation))
|
|
{
|
|
ereport(ERROR, (errmsg("could not open relation with OID %u", relationId)));
|
|
}
|
|
|
|
uint64 rowCount = ColumnarTableRowCount(relation);
|
|
RelationClose(relation);
|
|
double estimatedRows = rowCount * indexSelectivity;
|
|
|
|
/*
|
|
* In the worst case (i.e no correlation between the column & the index),
|
|
* we need to read a different stripe for each row.
|
|
*/
|
|
double maxStripeReadCount = estimatedRows;
|
|
|
|
/*
|
|
* In the best case (i.e the column is fully correlated with the index),
|
|
* we wouldn't read the same stripe again and again thanks
|
|
* to locality.
|
|
*/
|
|
double avgStripeRowCount =
|
|
rowCount / (double) ColumnarTableStripeCount(relationId);
|
|
double minStripeReadCount = estimatedRows / avgStripeRowCount;
|
|
|
|
/*
|
|
* While being close to 0 means low correlation, being close to -1 or +1
|
|
* means high correlation. For index scans on columnar tables, it doesn't
|
|
* matter if the column and the index are "correlated" (+1) or
|
|
* "anti-correlated" (-1) since both help us avoiding from reading the
|
|
* same stripe again and again.
|
|
*/
|
|
double absIndexCorrelation = float_abs(indexCorrelation);
|
|
|
|
/*
|
|
* To estimate the number of stripes that we need to read, we do linear
|
|
* interpolation between minStripeReadCount & maxStripeReadCount. To do
|
|
* that, we use complement to 1 of absolute correlation, where being
|
|
* close to 0 means high correlation and being close to 1 means low
|
|
* correlation.
|
|
* In practice, we only want to do an index scan when absIndexCorrelation
|
|
* is 1 (or extremely close to it), or when the absolute number of tuples
|
|
* returned is very small. Other cases will have a prohibitive cost.
|
|
*/
|
|
double complementIndexCorrelation = 1 - absIndexCorrelation;
|
|
double estimatedStripeReadCount =
|
|
minStripeReadCount + complementIndexCorrelation * (maxStripeReadCount -
|
|
minStripeReadCount);
|
|
|
|
/* even in the best case, we will read a single stripe */
|
|
estimatedStripeReadCount = Max(estimatedStripeReadCount, 1.0);
|
|
|
|
Cost scanCost = perStripeCost * estimatedStripeReadCount;
|
|
|
|
ereport(DEBUG4, (errmsg("re-costing index scan for columnar table: "
|
|
"selectivity = %.10f, complement abs "
|
|
"correlation = %.10f, per stripe cost = %.10f, "
|
|
"estimated stripe read count = %.10f, "
|
|
"total additional cost = %.10f",
|
|
indexSelectivity, complementIndexCorrelation,
|
|
perStripeCost, estimatedStripeReadCount,
|
|
scanCost)));
|
|
|
|
return scanCost;
|
|
}
|
|
|
|
|
|
/*
|
|
* CostColumnarSeqPath sets costs given seq path for columnar table with
|
|
* relationId.
|
|
*/
|
|
static void
|
|
CostColumnarSeqPath(RelOptInfo *rel, Oid relationId, Path *path)
|
|
{
|
|
if (!enable_seqscan)
|
|
{
|
|
/* costs are already set to disable_cost, don't adjust them */
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Seq scan doesn't support projection or qual pushdown, so we will read
|
|
* all the stripes and all the columns.
|
|
*/
|
|
double stripesToRead = ColumnarTableStripeCount(relationId);
|
|
int numberOfColumnsRead = RelationIdGetNumberOfAttributes(relationId);
|
|
|
|
path->startup_cost = 0;
|
|
path->total_cost = stripesToRead *
|
|
ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
|
|
}
|
|
|
|
|
|
/*
|
|
* RelationIdGetNumberOfAttributes returns number of attributes that relation
|
|
* with relationId has.
|
|
*/
|
|
static int
|
|
RelationIdGetNumberOfAttributes(Oid relationId)
|
|
{
|
|
Relation relation = RelationIdGetRelation(relationId);
|
|
if (!RelationIsValid(relation))
|
|
{
|
|
ereport(ERROR, (errmsg("could not open relation with OID %u", relationId)));
|
|
}
|
|
|
|
int nattrs = relation->rd_att->natts;
|
|
RelationClose(relation);
|
|
return nattrs;
|
|
}
|
|
|
|
|
|
/*
|
|
* CheckVarStats() checks whether a qual involving this Var is likely to be
|
|
* useful based on the correlation stats. If so, or if stats are unavailable,
|
|
* return true; otherwise return false and sets absVarCorrelation in case
|
|
* caller wants to use for logging purposes.
|
|
*/
|
|
static bool
|
|
CheckVarStats(PlannerInfo *root, Var *var, Oid sortop, float4 *absVarCorrelation)
|
|
{
|
|
/*
|
|
* Collect isunique, ndistinct, and varCorrelation.
|
|
*/
|
|
VariableStatData varStatData;
|
|
examine_variable(root, (Node *) var, var->varno, &varStatData);
|
|
if (varStatData.rel == NULL ||
|
|
!HeapTupleIsValid(varStatData.statsTuple))
|
|
{
|
|
return true;
|
|
}
|
|
|
|
AttStatsSlot sslot;
|
|
if (!get_attstatsslot(&sslot, varStatData.statsTuple,
|
|
STATISTIC_KIND_CORRELATION, sortop,
|
|
ATTSTATSSLOT_NUMBERS))
|
|
{
|
|
ReleaseVariableStats(varStatData);
|
|
return true;
|
|
}
|
|
|
|
Assert(sslot.nnumbers == 1);
|
|
|
|
float4 varCorrelation = sslot.numbers[0];
|
|
|
|
ReleaseVariableStats(varStatData);
|
|
|
|
/*
|
|
* If the Var is not highly correlated, then the chunk's min/max bounds
|
|
* will be nearly useless.
|
|
*/
|
|
if (float_abs(varCorrelation) < ColumnarQualPushdownCorrelationThreshold)
|
|
{
|
|
if (absVarCorrelation)
|
|
{
|
|
/*
|
|
* Report absVarCorrelation if caller wants to know why given
|
|
* var is rejected.
|
|
*/
|
|
*absVarCorrelation = float_abs(varCorrelation);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/*
|
|
* ExprReferencesRelid returns true if any of the Expr's Vars refer to the
|
|
* given relid; false otherwise.
|
|
*/
|
|
static bool
|
|
ExprReferencesRelid(Expr *expr, Index relid)
|
|
{
|
|
List *exprVars = pull_var_clause(
|
|
(Node *) expr, PVC_RECURSE_AGGREGATES |
|
|
PVC_RECURSE_WINDOWFUNCS | PVC_RECURSE_PLACEHOLDERS);
|
|
ListCell *lc;
|
|
foreach(lc, exprVars)
|
|
{
|
|
Var *var = (Var *) lfirst(lc);
|
|
if (var->varno == relid)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
|
|
* ExtractPushdownClause extracts an Expr node from given clause for pushing down
|
|
* into the given rel (including join clauses). This test may not be exact in
|
|
* all cases; it's used to reduce the search space for parameterization.
|
|
*
|
|
* Note that we don't try to handle cases like "Var + ExtParam = 3". That
|
|
* would require going through eval_const_expression after parameter binding,
|
|
* and that doesn't seem worth the effort. Here we just look for "Var op Expr"
|
|
* or "Expr op Var", where Var references rel and Expr references other rels
|
|
* (or no rels at all).
|
|
*
|
|
* Moreover, this function also looks into BoolExpr's to recursively extract
|
|
* pushdownable OpExpr's of them:
|
|
* i) AND_EXPR:
|
|
* Take pushdownable args of AND expressions by ignoring the other args.
|
|
* ii) OR_EXPR:
|
|
* Ignore the whole OR expression if we cannot exract a pushdownable Expr
|
|
* from one of its args.
|
|
* iii) NOT_EXPR:
|
|
* Simply ignore NOT expressions since we don't expect to see them before
|
|
* an expression that we can pushdown, see the comment in function.
|
|
*
|
|
* The reasoning for those three rules could also be summarized as such;
|
|
* for any expression that we cannot push-down, we must assume that it
|
|
* evaluates to true.
|
|
*
|
|
* For example, given following WHERE clause:
|
|
* (
|
|
* (a > random() OR a < 30)
|
|
* AND
|
|
* a < 200
|
|
* ) OR
|
|
* (
|
|
* a = 300
|
|
* OR
|
|
* a > 400
|
|
* );
|
|
* Even if we can pushdown (a < 30), we cannot pushdown (a > random() OR a < 30)
|
|
* due to (a > random()). However, we can pushdown (a < 200), so we extract
|
|
* (a < 200) from the lhs of the top level OR expression.
|
|
*
|
|
* For the rhs of the top level OR expression, since we can pushdown both (a = 300)
|
|
* and (a > 400), we take this part as is.
|
|
*
|
|
* Finally, since both sides of the top level OR expression yielded pushdownable
|
|
* expressions, we will pushdown the following:
|
|
* (a < 200) OR ((a = 300) OR (a > 400))
|
|
*/
|
|
static Expr *
|
|
ExtractPushdownClause(PlannerInfo *root, RelOptInfo *rel, Node *node)
|
|
{
|
|
CHECK_FOR_INTERRUPTS();
|
|
check_stack_depth();
|
|
|
|
if (node == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
if (IsA(node, BoolExpr))
|
|
{
|
|
BoolExpr *boolExpr = castNode(BoolExpr, node);
|
|
if (boolExpr->boolop == NOT_EXPR)
|
|
{
|
|
/*
|
|
* Standard planner should have already applied de-morgan rule to
|
|
* simple NOT expressions. If we encounter with such an expression
|
|
* here, then it can't be a pushdownable one, such as:
|
|
* WHERE id NOT IN (SELECT id FROM something).
|
|
*/
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: cannot push down clause: "
|
|
"must not contain a subplan")));
|
|
return NULL;
|
|
}
|
|
|
|
List *pushdownableArgs = NIL;
|
|
|
|
Node *boolExprArg = NULL;
|
|
foreach_declared_ptr(boolExprArg, boolExpr->args)
|
|
{
|
|
Expr *pushdownableArg = ExtractPushdownClause(root, rel,
|
|
(Node *) boolExprArg);
|
|
if (pushdownableArg)
|
|
{
|
|
pushdownableArgs = lappend(pushdownableArgs, pushdownableArg);
|
|
}
|
|
else if (boolExpr->boolop == OR_EXPR)
|
|
{
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: cannot push down clause: "
|
|
"all arguments of an OR expression must be "
|
|
"pushdownable but one of them was not, due "
|
|
"to the reason given above")));
|
|
return NULL;
|
|
}
|
|
|
|
/* simply skip AND args that we cannot pushdown */
|
|
}
|
|
|
|
int npushdownableArgs = list_length(pushdownableArgs);
|
|
if (npushdownableArgs == 0)
|
|
{
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: cannot push down clause: "
|
|
"none of the arguments were pushdownable, "
|
|
"due to the reason(s) given above ")));
|
|
return NULL;
|
|
}
|
|
else if (npushdownableArgs == 1)
|
|
{
|
|
return (Expr *) linitial(pushdownableArgs);
|
|
}
|
|
|
|
if (boolExpr->boolop == AND_EXPR)
|
|
{
|
|
return make_andclause(pushdownableArgs);
|
|
}
|
|
else if (boolExpr->boolop == OR_EXPR)
|
|
{
|
|
return make_orclause(pushdownableArgs);
|
|
}
|
|
else
|
|
{
|
|
/* already discarded NOT expr, so should not be reachable */
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
if (IsA(node, ScalarArrayOpExpr))
|
|
{
|
|
if (!contain_volatile_functions(node))
|
|
{
|
|
return (Expr *) node;
|
|
}
|
|
else
|
|
{
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
if (!IsA(node, OpExpr) || list_length(((OpExpr *) node)->args) != 2)
|
|
{
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: cannot push down clause: "
|
|
"must be binary operator expression")));
|
|
return NULL;
|
|
}
|
|
|
|
OpExpr *opExpr = castNode(OpExpr, node);
|
|
Expr *lhs = list_nth(opExpr->args, 0);
|
|
Expr *rhs = list_nth(opExpr->args, 1);
|
|
|
|
Var *varSide;
|
|
Expr *exprSide;
|
|
|
|
if (IsA(lhs, Var) && ((Var *) lhs)->varno == rel->relid &&
|
|
!ExprReferencesRelid((Expr *) rhs, rel->relid))
|
|
{
|
|
varSide = castNode(Var, lhs);
|
|
exprSide = rhs;
|
|
}
|
|
else if (IsA(rhs, Var) && ((Var *) rhs)->varno == rel->relid &&
|
|
!ExprReferencesRelid((Expr *) lhs, rel->relid))
|
|
{
|
|
varSide = castNode(Var, rhs);
|
|
exprSide = lhs;
|
|
}
|
|
else
|
|
{
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: cannot push down clause: "
|
|
"must match 'Var <op> Expr' or 'Expr <op> Var'"),
|
|
errhint("Var must only reference this rel, "
|
|
"and Expr must not reference this rel")));
|
|
return NULL;
|
|
}
|
|
|
|
if (varSide->varattno <= 0)
|
|
{
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: cannot push down clause: "
|
|
"var is whole-row reference or system column")));
|
|
return NULL;
|
|
}
|
|
|
|
if (contain_volatile_functions((Node *) exprSide))
|
|
{
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: cannot push down clause: "
|
|
"expr contains volatile functions")));
|
|
return NULL;
|
|
}
|
|
|
|
/* only the default opclass is used for qual pushdown. */
|
|
Oid varOpClass = GetDefaultOpClass(varSide->vartype, BTREE_AM_OID);
|
|
Oid varOpFamily;
|
|
Oid varOpcInType;
|
|
|
|
if (!OidIsValid(varOpClass) ||
|
|
!get_opclass_opfamily_and_input_type(varOpClass, &varOpFamily,
|
|
&varOpcInType))
|
|
{
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: cannot push down clause: "
|
|
"cannot find default btree opclass and opfamily for type: %s",
|
|
format_type_be(varSide->vartype))));
|
|
return NULL;
|
|
}
|
|
|
|
if (!op_in_opfamily(opExpr->opno, varOpFamily))
|
|
{
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: cannot push down clause: "
|
|
"operator %d not a member of opfamily %d",
|
|
opExpr->opno, varOpFamily)));
|
|
return NULL;
|
|
}
|
|
|
|
Oid sortop = get_opfamily_member(varOpFamily, varOpcInType,
|
|
varOpcInType, BTLessStrategyNumber);
|
|
Assert(OidIsValid(sortop));
|
|
|
|
/*
|
|
* Check that statistics on the Var support the utility of this
|
|
* clause.
|
|
*/
|
|
float4 absVarCorrelation = 0;
|
|
if (!CheckVarStats(root, varSide, sortop, &absVarCorrelation))
|
|
{
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: cannot push down clause: "
|
|
"absolute correlation (%.3f) of var attribute %d is "
|
|
"smaller than the value configured in "
|
|
"\"columnar.qual_pushdown_correlation_threshold\" "
|
|
"(%.3f)", absVarCorrelation, varSide->varattno,
|
|
ColumnarQualPushdownCorrelationThreshold)));
|
|
return NULL;
|
|
}
|
|
|
|
return (Expr *) node;
|
|
}
|
|
|
|
|
|
/*
|
|
* FilterPushdownClauses filters for clauses that are candidates for pushing
|
|
* down into rel.
|
|
*/
|
|
static List *
|
|
FilterPushdownClauses(PlannerInfo *root, RelOptInfo *rel, List *inputClauses)
|
|
{
|
|
List *filteredClauses = NIL;
|
|
ListCell *lc;
|
|
foreach(lc, inputClauses)
|
|
{
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
|
|
|
|
/*
|
|
* Ignore clauses that don't refer to this rel, and pseudoconstants.
|
|
*
|
|
* XXX: A pseudoconstant may be of use, but it doesn't make sense to
|
|
* push it down because it doesn't contain any Vars. Look into if
|
|
* there's something we should do with pseudoconstants here.
|
|
*/
|
|
if (rinfo->pseudoconstant ||
|
|
!bms_is_member(rel->relid, rinfo->required_relids))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
Expr *pushdownableExpr = ExtractPushdownClause(root, rel, (Node *) rinfo->clause);
|
|
if (!pushdownableExpr)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
rinfo = copyObject(rinfo);
|
|
rinfo->clause = pushdownableExpr;
|
|
filteredClauses = lappend(filteredClauses, rinfo);
|
|
}
|
|
|
|
return filteredClauses;
|
|
}
|
|
|
|
|
|
/*
|
|
* PushdownJoinClauseMatches is a callback that returns true, indicating that
|
|
* we want all of the clauses from generate_implied_equalities_for_column().
|
|
*/
|
|
static bool
|
|
PushdownJoinClauseMatches(PlannerInfo *root, RelOptInfo *rel,
|
|
EquivalenceClass *ec, EquivalenceMember *em,
|
|
void *arg)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
|
|
/*
|
|
* FindPushdownJoinClauses finds join clauses, including those implied by ECs,
|
|
* that may be pushed down.
|
|
*/
|
|
static List *
|
|
FindPushdownJoinClauses(PlannerInfo *root, RelOptInfo *rel)
|
|
{
|
|
List *joinClauses = copyObject(rel->joininfo);
|
|
|
|
/*
|
|
* Here we are generating the clauses just so we can later extract the
|
|
* interesting relids. This is somewhat wasteful, but it allows us to
|
|
* filter out joinclauses, reducing the number of relids we need to
|
|
* consider.
|
|
*
|
|
* XXX: also find additional clauses for joininfo that are implied by ECs?
|
|
*/
|
|
List *ecClauses = generate_implied_equalities_for_column(
|
|
root, rel, PushdownJoinClauseMatches, NULL,
|
|
rel->lateral_referencers);
|
|
List *allClauses = list_concat(joinClauses, ecClauses);
|
|
|
|
return FilterPushdownClauses(root, rel, allClauses);
|
|
}
|
|
|
|
|
|
/*
|
|
* FindCandidateRelids identifies candidate rels for parameterization from the
|
|
* list of join clauses.
|
|
*
|
|
* Some rels cannot be considered for parameterization, such as a partitioned
|
|
* parent of the given rel. Other rels are just not useful because they don't
|
|
* appear in a join clause that could be pushed down.
|
|
*/
|
|
static Relids
|
|
FindCandidateRelids(PlannerInfo *root, RelOptInfo *rel, List *joinClauses)
|
|
{
|
|
Relids candidateRelids = NULL;
|
|
ListCell *lc;
|
|
foreach(lc, joinClauses)
|
|
{
|
|
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
|
|
|
|
candidateRelids = bms_add_members(candidateRelids,
|
|
rinfo->required_relids);
|
|
}
|
|
|
|
candidateRelids = bms_del_members(candidateRelids, rel->relids);
|
|
candidateRelids = bms_del_members(candidateRelids, rel->lateral_relids);
|
|
|
|
/*
|
|
* For the relevant PG16 commit requiring this addition:
|
|
* postgres/postgres@2489d76
|
|
*/
|
|
#if PG_VERSION_NUM >= PG_VERSION_16
|
|
candidateRelids = bms_del_members(candidateRelids, root->outer_join_rels);
|
|
#endif
|
|
|
|
return candidateRelids;
|
|
}
|
|
|
|
|
|
/*
 * Combinations() calculates the number of combinations of n things taken k at
 * a time, as a double.
 *
 * When the correct result is too large to be represented exactly, the
 * returned value is an approximation, and for results beyond the range of a
 * double it overflows to inf; callers should handle both appropriately.
 *
 * Use the following two formulae from Knuth TAoCP, 1.2.6:
 *   (2) Combinations(n, k) = (n*(n-1)..(n-k+1)) / (k*(k-1)..1)
 *   (5) Combinations(n, k) = Combinations(n, n-k)
 *
 * Formula (2) is evaluated incrementally, multiplying by one numerator factor
 * and dividing by one denominator factor per step. Every intermediate value
 * is itself a binomial coefficient, Combinations(n-k+i, i), so it never
 * exceeds the final result. Computing the whole numerator first would
 * overflow to inf for inputs whose actual result is perfectly representable
 * (e.g. Combinations(400, 200)).
 *
 * As before, if k is negative or greater than n, no factors are applied and
 * the result is 1.
 */
static double
Combinations(int n, int k)
{
	double v = 1;

	/*
	 * Use formula (5) to choose the smaller, but equivalent, k. This keeps
	 * the number of steps minimal, e.g. Combinations(500, 500) needs none.
	 */
	if (n - k < k)
	{
		k = n - k;
	}

	/*
	 * After step i, v equals Combinations(n - k + i, i). The product
	 * v * (n - k + i) is a multiple of i, so the division introduces no
	 * fractional part as long as the values stay exactly representable.
	 */
	for (int i = 1; i <= k; i++)
	{
		v = v * (n - k + i) / i;
	}

	return v;
}
|
|
|
|
|
|
/*
|
|
* ChooseDepthLimit() calculates the depth limit for the parameterization
|
|
* search, given the number of candidate relations.
|
|
*
|
|
* The maximum number of paths generated for a given depthLimit is:
|
|
*
|
|
* Combinations(nCandidates, 0) + Combinations(nCandidates, 1) + ... +
|
|
* Combinations(nCandidates, depthLimit)
|
|
*
|
|
* There's no closed formula for a partial sum of combinations, so just keep
|
|
* increasing the depth until the number of combinations exceeds the limit.
|
|
*/
|
|
static int
|
|
ChooseDepthLimit(int nCandidates)
|
|
{
|
|
if (!EnableColumnarQualPushdown)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
int depth = 0;
|
|
double numPaths = 1;
|
|
|
|
while (depth < nCandidates)
|
|
{
|
|
numPaths += Combinations(nCandidates, depth + 1);
|
|
|
|
if (numPaths > (double) ColumnarMaxCustomScanPaths)
|
|
{
|
|
break;
|
|
}
|
|
|
|
depth++;
|
|
}
|
|
|
|
return depth;
|
|
}
|
|
|
|
|
|
/*
|
|
* AddColumnarScanPaths is the entry point for recursively generating
|
|
* parameterized paths. See AddColumnarScanPathsRec() for discussion.
|
|
*/
|
|
static void
|
|
AddColumnarScanPaths(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
|
|
{
|
|
List *joinClauses = FindPushdownJoinClauses(root, rel);
|
|
Relids candidateRelids = FindCandidateRelids(root, rel, joinClauses);
|
|
|
|
int depthLimit = ChooseDepthLimit(bms_num_members(candidateRelids));
|
|
|
|
/* must always parameterize by lateral refs */
|
|
Relids paramRelids = bms_copy(rel->lateral_relids);
|
|
|
|
AddColumnarScanPathsRec(root, rel, rte, paramRelids, candidateRelids,
|
|
depthLimit);
|
|
}
|
|
|
|
|
|
/*
|
|
* AddColumnarScanPathsRec is a recursive function to search the
|
|
* parameterization space and add CustomPaths for columnar scans.
|
|
*
|
|
* The set paramRelids is the parameterization at the current level, and
|
|
* candidateRelids is the set from which we draw to generate paths with
|
|
* greater parameterization.
|
|
*
|
|
* Columnar tables resemble indexes because of the ability to push down
|
|
* quals. Ordinary quals, such as x = 7, can be pushed down easily. But join
|
|
* quals of the form "x = y" (where "y" comes from another rel) require the
|
|
* proper parameterization.
|
|
*
|
|
* Paths that require more outer rels can push down more join clauses that
|
|
* depend on those outer rels. But requiring more outer rels gives the planner
|
|
* fewer options for the shape of the plan. That means there is a trade-off,
|
|
* and we should generate plans of various parameterizations, then let the
|
|
* planner choose. We always need to generate one minimally-parameterized path
|
|
* (parameterized only by lateral refs, if present) to make sure that at least
|
|
* one path can be chosen. Then, we generate as many parameterized paths as we
|
|
* reasonably can.
|
|
*
|
|
* The set of all possible parameterizations is the power set of
|
|
* candidateRelids. The power set has cardinality 2^N, where N is the
|
|
* cardinality of candidateRelids. To avoid creating a huge number of paths,
|
|
* limit the depth of the search; the depthLimit is equivalent to the maximum
|
|
* number of required outer rels (beyond the minimal parameterization) for the
|
|
* path. A depthLimit of zero means that only the minimally-parameterized path
|
|
* will be generated.
|
|
*/
|
|
static void
|
|
AddColumnarScanPathsRec(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte,
|
|
Relids paramRelids, Relids candidateRelids,
|
|
int depthLimit)
|
|
{
|
|
CHECK_FOR_INTERRUPTS();
|
|
check_stack_depth();
|
|
|
|
Assert(!bms_overlap(paramRelids, candidateRelids));
|
|
AddColumnarScanPath(root, rel, rte, paramRelids);
|
|
|
|
/* recurse for all candidateRelids, unless we hit the depth limit */
|
|
Assert(depthLimit >= 0);
|
|
if (depthLimit-- == 0)
|
|
{
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Iterate through parameter combinations depth-first. Deeper levels
|
|
* generate paths of greater parameterization (and hopefully lower
|
|
* cost).
|
|
*/
|
|
Relids tmpCandidateRelids = bms_copy(candidateRelids);
|
|
int relid = -1;
|
|
while ((relid = bms_next_member(candidateRelids, relid)) >= 0)
|
|
{
|
|
Relids tmpParamRelids = bms_add_member(
|
|
bms_copy(paramRelids), relid);
|
|
|
|
/*
|
|
* Because we are generating combinations (not permutations), remove
|
|
* the relid from the set of candidates at this level as we descend to
|
|
* the next.
|
|
*/
|
|
tmpCandidateRelids = bms_del_member(tmpCandidateRelids, relid);
|
|
|
|
AddColumnarScanPathsRec(root, rel, rte, tmpParamRelids,
|
|
tmpCandidateRelids, depthLimit);
|
|
}
|
|
|
|
bms_free(tmpCandidateRelids);
|
|
}
|
|
|
|
|
|
/*
|
|
* ParameterizationAsString returns the string representation of the set of
|
|
* rels given in paramRelids.
|
|
*
|
|
* Takes a StringInfo so that it doesn't return palloc'd memory. This makes it
|
|
* easy to call this function as an argument to ereport(), such that it won't
|
|
* be evaluated unless the message is going to be output somewhere.
|
|
*/
|
|
static char *
|
|
ParameterizationAsString(PlannerInfo *root, Relids paramRelids, StringInfo buf)
|
|
{
|
|
bool firstTime = true;
|
|
int relid = -1;
|
|
|
|
if (bms_num_members(paramRelids) == 0)
|
|
{
|
|
return "unparameterized";
|
|
}
|
|
|
|
appendStringInfoString(buf, "parameterized by rels {");
|
|
while ((relid = bms_next_member(paramRelids, relid)) >= 0)
|
|
{
|
|
RangeTblEntry *rte = root->simple_rte_array[relid];
|
|
const char *relname = quote_identifier(rte->eref->aliasname);
|
|
|
|
appendStringInfo(buf, "%s%s", firstTime ? "" : ", ", relname);
|
|
|
|
if (relname != rte->eref->aliasname)
|
|
{
|
|
pfree((void *) relname);
|
|
}
|
|
|
|
firstTime = false;
|
|
}
|
|
appendStringInfoString(buf, "}");
|
|
return buf->data;
|
|
}
|
|
|
|
|
|
/*
|
|
* ContainsExecParams tests whether the node contains any exec params. The
|
|
* signature accepts an extra argument for use with expression_tree_walker.
|
|
*/
|
|
static bool
|
|
ContainsExecParams(Node *node, void *notUsed)
|
|
{
|
|
if (node == NULL)
|
|
{
|
|
return false;
|
|
}
|
|
else if (IsA(node, Param))
|
|
{
|
|
Param *param = castNode(Param, node);
|
|
if (param->paramkind == PARAM_EXEC)
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
return expression_tree_walker(node, ContainsExecParams, NULL);
|
|
}
|
|
|
|
|
|
/*
|
|
* Create and add a path with the given parameterization paramRelids.
|
|
*
|
|
* XXX: Consider refactoring to be more like postgresGetForeignPaths(). The
|
|
* only differences are param_info and custom_private.
|
|
*/
|
|
static void
|
|
AddColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte,
|
|
Relids paramRelids)
|
|
{
|
|
/*
|
|
* Must return a CustomPath, not a larger structure containing a
|
|
* CustomPath as the first field. Otherwise, nodeToString() will fail to
|
|
* output the additional fields.
|
|
*/
|
|
CustomPath *cpath = makeNode(CustomPath);
|
|
|
|
cpath->methods = &ColumnarScanPathMethods;
|
|
|
|
/* necessary to avoid extra Result node in PG15 */
|
|
cpath->flags = CUSTOMPATH_SUPPORT_PROJECTION;
|
|
|
|
/*
|
|
* populate generic path information
|
|
*/
|
|
Path *path = &cpath->path;
|
|
path->pathtype = T_CustomScan;
|
|
path->parent = rel;
|
|
path->pathtarget = rel->reltarget;
|
|
|
|
/* columnar scans are not parallel-aware, but they are parallel-safe */
|
|
path->parallel_safe = rel->consider_parallel;
|
|
|
|
path->param_info = get_baserel_parampathinfo(root, rel, paramRelids);
|
|
|
|
/*
|
|
* Usable clauses for this parameterization exist in baserestrictinfo and
|
|
* ppi_clauses.
|
|
*/
|
|
List *allClauses = copyObject(rel->baserestrictinfo);
|
|
if (path->param_info != NULL)
|
|
{
|
|
allClauses = list_concat(allClauses, path->param_info->ppi_clauses);
|
|
}
|
|
|
|
allClauses = FilterPushdownClauses(root, rel, allClauses);
|
|
|
|
/*
|
|
* Plain clauses may contain extern params, but not exec params, and can
|
|
* be evaluated at init time or rescan time. Track them in another list
|
|
* that is a subset of allClauses.
|
|
*
|
|
* Note: although typically baserestrictinfo contains plain clauses,
|
|
* that's not always true. It can also contain a qual referencing a Var at
|
|
* a higher query level, which can be turned into an exec param, and
|
|
* therefore it won't be a plain clause.
|
|
*/
|
|
List *plainClauses = NIL;
|
|
ListCell *lc;
|
|
foreach(lc, allClauses)
|
|
{
|
|
RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
|
|
if (bms_is_subset(rinfo->required_relids, rel->relids) &&
|
|
!ContainsExecParams((Node *) rinfo->clause, NULL))
|
|
{
|
|
plainClauses = lappend(plainClauses, rinfo);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* We can't make our own CustomPath structure, so we need to put
|
|
* everything in the custom_private list. To keep the two lists separate,
|
|
* we make them sublists in a 2-element list.
|
|
*/
|
|
if (EnableColumnarQualPushdown)
|
|
{
|
|
cpath->custom_private = list_make2(copyObject(plainClauses),
|
|
copyObject(allClauses));
|
|
}
|
|
else
|
|
{
|
|
cpath->custom_private = list_make2(NIL, NIL);
|
|
}
|
|
|
|
int numberOfColumnsRead = 0;
|
|
#if PG_VERSION_NUM >= PG_VERSION_16
|
|
if (rte->perminfoindex > 0)
|
|
{
|
|
/*
|
|
* If perminfoindex > 0, that means that this relation's permission info
|
|
* is directly found in the list of rteperminfos of the Query(root->parse)
|
|
* So, all we have to do here is retrieve that info.
|
|
*/
|
|
RTEPermissionInfo *perminfo = getRTEPermissionInfo(root->parse->rteperminfos,
|
|
rte);
|
|
numberOfColumnsRead = bms_num_members(perminfo->selectedCols);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* If perminfoindex = 0, that means we are skipping the check for permission info
|
|
* for this relation, which means that it's either a partition or an inheritance child.
|
|
* In these cases, we need to access the permission info of the top parent of this relation.
|
|
* After thorough checking, we found that the index of the top parent pointing to the correct
|
|
* range table entry in Query's range tables (root->parse->rtable) is found under
|
|
* RelOptInfo rel->top_parent->relid.
|
|
* For reference, check expand_partitioned_rtentry and expand_inherited_rtentry PG functions
|
|
*/
|
|
Assert(rel->top_parent);
|
|
RangeTblEntry *parent_rte = rt_fetch(rel->top_parent->relid, root->parse->rtable);
|
|
RTEPermissionInfo *perminfo = getRTEPermissionInfo(root->parse->rteperminfos,
|
|
parent_rte);
|
|
numberOfColumnsRead = bms_num_members(fixup_inherited_columns(perminfo->relid,
|
|
rte->relid,
|
|
perminfo->
|
|
selectedCols));
|
|
}
|
|
#else
|
|
numberOfColumnsRead = bms_num_members(rte->selectedCols);
|
|
#endif
|
|
|
|
int numberOfClausesPushed = list_length(allClauses);
|
|
|
|
CostColumnarScan(root, rel, rte->relid, cpath, numberOfColumnsRead,
|
|
numberOfClausesPushed);
|
|
|
|
|
|
StringInfoData buf;
|
|
initStringInfo(&buf);
|
|
ereport(ColumnarPlannerDebugLevel,
|
|
(errmsg("columnar planner: adding CustomScan path for %s",
|
|
rte->eref->aliasname),
|
|
errdetail("%s; %d clauses pushed down",
|
|
ParameterizationAsString(root, paramRelids, &buf),
|
|
numberOfClausesPushed)));
|
|
|
|
add_path(rel, path);
|
|
}
|
|
|
|
|
|
#if PG_VERSION_NUM >= PG_VERSION_16
|
|
|
|
/*
|
|
* fixup_inherited_columns
|
|
*
|
|
* Exact function Copied from PG16 as it's static.
|
|
*
|
|
* When user is querying on a table with children, it implicitly accesses
|
|
* child tables also. So, we also need to check security label of child
|
|
* tables and columns, but there is no guarantee attribute numbers are
|
|
* same between the parent and children.
|
|
* It returns a bitmapset which contains attribute number of the child
|
|
* table based on the given bitmapset of the parent.
|
|
*/
|
|
static Bitmapset *
|
|
fixup_inherited_columns(Oid parentId, Oid childId, Bitmapset *columns)
|
|
{
|
|
Bitmapset *result = NULL;
|
|
|
|
/*
|
|
* obviously, no need to do anything here
|
|
*/
|
|
if (parentId == childId)
|
|
{
|
|
return columns;
|
|
}
|
|
|
|
int index = -1;
|
|
while ((index = bms_next_member(columns, index)) >= 0)
|
|
{
|
|
/* bit numbers are offset by FirstLowInvalidHeapAttributeNumber */
|
|
AttrNumber attno = index + FirstLowInvalidHeapAttributeNumber;
|
|
|
|
/*
|
|
* whole-row-reference shall be fixed-up later
|
|
*/
|
|
if (attno == InvalidAttrNumber)
|
|
{
|
|
result = bms_add_member(result, index);
|
|
continue;
|
|
}
|
|
|
|
char *attname = get_attname(parentId, attno, false);
|
|
attno = get_attnum(childId, attname);
|
|
if (attno == InvalidAttrNumber)
|
|
{
|
|
elog(ERROR, "cache lookup failed for attribute %s of relation %u",
|
|
attname, childId);
|
|
}
|
|
|
|
result = bms_add_member(result,
|
|
attno - FirstLowInvalidHeapAttributeNumber);
|
|
|
|
pfree(attname);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
#endif
|
|
|
|
|
|
/*
|
|
* CostColumnarScan calculates the cost of scanning the columnar table. The
|
|
* cost is estimated by using all stripe metadata to estimate based on the
|
|
* columns to read how many pages need to be read.
|
|
*/
|
|
static void
|
|
CostColumnarScan(PlannerInfo *root, RelOptInfo *rel, Oid relationId,
|
|
CustomPath *cpath, int numberOfColumnsRead, int nClauses)
|
|
{
|
|
Path *path = &cpath->path;
|
|
|
|
List *allClauses = lsecond(cpath->custom_private);
|
|
Selectivity clauseSel = clauselist_selectivity(
|
|
root, allClauses, rel->relid, JOIN_INNER, NULL);
|
|
|
|
/*
|
|
* We already filtered out clauses where the overall selectivity would be
|
|
* misleading, such as inequalities involving an uncorrelated column. So
|
|
* we can apply the selectivity directly to the number of stripes.
|
|
*/
|
|
double stripesToRead = clauseSel * ColumnarTableStripeCount(relationId);
|
|
stripesToRead = Max(stripesToRead, 1.0);
|
|
|
|
path->rows = rel->rows;
|
|
path->startup_cost = 0;
|
|
path->total_cost = stripesToRead *
|
|
ColumnarPerStripeScanCost(rel, relationId, numberOfColumnsRead);
|
|
}
|
|
|
|
|
|
/*
|
|
* ColumnarPerStripeScanCost calculates the cost to scan a single stripe
|
|
* of given columnar table based on number of columns that needs to be
|
|
* read during scan operation.
|
|
*/
|
|
static Cost
|
|
ColumnarPerStripeScanCost(RelOptInfo *rel, Oid relationId, int numberOfColumnsRead)
|
|
{
|
|
Relation relation = RelationIdGetRelation(relationId);
|
|
if (!RelationIsValid(relation))
|
|
{
|
|
ereport(ERROR, (errmsg("could not open relation with OID %u", relationId)));
|
|
}
|
|
|
|
List *stripeList = StripesForRelfilelocator(RelationPhysicalIdentifier_compat(
|
|
relation));
|
|
RelationClose(relation);
|
|
|
|
uint32 maxColumnCount = 0;
|
|
uint64 totalStripeSize = 0;
|
|
StripeMetadata *stripeMetadata = NULL;
|
|
foreach_declared_ptr(stripeMetadata, stripeList)
|
|
{
|
|
totalStripeSize += stripeMetadata->dataLength;
|
|
maxColumnCount = Max(maxColumnCount, stripeMetadata->columnCount);
|
|
}
|
|
|
|
/*
|
|
* When no stripes are in the table we don't have a count in maxColumnCount. To
|
|
* prevent a division by zero turning into a NaN we keep the ratio on zero.
|
|
* This will result in a cost of 0 for scanning the table which is a reasonable
|
|
* cost on an empty table.
|
|
*/
|
|
if (maxColumnCount == 0)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
double columnSelectionRatio = numberOfColumnsRead / (double) maxColumnCount;
|
|
Cost tableScanCost = (double) totalStripeSize / BLCKSZ * columnSelectionRatio;
|
|
Cost perStripeScanCost = tableScanCost / list_length(stripeList);
|
|
|
|
/*
|
|
* Finally, multiply the cost of reading a single stripe by seq page read
|
|
* cost to make our estimation scale compatible with postgres.
|
|
* Since we are calculating the cost for a single stripe here, we use seq
|
|
* page cost instead of random page cost. This is because, random page
|
|
* access only happens when switching between columns, which is pretty
|
|
* much neglactable.
|
|
*/
|
|
double relSpaceSeqPageCost;
|
|
get_tablespace_page_costs(rel->reltablespace,
|
|
NULL, &relSpaceSeqPageCost);
|
|
perStripeScanCost = perStripeScanCost * relSpaceSeqPageCost;
|
|
|
|
return perStripeScanCost;
|
|
}
|
|
|
|
|
|
/*
|
|
* ColumnarTableStripeCount returns the number of stripes that columnar
|
|
* table with relationId has by using stripe metadata.
|
|
*/
|
|
static uint64
|
|
ColumnarTableStripeCount(Oid relationId)
|
|
{
|
|
Relation relation = RelationIdGetRelation(relationId);
|
|
if (!RelationIsValid(relation))
|
|
{
|
|
ereport(ERROR, (errmsg("could not open relation with OID %u", relationId)));
|
|
}
|
|
|
|
List *stripeList = StripesForRelfilelocator(RelationPhysicalIdentifier_compat(
|
|
relation));
|
|
int stripeCount = list_length(stripeList);
|
|
RelationClose(relation);
|
|
|
|
return stripeCount;
|
|
}
|
|
|
|
|
|
static Plan *
|
|
ColumnarScanPath_PlanCustomPath(PlannerInfo *root,
|
|
RelOptInfo *rel,
|
|
struct CustomPath *best_path,
|
|
List *tlist,
|
|
List *clauses,
|
|
List *custom_plans)
|
|
{
|
|
/*
|
|
* Must return a CustomScan, not a larger structure containing a
|
|
* CustomScan as the first field. Otherwise, copyObject() will fail to
|
|
* copy the additional fields.
|
|
*/
|
|
CustomScan *cscan = makeNode(CustomScan);
|
|
|
|
cscan->methods = &ColumnarScanScanMethods;
|
|
|
|
/* XXX: also need to store projected column list for EXPLAIN */
|
|
|
|
if (EnableColumnarQualPushdown)
|
|
{
|
|
/*
|
|
* Lists of pushed-down clauses. The Vars in custom_exprs referencing
|
|
* other relations will be changed into exec Params by
|
|
* create_customscan_plan().
|
|
*
|
|
* Like CustomPath->custom_private, keep a list of plain clauses
|
|
* separate from the list of all clauses by making them sublists of a
|
|
* 2-element list.
|
|
*
|
|
* XXX: custom_exprs are the quals that will be pushed into the
|
|
* columnar reader code; some of these may not be usable. We should
|
|
* fix this by processing the quals more completely and using
|
|
* ScanKeys.
|
|
*/
|
|
List *plainClauses = extract_actual_clauses(
|
|
linitial(best_path->custom_private), false /* no pseudoconstants */);
|
|
List *allClauses = extract_actual_clauses(
|
|
lsecond(best_path->custom_private), false /* no pseudoconstants */);
|
|
cscan->custom_exprs = copyObject(list_make2(plainClauses, allClauses));
|
|
}
|
|
else
|
|
{
|
|
cscan->custom_exprs = list_make2(NIL, NIL);
|
|
}
|
|
|
|
cscan->scan.plan.qual = extract_actual_clauses(
|
|
clauses, false /* no pseudoconstants */);
|
|
cscan->scan.plan.targetlist = list_copy(tlist);
|
|
cscan->scan.scanrelid = best_path->path.parent->relid;
|
|
|
|
#if (PG_VERSION_NUM >= 150000)
|
|
|
|
/* necessary to avoid extra Result node in PG15 */
|
|
cscan->flags = CUSTOMPATH_SUPPORT_PROJECTION;
|
|
#endif
|
|
|
|
return (Plan *) cscan;
|
|
}
|
|
|
|
|
|
/*
|
|
* ReparameterizeMutator changes all varnos referencing the topmost parent of
|
|
* child_rel to instead reference child_rel directly.
|
|
*/
|
|
static Node *
|
|
ReparameterizeMutator(Node *node, RelOptInfo *child_rel)
|
|
{
|
|
if (node == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
if (IsA(node, Var))
|
|
{
|
|
Var *var = castNode(Var, node);
|
|
if (bms_is_member(var->varno, child_rel->top_parent_relids))
|
|
{
|
|
var = copyObject(var);
|
|
var->varno = child_rel->relid;
|
|
}
|
|
return (Node *) var;
|
|
}
|
|
|
|
if (IsA(node, RestrictInfo))
|
|
{
|
|
RestrictInfo *rinfo = castNode(RestrictInfo, node);
|
|
rinfo = copyObject(rinfo);
|
|
rinfo->clause = (Expr *) expression_tree_mutator(
|
|
(Node *) rinfo->clause, ReparameterizeMutator, (void *) child_rel);
|
|
return (Node *) rinfo;
|
|
}
|
|
return expression_tree_mutator(node, ReparameterizeMutator,
|
|
(void *) child_rel);
|
|
}
|
|
|
|
|
|
/*
|
|
* ColumnarScanPath_ReparameterizeCustomPathByChild is a method called when a
|
|
* path is reparameterized directly to a child relation, rather than the
|
|
* top-level parent.
|
|
*
|
|
* For instance, let there be a join of two partitioned columnar relations PX
|
|
* and PY. A path for a ColumnarScan of PY3 might be parameterized by PX so
|
|
* that the join qual "PY3.a = PX.a" (referencing the parent PX) can be pushed
|
|
* down. But if the planner decides on a partition-wise join, then the path
|
|
* will be reparameterized on the child table PX3 directly.
|
|
*
|
|
* When that happens, we need to update all Vars in the pushed-down quals to
|
|
* reference PX3, not PX, to match the new parameterization. This method
|
|
* notifies us that it needs to be done, and allows us to update the
|
|
* information in custom_private.
|
|
*/
|
|
static List *
|
|
ColumnarScanPath_ReparameterizeCustomPathByChild(PlannerInfo *root,
|
|
List *custom_private,
|
|
RelOptInfo *child_rel)
|
|
{
|
|
return (List *) ReparameterizeMutator((Node *) custom_private, child_rel);
|
|
}
|
|
|
|
|
|
static Node *
|
|
ColumnarScan_CreateCustomScanState(CustomScan *cscan)
|
|
{
|
|
ColumnarScanState *columnarScanState = (ColumnarScanState *) newNode(
|
|
sizeof(ColumnarScanState), T_CustomScanState);
|
|
|
|
CustomScanState *cscanstate = &columnarScanState->custom_scanstate;
|
|
cscanstate->methods = &ColumnarScanExecuteMethods;
|
|
|
|
return (Node *) cscanstate;
|
|
}
|
|
|
|
|
|
/*
|
|
* EvalParamsMutator evaluates Params in the expression and replaces them with
|
|
* Consts.
|
|
*/
|
|
static Node *
|
|
EvalParamsMutator(Node *node, ExprContext *econtext)
|
|
{
|
|
if (node == NULL)
|
|
{
|
|
return NULL;
|
|
}
|
|
|
|
if (IsA(node, Param))
|
|
{
|
|
Param *param = (Param *) node;
|
|
int16 typLen;
|
|
bool typByVal;
|
|
bool isnull;
|
|
|
|
get_typlenbyval(param->paramtype, &typLen, &typByVal);
|
|
|
|
/* XXX: should save ExprState for efficiency */
|
|
ExprState *exprState = ExecInitExprWithParams((Expr *) node,
|
|
econtext->ecxt_param_list_info);
|
|
Datum pval = ExecEvalExpr(exprState, econtext, &isnull);
|
|
|
|
return (Node *) makeConst(param->paramtype,
|
|
param->paramtypmod,
|
|
param->paramcollid,
|
|
(int) typLen,
|
|
pval,
|
|
isnull,
|
|
typByVal);
|
|
}
|
|
|
|
return expression_tree_mutator(node, EvalParamsMutator, (void *) econtext);
|
|
}
|
|
|
|
|
|
static void
|
|
ColumnarScan_BeginCustomScan(CustomScanState *cscanstate, EState *estate, int eflags)
|
|
{
|
|
CustomScan *cscan = (CustomScan *) cscanstate->ss.ps.plan;
|
|
ColumnarScanState *columnarScanState = (ColumnarScanState *) cscanstate;
|
|
ExprContext *stdecontext = cscanstate->ss.ps.ps_ExprContext;
|
|
|
|
/*
|
|
* Make a new ExprContext just like the existing one, except that we don't
|
|
* reset it every tuple.
|
|
*/
|
|
ExecAssignExprContext(estate, &cscanstate->ss.ps);
|
|
columnarScanState->css_RuntimeContext = cscanstate->ss.ps.ps_ExprContext;
|
|
cscanstate->ss.ps.ps_ExprContext = stdecontext;
|
|
|
|
ResetExprContext(columnarScanState->css_RuntimeContext);
|
|
List *plainClauses = linitial(cscan->custom_exprs);
|
|
columnarScanState->qual = (List *) EvalParamsMutator(
|
|
(Node *) plainClauses, columnarScanState->css_RuntimeContext);
|
|
|
|
/* scan slot is already initialized */
|
|
}
|
|
|
|
|
|
/*
|
|
* ColumnarAttrNeeded returns a list of AttrNumber's for the ones that are
|
|
* needed during columnar custom scan.
|
|
* Throws an error if finds a Var referencing to an attribute not supported
|
|
* by ColumnarScan.
|
|
*/
|
|
static Bitmapset *
|
|
ColumnarAttrNeeded(ScanState *ss)
|
|
{
|
|
TupleTableSlot *slot = ss->ss_ScanTupleSlot;
|
|
int natts = slot->tts_tupleDescriptor->natts;
|
|
Bitmapset *attr_needed = NULL;
|
|
Plan *plan = ss->ps.plan;
|
|
int flags = PVC_RECURSE_AGGREGATES |
|
|
PVC_RECURSE_WINDOWFUNCS | PVC_RECURSE_PLACEHOLDERS;
|
|
List *vars = list_concat(pull_var_clause((Node *) plan->targetlist, flags),
|
|
pull_var_clause((Node *) plan->qual, flags));
|
|
ListCell *lc;
|
|
|
|
foreach(lc, vars)
|
|
{
|
|
Var *var = lfirst(lc);
|
|
|
|
if (var->varattno < 0)
|
|
{
|
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg(
|
|
"UPDATE and CTID scans not supported for ColumnarScan")));
|
|
}
|
|
|
|
if (var->varattno == 0)
|
|
{
|
|
elog(DEBUG1, "Need attribute: all");
|
|
|
|
/* all attributes are required, we don't need to add more so break*/
|
|
attr_needed = bms_add_range(attr_needed, 0, natts - 1);
|
|
break;
|
|
}
|
|
|
|
elog(DEBUG1, "Need attribute: %d", var->varattno);
|
|
attr_needed = bms_add_member(attr_needed, var->varattno - 1);
|
|
}
|
|
|
|
return attr_needed;
|
|
}
|
|
|
|
|
|
static TupleTableSlot *
|
|
ColumnarScanNext(ColumnarScanState *columnarScanState)
|
|
{
|
|
CustomScanState *node = (CustomScanState *) columnarScanState;
|
|
|
|
/*
|
|
* get information from the estate and scan state
|
|
*/
|
|
TableScanDesc scandesc = node->ss.ss_currentScanDesc;
|
|
EState *estate = node->ss.ps.state;
|
|
ScanDirection direction = estate->es_direction;
|
|
TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
|
|
|
|
if (scandesc == NULL)
|
|
{
|
|
/* the columnar access method does not use the flags, they are specific to heap */
|
|
uint32 flags = 0;
|
|
Bitmapset *attr_needed = ColumnarAttrNeeded(&node->ss);
|
|
|
|
/*
|
|
* We reach here if the scan is not parallel, or if we're serially
|
|
* executing a scan that was planned to be parallel.
|
|
*/
|
|
scandesc = columnar_beginscan_extended(node->ss.ss_currentRelation,
|
|
estate->es_snapshot,
|
|
0, NULL, NULL, flags, attr_needed,
|
|
columnarScanState->qual);
|
|
bms_free(attr_needed);
|
|
|
|
node->ss.ss_currentScanDesc = scandesc;
|
|
}
|
|
|
|
/*
|
|
* get the next tuple from the table
|
|
*/
|
|
if (table_scan_getnextslot(scandesc, direction, slot))
|
|
{
|
|
return slot;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
/*
|
|
* SeqRecheck -- access method routine to recheck a tuple in EvalPlanQual
|
|
*/
|
|
static bool
|
|
ColumnarScanRecheck(ColumnarScanState *node, TupleTableSlot *slot)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
|
|
static TupleTableSlot *
|
|
ColumnarScan_ExecCustomScan(CustomScanState *node)
|
|
{
|
|
return ExecScan(&node->ss,
|
|
(ExecScanAccessMtd) ColumnarScanNext,
|
|
(ExecScanRecheckMtd) ColumnarScanRecheck);
|
|
}
|
|
|
|
|
|
static void
|
|
ColumnarScan_EndCustomScan(CustomScanState *node)
|
|
{
|
|
/*
|
|
* get information from node
|
|
*/
|
|
TableScanDesc scanDesc = node->ss.ss_currentScanDesc;
|
|
|
|
/*
|
|
* clean out the tuple table
|
|
*/
|
|
if (node->ss.ps.ps_ResultTupleSlot)
|
|
{
|
|
ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
|
|
}
|
|
ExecClearTuple(node->ss.ss_ScanTupleSlot);
|
|
|
|
/*
|
|
* close heap scan
|
|
*/
|
|
if (scanDesc != NULL)
|
|
{
|
|
table_endscan(scanDesc);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
ColumnarScan_ReScanCustomScan(CustomScanState *node)
|
|
{
|
|
CustomScan *cscan = (CustomScan *) node->ss.ps.plan;
|
|
ColumnarScanState *columnarScanState = (ColumnarScanState *) node;
|
|
|
|
ResetExprContext(columnarScanState->css_RuntimeContext);
|
|
List *allClauses = lsecond(cscan->custom_exprs);
|
|
columnarScanState->qual = (List *) EvalParamsMutator(
|
|
(Node *) allClauses, columnarScanState->css_RuntimeContext);
|
|
|
|
TableScanDesc scanDesc = node->ss.ss_currentScanDesc;
|
|
|
|
if (scanDesc != NULL)
|
|
{
|
|
/* XXX: hack to pass quals as scan keys */
|
|
ScanKey scanKeys = (ScanKey) columnarScanState->qual;
|
|
table_rescan(node->ss.ss_currentScanDesc,
|
|
scanKeys);
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
ColumnarScan_ExplainCustomScan(CustomScanState *node, List *ancestors,
|
|
ExplainState *es)
|
|
{
|
|
ColumnarScanState *columnarScanState = (ColumnarScanState *) node;
|
|
|
|
List *context = set_deparse_context_planstate(
|
|
es->deparse_cxt, (Node *) &node->ss.ps, ancestors);
|
|
|
|
List *projectedColumns = ColumnarVarNeeded(columnarScanState);
|
|
const char *projectedColumnsStr = ColumnarProjectedColumnsStr(
|
|
context, projectedColumns);
|
|
ExplainPropertyText("Columnar Projected Columns",
|
|
projectedColumnsStr, es);
|
|
|
|
CustomScan *cscan = castNode(CustomScan, node->ss.ps.plan);
|
|
List *chunkGroupFilter = lsecond(cscan->custom_exprs);
|
|
if (chunkGroupFilter != NULL)
|
|
{
|
|
const char *pushdownClausesStr = ColumnarPushdownClausesStr(
|
|
context, chunkGroupFilter);
|
|
ExplainPropertyText("Columnar Chunk Group Filters",
|
|
pushdownClausesStr, es);
|
|
|
|
ColumnarScanDesc columnarScanDesc =
|
|
(ColumnarScanDesc) node->ss.ss_currentScanDesc;
|
|
if (columnarScanDesc != NULL)
|
|
{
|
|
ExplainPropertyInteger(
|
|
"Columnar Chunk Groups Removed by Filter",
|
|
NULL, ColumnarScanChunkGroupsFiltered(columnarScanDesc), es);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
* ColumnarPushdownClausesStr represents the clauses to push down as a string.
|
|
*/
|
|
static const char *
|
|
ColumnarPushdownClausesStr(List *context, List *clauses)
|
|
{
|
|
Expr *conjunction;
|
|
|
|
Assert(list_length(clauses) > 0);
|
|
|
|
if (list_length(clauses) == 1)
|
|
{
|
|
conjunction = (Expr *) linitial(clauses);
|
|
}
|
|
else
|
|
{
|
|
conjunction = make_andclause(clauses);
|
|
}
|
|
|
|
bool useTableNamePrefix = false;
|
|
bool showImplicitCast = false;
|
|
return deparse_expression((Node *) conjunction, context,
|
|
useTableNamePrefix, showImplicitCast);
|
|
}
|
|
|
|
|
|
/*
|
|
* ColumnarProjectedColumnsStr generates projected column string for
|
|
* explain output.
|
|
*/
|
|
static const char *
|
|
ColumnarProjectedColumnsStr(List *context, List *projectedColumns)
|
|
{
|
|
if (list_length(projectedColumns) == 0)
|
|
{
|
|
return "<columnar optimized out all columns>";
|
|
}
|
|
|
|
bool useTableNamePrefix = false;
|
|
bool showImplicitCast = false;
|
|
return deparse_expression((Node *) projectedColumns, context,
|
|
useTableNamePrefix, showImplicitCast);
|
|
}
|
|
|
|
|
|
/*
|
|
* ColumnarVarNeeded returns a list of Var objects for the ones that are
|
|
* needed during columnar custom scan.
|
|
* Throws an error if finds a Var referencing to an attribute not supported
|
|
* by ColumnarScan.
|
|
*/
|
|
static List *
|
|
ColumnarVarNeeded(ColumnarScanState *columnarScanState)
|
|
{
|
|
ScanState *scanState = &columnarScanState->custom_scanstate.ss;
|
|
|
|
List *varList = NIL;
|
|
|
|
Bitmapset *neededAttrSet = ColumnarAttrNeeded(scanState);
|
|
int bmsMember = -1;
|
|
while ((bmsMember = bms_next_member(neededAttrSet, bmsMember)) >= 0)
|
|
{
|
|
Relation columnarRelation = scanState->ss_currentRelation;
|
|
|
|
/* neededAttrSet already represents 0-indexed attribute numbers */
|
|
Form_pg_attribute columnForm =
|
|
TupleDescAttr(RelationGetDescr(columnarRelation), bmsMember);
|
|
if (columnForm->attisdropped)
|
|
{
|
|
ereport(ERROR, (errcode(ERRCODE_UNDEFINED_COLUMN),
|
|
errmsg("cannot explain column with attrNum=%d "
|
|
"of columnar table %s since it is dropped",
|
|
bmsMember + 1,
|
|
RelationGetRelationName(columnarRelation))));
|
|
}
|
|
else if (columnForm->attnum <= 0)
|
|
{
|
|
/*
|
|
* ColumnarAttrNeeded should have already thrown an error for
|
|
* system columns. Similarly, it should have already expanded
|
|
* whole-row references to individual attributes.
|
|
*/
|
|
ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
errmsg("cannot explain column with attrNum=%d "
|
|
"of columnar table %s since it is either "
|
|
"a system column or a whole-row "
|
|
"reference", columnForm->attnum,
|
|
RelationGetRelationName(columnarRelation))));
|
|
}
|
|
|
|
|
|
/*
|
|
* varlevelsup is used to figure out the (query) level of the Var
|
|
* that we are investigating. Since we are dealing with a particular
|
|
* relation, it is useless here.
|
|
*/
|
|
Index varlevelsup = 0;
|
|
|
|
CustomScanState *customScanState = (CustomScanState *) columnarScanState;
|
|
CustomScan *customScan = (CustomScan *) customScanState->ss.ps.plan;
|
|
Index scanrelid = customScan->scan.scanrelid;
|
|
Var *var = makeVar(scanrelid, columnForm->attnum, columnForm->atttypid,
|
|
columnForm->atttypmod, columnForm->attcollation,
|
|
varlevelsup);
|
|
varList = lappend(varList, var);
|
|
}
|
|
|
|
return varList;
|
|
}
|
|
|
|
|
|
/*
|
|
* set_deparse_context_planstate is a compatibility wrapper for versions 13+.
|
|
*/
|
|
static List *
|
|
set_deparse_context_planstate(List *dpcontext, Node *node, List *ancestors)
|
|
{
|
|
PlanState *ps = (PlanState *) node;
|
|
return set_deparse_context_plan(dpcontext, ps->plan, ancestors);
|
|
}
|