Refactor ColumnarScanCost into ColumnarPerChunkGroupScanCost

pull/5090/head
Onur Tirtir 2021-06-25 16:22:14 +03:00
parent d8f92697f2
commit dba8421453
1 changed files with 55 additions and 27 deletions

View File

@ -60,7 +60,10 @@ static void RemovePathsByPredicate(RelOptInfo *rel, PathPredicate removePathPred
static bool IsNotIndexPath(Path *path); static bool IsNotIndexPath(Path *path);
static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, static Path * CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte); RangeTblEntry *rte);
static Cost ColumnarScanCost(RangeTblEntry *rte); static Cost ColumnarScanCost(Oid relationId, int numberOfColumnsRead);
static Cost ColumnarPerStripeScanCost(Oid relationId,
int numberOfColumnsRead);
static uint64 ColumnarTableStripeCount(Oid relationId);
static Plan * ColumnarScanPath_PlanCustomPath(PlannerInfo *root, static Plan * ColumnarScanPath_PlanCustomPath(PlannerInfo *root,
RelOptInfo *rel, RelOptInfo *rel,
struct CustomPath *best_path, struct CustomPath *best_path,
@ -265,7 +268,9 @@ CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
*/ */
path->rows = rel->rows; path->rows = rel->rows;
path->startup_cost = 0; path->startup_cost = 0;
path->total_cost = path->startup_cost + ColumnarScanCost(rte); int numberOfColumnsRead = bms_num_members(rte->selectedCols);
path->total_cost = path->startup_cost +
ColumnarScanCost(rte->relid, numberOfColumnsRead);
return (Path *) cspath; return (Path *) cspath;
} }
@ -277,42 +282,65 @@ CreateColumnarScanPath(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
* need to be read. * need to be read.
*/ */
static Cost static Cost
ColumnarScanCost(RangeTblEntry *rte) ColumnarScanCost(Oid relationId, int numberOfColumnsRead)
{ {
Relation rel = RelationIdGetRelation(rte->relid); return ColumnarTableStripeCount(relationId) *
List *stripeList = StripesForRelfilenode(rel->rd_node); ColumnarPerStripeScanCost(relationId, numberOfColumnsRead);
RelationClose(rel); }
/*
* ColumnarPerStripeScanCost calculates the cost to scan a single stripe
* of the given columnar table based on the number of columns that need to
* be read during the scan operation.
*/
static Cost
ColumnarPerStripeScanCost(Oid relationId, int numberOfColumnsRead)
{
Relation relation = RelationIdGetRelation(relationId);
List *stripeList = StripesForRelfilenode(relation->rd_node);
RelationClose(relation);
uint32 maxColumnCount = 0; uint32 maxColumnCount = 0;
uint64 totalStripeSize = 0; uint64 totalStripeSize = 0;
ListCell *stripeMetadataCell = NULL; StripeMetadata *stripeMetadata = NULL;
rel = NULL; foreach_ptr(stripeMetadata, stripeList)
foreach(stripeMetadataCell, stripeList)
{ {
StripeMetadata *stripeMetadata = (StripeMetadata *) lfirst(stripeMetadataCell);
totalStripeSize += stripeMetadata->dataLength; totalStripeSize += stripeMetadata->dataLength;
maxColumnCount = Max(maxColumnCount, stripeMetadata->columnCount); maxColumnCount = Max(maxColumnCount, stripeMetadata->columnCount);
} }
{
Bitmapset *attr_needed = rte->selectedCols;
double numberOfColumnsRead = bms_num_members(attr_needed);
double selectionRatio = 0;
/* /*
* When no stripes are in the table we don't have a count in maxColumnCount. To * When no stripes are in the table we don't have a count in maxColumnCount. To
* prevent a division by zero turning into a NaN we keep the ratio on zero. * prevent a division by zero turning into a NaN we keep the ratio on zero.
* This will result in a cost of 0 for scanning the table which is a reasonable * This will result in a cost of 0 for scanning the table which is a reasonable
* cost on an empty table. * cost on an empty table.
*/ */
if (maxColumnCount != 0) if (maxColumnCount == 0)
{ {
selectionRatio = numberOfColumnsRead / (double) maxColumnCount; return 0;
}
Cost scanCost = (double) totalStripeSize / BLCKSZ * selectionRatio;
return scanCost;
} }
double columnSelectionRatio = numberOfColumnsRead / (double) maxColumnCount;
Cost tableScanCost = (double) totalStripeSize / BLCKSZ * columnSelectionRatio;
Cost perStripeScanCost = tableScanCost / list_length(stripeList);
return perStripeScanCost;
}
/*
* ColumnarTableStripeCount returns the number of stripes that the columnar
* table identified by relationId has.
*
* The count is the length of the list that StripesForRelfilenode returns
* for the relation's rd_node, i.e. it is taken from stripe metadata.
*
* NOTE(review): the result of RelationIdGetRelation is dereferenced without
* a validity check -- confirm that it cannot be NULL for the relation ids
* passed in here.
*/
static uint64
ColumnarTableStripeCount(Oid relationId)
{
Relation relation = RelationIdGetRelation(relationId);
List *stripeList = StripesForRelfilenode(relation->rd_node);
/* list_length result is held in an int and implicitly converted to the uint64 return type */
int stripeCount = list_length(stripeList);
/* RelationClose is called before returning; only stripeCount is used afterwards */
RelationClose(relation);
return stripeCount;
} }