Merge pull request #2640 from citusdata/fix_bad_pruning

Address unsafe coercion removal in pruning logic

cr: @onderkalaci
pull/2631/head
Jason Petersen 2019-03-25 23:05:41 -05:00 committed by GitHub
commit f218549572
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 164 additions and 11 deletions

View File

@ -63,6 +63,8 @@
#include "nodes/nodeFuncs.h"
#include "nodes/makefuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/planner.h"
#include "parser/parse_coerce.h"
#include "utils/arrayaccess.h"
#include "utils/catcache.h"
#include "utils/lsyscache.h"
@ -171,6 +173,8 @@ static bool PrunableExpressionsWalker(Node *originalNode, ClauseWalkerContext *c
static void AddPartitionKeyRestrictionToInstance(ClauseWalkerContext *context,
OpExpr *opClause, Var *varClause,
Const *constantClause);
static Const * TransformPartitionRestrictionValue(Var *partitionColumn,
Const *restrictionValue);
static void AddSAOPartitionKeyRestrictionToInstance(ClauseWalkerContext *context,
ScalarArrayOpExpr *
arrayOperatorExpression);
@ -530,8 +534,7 @@ PrunableExpressionsWalker(Node *node, ClauseWalkerContext *context)
if (!prune->addedToPruningInstances)
{
context->pruningInstances = lappend(context->pruningInstances,
prune);
context->pruningInstances = lappend(context->pruningInstances, prune);
prune->addedToPruningInstances = true;
}
@ -561,8 +564,8 @@ PrunableExpressionsWalker(Node *node, ClauseWalkerContext *context)
* Found a restriction on the partition column itself. Update the
* current constraint with the new information.
*/
AddPartitionKeyRestrictionToInstance(context,
opClause, varClause, constantClause);
AddPartitionKeyRestrictionToInstance(context, opClause, varClause,
constantClause);
}
else if (constantClause && varClause &&
varClause->varattno == RESERVED_HASHED_COLUMN_ID)
@ -593,8 +596,7 @@ PrunableExpressionsWalker(Node *node, ClauseWalkerContext *context)
*/
if (!prune->addedToPruningInstances)
{
context->pruningInstances = lappend(context->pruningInstances,
prune);
context->pruningInstances = lappend(context->pruningInstances, prune);
prune->addedToPruningInstances = true;
}
@ -715,15 +717,32 @@ AddNewConjuction(ClauseWalkerContext *context, OpExpr *op)
*/
static void
AddPartitionKeyRestrictionToInstance(ClauseWalkerContext *context, OpExpr *opClause,
Var *varClause, Const *constantClause)
Var *partitionColumn, Const *constantClause)
{
PruningInstance *prune = context->currentPruningInstance;
List *btreeInterpretationList = NULL;
ListCell *btreeInterpretationCell = NULL;
bool matchedOp = false;
btreeInterpretationList =
get_op_btree_interpretation(opClause->opno);
/* only have extra work to do if const isn't same type as partition column */
if (constantClause->consttype != partitionColumn->vartype)
{
/* we want our restriction value in terms of the type of the partition column */
constantClause = TransformPartitionRestrictionValue(partitionColumn,
constantClause);
if (constantClause == NULL)
{
/* couldn't coerce value, so we note this as a restriction we don't grok */
prune->otherRestrictions = lappend(prune->otherRestrictions, opClause);
return;
}
}
/* at this point, we'd better be able to pass binary Datums to comparison functions */
Assert(IsBinaryCoercible(constantClause->consttype, partitionColumn->vartype));
btreeInterpretationList = get_op_btree_interpretation(opClause->opno);
foreach(btreeInterpretationCell, btreeInterpretationList)
{
OpBtreeInterpretation *btreeInterpretation =
@ -803,6 +822,19 @@ AddPartitionKeyRestrictionToInstance(ClauseWalkerContext *context, OpExpr *opCla
case ROWCOMPARE_NE:
{
/*
* This case should only arise when ALL list elements have this
* "strategy" number set. Skipping to the end of the list might
* protect us if that assumption is violated, and an Assert can
* notify us if it ever is...
*/
/* should see this value immediately */
Assert(btreeInterpretationCell == btreeInterpretationList->head);
/* stop processing early, would only see unsupported nodes anyhow */
btreeInterpretationCell = btreeInterpretationList->tail;
/* TODO: could add support for this, if we feel like it */
matchedOp = false;
break;
@ -815,8 +847,7 @@ AddPartitionKeyRestrictionToInstance(ClauseWalkerContext *context, OpExpr *opCla
if (!matchedOp)
{
prune->otherRestrictions =
lappend(prune->otherRestrictions, opClause);
prune->otherRestrictions = lappend(prune->otherRestrictions, opClause);
}
else
{
@ -825,6 +856,45 @@ AddPartitionKeyRestrictionToInstance(ClauseWalkerContext *context, OpExpr *opCla
}
/*
* Sometimes PostgreSQL chooses to try to wrap our Var in a coercion rather
* than the Const; to deal with this, we strip the coercions from both and
* manually coerce the Const into the type of our partition column. It is
* conceivable that in some instances, this may not be possible; in those cases
* we will simply fail to prune partitions based on this clause.
*/
static Const *
TransformPartitionRestrictionValue(Var *partitionColumn, Const *restrictionValue)
{
Node *transformedValue = coerce_to_target_type(NULL, (Node *) restrictionValue,
restrictionValue->consttype,
partitionColumn->vartype,
partitionColumn->vartypmod,
COERCION_ASSIGNMENT,
COERCE_IMPLICIT_CAST, -1);
/* if NULL, no implicit coercion is possible between the types */
if (transformedValue == NULL)
{
return NULL;
}
/* if still not a constant, evaluate coercion */
if (!IsA(transformedValue, Const))
{
transformedValue = (Node *) expression_planner((Expr *) transformedValue);
}
/* if still not a constant, no immutable coercion matched */
if (!IsA(transformedValue, Const))
{
return NULL;
}
return (Const *) transformedValue;
}
/*
* AddHashRestrictionToInstance adds information about a
* RESERVED_HASHED_COLUMN_ID = Const restriction to the current pruning
@ -838,6 +908,9 @@ AddHashRestrictionToInstance(ClauseWalkerContext *context, OpExpr *opClause,
List *btreeInterpretationList = NULL;
ListCell *btreeInterpretationCell = NULL;
/* be paranoid */
Assert(IsBinaryCoercible(constantClause->consttype, INT4OID));
btreeInterpretationList =
get_op_btree_interpretation(opClause->opno);
foreach(btreeInterpretationCell, btreeInterpretationList)

View File

@ -203,3 +203,51 @@ SELECT print_sorted_shard_intervals('pruning_range');
{800004,800005,800006,800007}
(1 row)
-- ===================================================================
-- test pruning using values whose types are coerced
-- ===================================================================
CREATE TABLE coerce_hash (
id bigint NOT NULL,
value text NOT NULL
);
SELECT create_distributed_table('coerce_hash', 'id');
create_distributed_table
--------------------------
(1 row)
INSERT INTO coerce_hash VALUES (1, 'test value');
-- All three of the following should return the same results...
-- SELECT with same type as partition column
SELECT * FROM coerce_hash WHERE id = 1::bigint;
id | value
----+------------
1 | test value
(1 row)
-- SELECT with similar type to partition column
SELECT * FROM coerce_hash WHERE id = 1;
id | value
----+------------
1 | test value
(1 row)
-- SELECT with numeric type (original impetus for this change)
-- PostgreSQL will cast id to numeric rather than 1.0 to bigint...
-- We used to blindly pass the RHS' Datum to our comparison func.,
-- resulting in inaccurate pruning. An Assert now verifies type-
-- compatibility; the following would crash the server in an Assert-
-- before the underlying issue was addressed. It looks like a boring
-- test now, but if the old behavior is restored, it should crash again.
SELECT * FROM coerce_hash WHERE id = 1.0;
id | value
----+------------
1 | test value
(1 row)
SELECT * FROM coerce_hash WHERE id = 1.0::numeric;
id | value
----+------------
1 | test value
(1 row)

View File

@ -119,3 +119,35 @@ SELECT print_sorted_shard_intervals('pruning_range');
-- all shard placements are uninitialized
UPDATE pg_dist_shard set shardminvalue = NULL, shardmaxvalue = NULL WHERE shardid = 103077;
SELECT print_sorted_shard_intervals('pruning_range');
-- ===================================================================
-- test pruning using values whose types are coerced
-- ===================================================================
CREATE TABLE coerce_hash (
id bigint NOT NULL,
value text NOT NULL
);
SELECT create_distributed_table('coerce_hash', 'id');
INSERT INTO coerce_hash VALUES (1, 'test value');
-- All three of the following should return the same results...
-- SELECT with same type as partition column
SELECT * FROM coerce_hash WHERE id = 1::bigint;
-- SELECT with similar type to partition column
SELECT * FROM coerce_hash WHERE id = 1;
-- SELECT with numeric type (original impetus for this change)
-- PostgreSQL will cast id to numeric rather than 1.0 to bigint...
-- We used to blindly pass the RHS' Datum to our comparison func.,
-- resulting in inaccurate pruning. An Assert now verifies type-
-- compatibility; the following would crash the server in an Assert-
-- before the underlying issue was addressed. It looks like a boring
-- test now, but if the old behavior is restored, it should crash again.
SELECT * FROM coerce_hash WHERE id = 1.0;
SELECT * FROM coerce_hash WHERE id = 1.0::numeric;