Fix write queries with const expressions and COLLATE in various places (#3973)

pull/4003/head
Jelte Fennema 2020-07-08 18:19:53 +02:00 committed by GitHub
parent ab01571c9e
commit 16242d5264
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 353 additions and 20 deletions

View File

@ -2676,7 +2676,6 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
Oid distributedTableId = ExtractFirstCitusTableId(query);
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(distributedTableId);
char partitionMethod = cacheEntry->partitionMethod;
uint32 rangeTableId = 1;
List *modifyRouteList = NIL;
ListCell *insertValuesCell = NULL;
@ -2714,7 +2713,7 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
return modifyRouteList;
}
Var *partitionColumn = PartitionColumn(distributedTableId, rangeTableId);
Var *partitionColumn = cacheEntry->partitionColumn;
/* get full list of insert values and iterate over them to prune */
List *insertValuesList = ExtractInsertValuesList(query, partitionColumn);
@ -2723,8 +2722,38 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
{
InsertValues *insertValues = (InsertValues *) lfirst(insertValuesCell);
List *prunedShardIntervalList = NIL;
Expr *partitionValueExpr = (Expr *) strip_implicit_coercions(
(Node *) insertValues->partitionValueExpr);
Node *partitionValueExpr = (Node *) insertValues->partitionValueExpr;
/*
* We only support constant partition values at this point. Sometimes
* they are wrappend in an implicit coercion though. Most notably
* FuncExpr coercions for casts created with CREATE CAST ... WITH
* FUNCTION .. AS IMPLICIT. To support this first we strip them here.
* Then we do the coercion manually below using
* TransformPartitionRestrictionValue, if the types are not the same.
*
* NOTE: eval_const_expressions below would do some of these removals
* too, but it's unclear if it would do all of them. It is possible
* that there are no cases where this strip_implicit_coercions call is
* really necessary at all, but currently that's hard to rule out.
* So to be on the safe side we call strip_implicit_coercions too, to
* be sure we support as much as possible.
*/
partitionValueExpr = strip_implicit_coercions(partitionValueExpr);
/*
* By evaluating constant expressions an expression such as 2 + 4
* will become const 6. That way we can use them as a partition column
* value. Normally the planner evaluates constant expressions, but we
* may be working on the original query tree here. So we do it here
* explicitely before checking that the partition value is a const.
*
* NOTE: We do not use expression_planner here, since all it does
* apart from calling eval_const_expressions is call fix_opfuncids.
* This is not needed here, since it's a no-op for T_Const nodes and we
* error out below in all other cases.
*/
partitionValueExpr = eval_const_expressions(NULL, partitionValueExpr);
if (!IsA(partitionValueExpr, Const))
{
@ -2741,21 +2770,20 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
"column")));
}
/* actually do the coercions that we skipped before, if fails throw an
* error */
if (partitionValueConst->consttype != partitionColumn->vartype)
{
bool missingOk = false;
partitionValueConst =
TransformPartitionRestrictionValue(partitionColumn,
partitionValueConst,
missingOk);
}
if (partitionMethod == DISTRIBUTE_BY_HASH || partitionMethod ==
DISTRIBUTE_BY_RANGE)
{
Var *distributionKey = cacheEntry->partitionColumn;
/* handle coercions, if fails throw an error */
if (partitionValueConst->consttype != distributionKey->vartype)
{
bool missingOk = false;
partitionValueConst =
TransformPartitionRestrictionValue(distributionKey,
partitionValueConst,
missingOk);
}
Datum partitionValue = partitionValueConst->constvalue;
ShardInterval *shardInterval = FindShardInterval(partitionValue, cacheEntry);

View File

@ -118,6 +118,40 @@ PartiallyEvaluateExpression(Node *expression,
else if (ShouldEvaluateExpression((Expr *) expression) &&
ShouldEvaluateFunctions(coordinatorEvaluationContext))
{
/*
* The planner normally evaluates constant expressions, but we may be
* working on the original query tree. We could rely on
* citus_evaluate_expr to evaluate constant expressions, but there are
* certain node types that citus_evaluate_expr does not expect because
* the planner normally replaces them (in particular, CollateExpr).
* Hence, we first evaluate constant expressions using
* eval_const_expressions before continuing.
*
* NOTE: We do not use expression_planner here, since all it does
* apart from calling eval_const_expressions is call fix_opfuncids.
* We do not need this, since that is already called in
* citus_evaluate_expr. So we won't needlessly traverse the expression
* tree by calling it another time.
*/
expression = eval_const_expressions(NULL, expression);
/*
* It's possible that after evaluating const expressions we
* actually don't need to evaluate this expression anymore e.g:
*
* 1 = 0 AND now() > timestamp '10-10-2000 00:00'
*
* This statement would simply resolve to false, because 1 = 0 is
* false. That's why we now check again if we should evaluate the
* expression and only continue if we still do.
*/
if (!ShouldEvaluateExpression((Expr *) expression))
{
return (Node *) expression_tree_mutator(expression,
PartiallyEvaluateExpression,
coordinatorEvaluationContext);
}
if (FindNodeCheck(expression, IsVariableExpression))
{
/*

View File

@ -142,6 +142,85 @@ SELECT * FROM composite_type_partitioned_table WHERE col = '(7, 8)'::test_compo
6 | (7,8)
(1 row)
CREATE TYPE other_composite_type AS (
i integer,
i2 integer
);
-- Check that casts are correctly done on partition columns
SELECT run_command_on_coordinator_and_workers($cf$
CREATE CAST (other_composite_type AS test_composite_type) WITH INOUT AS IMPLICIT;
$cf$);
run_command_on_coordinator_and_workers
---------------------------------------------------------------------
(1 row)
INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type);
SELECT * FROM composite_type_partitioned_table WHERE id = 123;
id | col
---------------------------------------------------------------------
123 | (123,456)
(1 row)
EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE)
INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type);
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
Task Count: 1
Tasks Shown: All
-> Task
Query: INSERT INTO public.composite_type_partitioned_table_530003 (id, col) VALUES (123, '(123,456)'::public.test_composite_type)
Node: host=localhost port=xxxxx dbname=regression
-> Insert on public.composite_type_partitioned_table_530003 (actual rows=0 loops=1)
-> Result (actual rows=1 loops=1)
Output: 123, '(123,456)'::test_composite_type
(9 rows)
SELECT run_command_on_coordinator_and_workers($cf$
DROP CAST (other_composite_type as test_composite_type);
$cf$);
run_command_on_coordinator_and_workers
---------------------------------------------------------------------
(1 row)
SELECT run_command_on_coordinator_and_workers($cf$
CREATE FUNCTION to_test_composite_type(arg other_composite_type) RETURNS test_composite_type
AS 'select arg::text::test_composite_type;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
$cf$);
run_command_on_coordinator_and_workers
---------------------------------------------------------------------
(1 row)
SELECT run_command_on_coordinator_and_workers($cf$
CREATE CAST (other_composite_type AS test_composite_type) WITH FUNCTION to_test_composite_type(other_composite_type) AS IMPLICIT;
$cf$);
run_command_on_coordinator_and_workers
---------------------------------------------------------------------
(1 row)
INSERT INTO composite_type_partitioned_table VALUES (456, '(456, 678)'::other_composite_type);
EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE)
INSERT INTO composite_type_partitioned_table VALUES (123, '(456, 678)'::other_composite_type);
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
Task Count: 1
Tasks Shown: All
-> Task
Query: INSERT INTO public.composite_type_partitioned_table_530000 (id, col) VALUES (123, '(456,678)'::public.other_composite_type)
Node: host=localhost port=xxxxx dbname=regression
-> Insert on public.composite_type_partitioned_table_530000 (actual rows=0 loops=1)
-> Result (actual rows=1 loops=1)
Output: 123, '(456,678)'::test_composite_type
(9 rows)
-- create and distribute a table on enum type column
CREATE TYPE bug_status AS ENUM ('new', 'open', 'closed');
CREATE TABLE bugs (

View File

@ -0,0 +1,112 @@
CREATE SCHEMA sqlancer_failures;
SET search_path TO sqlancer_failures;
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
SET citus.next_shard_id TO 92862400;
CREATE TABLE t0 (c0 int, c1 MONEY);
SELECT create_distributed_table('t0', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
UPDATE t0 SET c1 = ((0.43107963)::MONEY) WHERE ((upper('-14295774') COLLATE "de_CH
.utf8") SIMILAR TO '');
ERROR: collation "de_CH
.utf8" for encoding "UTF8" does not exist
UPDATE t0 SET c1 = 1 WHERE '' COLLATE "C" = '';
CREATE TABLE t1 (c0 text);
SELECT create_distributed_table('t1', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO t1 VALUES ('' COLLATE "C");
CREATE TABLE t2 (c0 text, c1 bool, c2 timestamptz default now());
SELECT create_distributed_table('t2', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO t2 VALUES ('key', '' COLLATE "C" = '');
CREATE TABLE t3 (c0 text, c1 text, c2 timestamptz default now());
SELECT create_distributed_table('t3', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO t3 VALUES ('key', '' COLLATE "C");
CREATE TABLE t4(c0 real, c1 boolean);
SELECT create_distributed_table('t4', 'c1');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO t4 VALUES (1.0, 2 BETWEEN 1 AND 3);
-- NOTE: For some reason shard pruning doesn't happen correctly here. It does
-- work for non boolean const expressions. See explain plans for t5 below that
-- show that. The query still works though. So doesn't seem important enough to
-- fix, since boolean partition columns should not happen much/at all for
-- actual users.
EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = 2 BETWEEN 1 AND 3;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 4
Tasks Shown: One of 4
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on t4_92862416 t4
Filter: c1
(7 rows)
EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = true;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on t4_92862416 t4
Filter: c1
(7 rows)
CREATE TABLE t5(c0 int);
SELECT create_distributed_table('t5', 'c0');
create_distributed_table
---------------------------------------------------------------------
(1 row)
INSERT INTO t5 VALUES (CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END);
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = 2;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on t5_92862423 t5
Filter: (c0 = 2)
(7 rows)
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END;
QUERY PLAN
---------------------------------------------------------------------
Custom Scan (Citus Adaptive)
Task Count: 1
Tasks Shown: All
-> Task
Node: host=localhost port=xxxxx dbname=regression
-> Seq Scan on t5_92862423 t5
Filter: (c0 = 2)
(7 rows)
SET client_min_messages TO WARNING;
DROP SCHEMA sqlancer_failures CASCADE;

View File

@ -352,6 +352,11 @@ test: multi_deparse_function multi_deparse_procedure
# --------
test: shared_connection_stats
# ---------
# run queries generated by sql smith and sqlancer that caused issues in the past
# --------
test: sqlsmith_failures sqlancer_failures
# ---------
# test that no tests leaked intermediate results. This should always be last
# ---------
@ -363,7 +368,3 @@ test: ensure_no_intermediate_data_leak
# --------
test: ensure_no_shared_connection_leak
# ---------
# run queries generated by sql smith that caused issues in the past
# --------
test: sqlsmith_failures

View File

@ -103,6 +103,42 @@ UPDATE composite_type_partitioned_table SET id = 6 WHERE col = '(7, 8)'::test_c
SELECT * FROM composite_type_partitioned_table WHERE col = '(7, 8)'::test_composite_type;
CREATE TYPE other_composite_type AS (
i integer,
i2 integer
);
-- Check that casts are correctly done on partition columns
SELECT run_command_on_coordinator_and_workers($cf$
CREATE CAST (other_composite_type AS test_composite_type) WITH INOUT AS IMPLICIT;
$cf$);
INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type);
SELECT * FROM composite_type_partitioned_table WHERE id = 123;
EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE)
INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type);
SELECT run_command_on_coordinator_and_workers($cf$
DROP CAST (other_composite_type as test_composite_type);
$cf$);
SELECT run_command_on_coordinator_and_workers($cf$
CREATE FUNCTION to_test_composite_type(arg other_composite_type) RETURNS test_composite_type
AS 'select arg::text::test_composite_type;'
LANGUAGE SQL
IMMUTABLE
RETURNS NULL ON NULL INPUT;
$cf$);
SELECT run_command_on_coordinator_and_workers($cf$
CREATE CAST (other_composite_type AS test_composite_type) WITH FUNCTION to_test_composite_type(other_composite_type) AS IMPLICIT;
$cf$);
INSERT INTO composite_type_partitioned_table VALUES (456, '(456, 678)'::other_composite_type);
EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE)
INSERT INTO composite_type_partitioned_table VALUES (123, '(456, 678)'::other_composite_type);
-- create and distribute a table on enum type column
CREATE TYPE bug_status AS ENUM ('new', 'open', 'closed');

View File

@ -0,0 +1,43 @@
CREATE SCHEMA sqlancer_failures;
SET search_path TO sqlancer_failures;
SET citus.shard_count TO 4;
SET citus.shard_replication_factor TO 1;
SET citus.next_shard_id TO 92862400;
CREATE TABLE t0 (c0 int, c1 MONEY);
SELECT create_distributed_table('t0', 'c0');
UPDATE t0 SET c1 = ((0.43107963)::MONEY) WHERE ((upper('-14295774') COLLATE "de_CH
.utf8") SIMILAR TO '');
UPDATE t0 SET c1 = 1 WHERE '' COLLATE "C" = '';
CREATE TABLE t1 (c0 text);
SELECT create_distributed_table('t1', 'c0');
INSERT INTO t1 VALUES ('' COLLATE "C");
CREATE TABLE t2 (c0 text, c1 bool, c2 timestamptz default now());
SELECT create_distributed_table('t2', 'c0');
INSERT INTO t2 VALUES ('key', '' COLLATE "C" = '');
CREATE TABLE t3 (c0 text, c1 text, c2 timestamptz default now());
SELECT create_distributed_table('t3', 'c0');
INSERT INTO t3 VALUES ('key', '' COLLATE "C");
CREATE TABLE t4(c0 real, c1 boolean);
SELECT create_distributed_table('t4', 'c1');
INSERT INTO t4 VALUES (1.0, 2 BETWEEN 1 AND 3);
-- NOTE: For some reason shard pruning doesn't happen correctly here. It does
-- work for non boolean const expressions. See explain plans for t5 below that
-- show that. The query still works though. So doesn't seem important enough to
-- fix, since boolean partition columns should not happen much/at all for
-- actual users.
EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = 2 BETWEEN 1 AND 3;
EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = true;
CREATE TABLE t5(c0 int);
SELECT create_distributed_table('t5', 'c0');
INSERT INTO t5 VALUES (CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END);
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = 2;
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END;
SET client_min_messages TO WARNING;
DROP SCHEMA sqlancer_failures CASCADE;