mirror of https://github.com/citusdata/citus.git
Fix write queries with const expressions and COLLATE in various places (#3973)
parent
ab01571c9e
commit
16242d5264
|
@ -2676,7 +2676,6 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
|
|||
Oid distributedTableId = ExtractFirstCitusTableId(query);
|
||||
CitusTableCacheEntry *cacheEntry = GetCitusTableCacheEntry(distributedTableId);
|
||||
char partitionMethod = cacheEntry->partitionMethod;
|
||||
uint32 rangeTableId = 1;
|
||||
List *modifyRouteList = NIL;
|
||||
ListCell *insertValuesCell = NULL;
|
||||
|
||||
|
@ -2714,7 +2713,7 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
|
|||
return modifyRouteList;
|
||||
}
|
||||
|
||||
Var *partitionColumn = PartitionColumn(distributedTableId, rangeTableId);
|
||||
Var *partitionColumn = cacheEntry->partitionColumn;
|
||||
|
||||
/* get full list of insert values and iterate over them to prune */
|
||||
List *insertValuesList = ExtractInsertValuesList(query, partitionColumn);
|
||||
|
@ -2723,8 +2722,38 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
|
|||
{
|
||||
InsertValues *insertValues = (InsertValues *) lfirst(insertValuesCell);
|
||||
List *prunedShardIntervalList = NIL;
|
||||
Expr *partitionValueExpr = (Expr *) strip_implicit_coercions(
|
||||
(Node *) insertValues->partitionValueExpr);
|
||||
Node *partitionValueExpr = (Node *) insertValues->partitionValueExpr;
|
||||
|
||||
/*
|
||||
* We only support constant partition values at this point. Sometimes
|
||||
* they are wrappend in an implicit coercion though. Most notably
|
||||
* FuncExpr coercions for casts created with CREATE CAST ... WITH
|
||||
* FUNCTION .. AS IMPLICIT. To support this first we strip them here.
|
||||
* Then we do the coercion manually below using
|
||||
* TransformPartitionRestrictionValue, if the types are not the same.
|
||||
*
|
||||
* NOTE: eval_const_expressions below would do some of these removals
|
||||
* too, but it's unclear if it would do all of them. It is possible
|
||||
* that there are no cases where this strip_implicit_coercions call is
|
||||
* really necessary at all, but currently that's hard to rule out.
|
||||
* So to be on the safe side we call strip_implicit_coercions too, to
|
||||
* be sure we support as much as possible.
|
||||
*/
|
||||
partitionValueExpr = strip_implicit_coercions(partitionValueExpr);
|
||||
|
||||
/*
|
||||
* By evaluating constant expressions an expression such as 2 + 4
|
||||
* will become const 6. That way we can use them as a partition column
|
||||
* value. Normally the planner evaluates constant expressions, but we
|
||||
* may be working on the original query tree here. So we do it here
|
||||
* explicitely before checking that the partition value is a const.
|
||||
*
|
||||
* NOTE: We do not use expression_planner here, since all it does
|
||||
* apart from calling eval_const_expressions is call fix_opfuncids.
|
||||
* This is not needed here, since it's a no-op for T_Const nodes and we
|
||||
* error out below in all other cases.
|
||||
*/
|
||||
partitionValueExpr = eval_const_expressions(NULL, partitionValueExpr);
|
||||
|
||||
if (!IsA(partitionValueExpr, Const))
|
||||
{
|
||||
|
@ -2741,21 +2770,20 @@ BuildRoutesForInsert(Query *query, DeferredErrorMessage **planningError)
|
|||
"column")));
|
||||
}
|
||||
|
||||
/* actually do the coercions that we skipped before, if fails throw an
|
||||
* error */
|
||||
if (partitionValueConst->consttype != partitionColumn->vartype)
|
||||
{
|
||||
bool missingOk = false;
|
||||
partitionValueConst =
|
||||
TransformPartitionRestrictionValue(partitionColumn,
|
||||
partitionValueConst,
|
||||
missingOk);
|
||||
}
|
||||
|
||||
if (partitionMethod == DISTRIBUTE_BY_HASH || partitionMethod ==
|
||||
DISTRIBUTE_BY_RANGE)
|
||||
{
|
||||
Var *distributionKey = cacheEntry->partitionColumn;
|
||||
|
||||
/* handle coercions, if fails throw an error */
|
||||
if (partitionValueConst->consttype != distributionKey->vartype)
|
||||
{
|
||||
bool missingOk = false;
|
||||
partitionValueConst =
|
||||
TransformPartitionRestrictionValue(distributionKey,
|
||||
partitionValueConst,
|
||||
missingOk);
|
||||
}
|
||||
|
||||
Datum partitionValue = partitionValueConst->constvalue;
|
||||
|
||||
ShardInterval *shardInterval = FindShardInterval(partitionValue, cacheEntry);
|
||||
|
|
|
@ -118,6 +118,40 @@ PartiallyEvaluateExpression(Node *expression,
|
|||
else if (ShouldEvaluateExpression((Expr *) expression) &&
|
||||
ShouldEvaluateFunctions(coordinatorEvaluationContext))
|
||||
{
|
||||
/*
|
||||
* The planner normally evaluates constant expressions, but we may be
|
||||
* working on the original query tree. We could rely on
|
||||
* citus_evaluate_expr to evaluate constant expressions, but there are
|
||||
* certain node types that citus_evaluate_expr does not expect because
|
||||
* the planner normally replaces them (in particular, CollateExpr).
|
||||
* Hence, we first evaluate constant expressions using
|
||||
* eval_const_expressions before continuing.
|
||||
*
|
||||
* NOTE: We do not use expression_planner here, since all it does
|
||||
* apart from calling eval_const_expressions is call fix_opfuncids.
|
||||
* We do not need this, since that is already called in
|
||||
* citus_evaluate_expr. So we won't needlessly traverse the expression
|
||||
* tree by calling it another time.
|
||||
*/
|
||||
expression = eval_const_expressions(NULL, expression);
|
||||
|
||||
/*
|
||||
* It's possible that after evaluating const expressions we
|
||||
* actually don't need to evaluate this expression anymore e.g:
|
||||
*
|
||||
* 1 = 0 AND now() > timestamp '10-10-2000 00:00'
|
||||
*
|
||||
* This statement would simply resolve to false, because 1 = 0 is
|
||||
* false. That's why we now check again if we should evaluate the
|
||||
* expression and only continue if we still do.
|
||||
*/
|
||||
if (!ShouldEvaluateExpression((Expr *) expression))
|
||||
{
|
||||
return (Node *) expression_tree_mutator(expression,
|
||||
PartiallyEvaluateExpression,
|
||||
coordinatorEvaluationContext);
|
||||
}
|
||||
|
||||
if (FindNodeCheck(expression, IsVariableExpression))
|
||||
{
|
||||
/*
|
||||
|
|
|
@ -142,6 +142,85 @@ SELECT * FROM composite_type_partitioned_table WHERE col = '(7, 8)'::test_compo
|
|||
6 | (7,8)
|
||||
(1 row)
|
||||
|
||||
CREATE TYPE other_composite_type AS (
|
||||
i integer,
|
||||
i2 integer
|
||||
);
|
||||
-- Check that casts are correctly done on partition columns
|
||||
SELECT run_command_on_coordinator_and_workers($cf$
|
||||
CREATE CAST (other_composite_type AS test_composite_type) WITH INOUT AS IMPLICIT;
|
||||
$cf$);
|
||||
run_command_on_coordinator_and_workers
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type);
|
||||
SELECT * FROM composite_type_partitioned_table WHERE id = 123;
|
||||
id | col
|
||||
---------------------------------------------------------------------
|
||||
123 | (123,456)
|
||||
(1 row)
|
||||
|
||||
EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE)
|
||||
INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type);
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
|
||||
Task Count: 1
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Query: INSERT INTO public.composite_type_partitioned_table_530003 (id, col) VALUES (123, '(123,456)'::public.test_composite_type)
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Insert on public.composite_type_partitioned_table_530003 (actual rows=0 loops=1)
|
||||
-> Result (actual rows=1 loops=1)
|
||||
Output: 123, '(123,456)'::test_composite_type
|
||||
(9 rows)
|
||||
|
||||
SELECT run_command_on_coordinator_and_workers($cf$
|
||||
DROP CAST (other_composite_type as test_composite_type);
|
||||
$cf$);
|
||||
run_command_on_coordinator_and_workers
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT run_command_on_coordinator_and_workers($cf$
|
||||
CREATE FUNCTION to_test_composite_type(arg other_composite_type) RETURNS test_composite_type
|
||||
AS 'select arg::text::test_composite_type;'
|
||||
LANGUAGE SQL
|
||||
IMMUTABLE
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
$cf$);
|
||||
run_command_on_coordinator_and_workers
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT run_command_on_coordinator_and_workers($cf$
|
||||
CREATE CAST (other_composite_type AS test_composite_type) WITH FUNCTION to_test_composite_type(other_composite_type) AS IMPLICIT;
|
||||
$cf$);
|
||||
run_command_on_coordinator_and_workers
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO composite_type_partitioned_table VALUES (456, '(456, 678)'::other_composite_type);
|
||||
EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE)
|
||||
INSERT INTO composite_type_partitioned_table VALUES (123, '(456, 678)'::other_composite_type);
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive) (actual rows=0 loops=1)
|
||||
Task Count: 1
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Query: INSERT INTO public.composite_type_partitioned_table_530000 (id, col) VALUES (123, '(456,678)'::public.other_composite_type)
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Insert on public.composite_type_partitioned_table_530000 (actual rows=0 loops=1)
|
||||
-> Result (actual rows=1 loops=1)
|
||||
Output: 123, '(456,678)'::test_composite_type
|
||||
(9 rows)
|
||||
|
||||
-- create and distribute a table on enum type column
|
||||
CREATE TYPE bug_status AS ENUM ('new', 'open', 'closed');
|
||||
CREATE TABLE bugs (
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
CREATE SCHEMA sqlancer_failures;
|
||||
SET search_path TO sqlancer_failures;
|
||||
SET citus.shard_count TO 4;
|
||||
SET citus.shard_replication_factor TO 1;
|
||||
SET citus.next_shard_id TO 92862400;
|
||||
CREATE TABLE t0 (c0 int, c1 MONEY);
|
||||
SELECT create_distributed_table('t0', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
UPDATE t0 SET c1 = ((0.43107963)::MONEY) WHERE ((upper('-14295774') COLLATE "de_CH
|
||||
.utf8") SIMILAR TO '');
|
||||
ERROR: collation "de_CH
|
||||
.utf8" for encoding "UTF8" does not exist
|
||||
UPDATE t0 SET c1 = 1 WHERE '' COLLATE "C" = '';
|
||||
CREATE TABLE t1 (c0 text);
|
||||
SELECT create_distributed_table('t1', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO t1 VALUES ('' COLLATE "C");
|
||||
CREATE TABLE t2 (c0 text, c1 bool, c2 timestamptz default now());
|
||||
SELECT create_distributed_table('t2', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO t2 VALUES ('key', '' COLLATE "C" = '');
|
||||
CREATE TABLE t3 (c0 text, c1 text, c2 timestamptz default now());
|
||||
SELECT create_distributed_table('t3', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO t3 VALUES ('key', '' COLLATE "C");
|
||||
CREATE TABLE t4(c0 real, c1 boolean);
|
||||
SELECT create_distributed_table('t4', 'c1');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO t4 VALUES (1.0, 2 BETWEEN 1 AND 3);
|
||||
-- NOTE: For some reason shard pruning doesn't happen correctly here. It does
|
||||
-- work for non boolean const expressions. See explain plans for t5 below that
|
||||
-- show that. The query still works though. So doesn't seem important enough to
|
||||
-- fix, since boolean partition columns should not happen much/at all for
|
||||
-- actual users.
|
||||
EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = 2 BETWEEN 1 AND 3;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive)
|
||||
Task Count: 4
|
||||
Tasks Shown: One of 4
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Seq Scan on t4_92862416 t4
|
||||
Filter: c1
|
||||
(7 rows)
|
||||
|
||||
EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = true;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive)
|
||||
Task Count: 1
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Seq Scan on t4_92862416 t4
|
||||
Filter: c1
|
||||
(7 rows)
|
||||
|
||||
CREATE TABLE t5(c0 int);
|
||||
SELECT create_distributed_table('t5', 'c0');
|
||||
create_distributed_table
|
||||
---------------------------------------------------------------------
|
||||
|
||||
(1 row)
|
||||
|
||||
INSERT INTO t5 VALUES (CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END);
|
||||
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = 2;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive)
|
||||
Task Count: 1
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Seq Scan on t5_92862423 t5
|
||||
Filter: (c0 = 2)
|
||||
(7 rows)
|
||||
|
||||
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END;
|
||||
QUERY PLAN
|
||||
---------------------------------------------------------------------
|
||||
Custom Scan (Citus Adaptive)
|
||||
Task Count: 1
|
||||
Tasks Shown: All
|
||||
-> Task
|
||||
Node: host=localhost port=xxxxx dbname=regression
|
||||
-> Seq Scan on t5_92862423 t5
|
||||
Filter: (c0 = 2)
|
||||
(7 rows)
|
||||
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA sqlancer_failures CASCADE;
|
|
@ -352,6 +352,11 @@ test: multi_deparse_function multi_deparse_procedure
|
|||
# --------
|
||||
test: shared_connection_stats
|
||||
|
||||
# ---------
|
||||
# run queries generated by sql smith and sqlancer that caused issues in the past
|
||||
# --------
|
||||
test: sqlsmith_failures sqlancer_failures
|
||||
|
||||
# ---------
|
||||
# test that no tests leaked intermediate results. This should always be last
|
||||
# ---------
|
||||
|
@ -363,7 +368,3 @@ test: ensure_no_intermediate_data_leak
|
|||
# --------
|
||||
test: ensure_no_shared_connection_leak
|
||||
|
||||
# ---------
|
||||
# run queries generated by sql smith that caused issues in the past
|
||||
# --------
|
||||
test: sqlsmith_failures
|
||||
|
|
|
@ -103,6 +103,42 @@ UPDATE composite_type_partitioned_table SET id = 6 WHERE col = '(7, 8)'::test_c
|
|||
|
||||
SELECT * FROM composite_type_partitioned_table WHERE col = '(7, 8)'::test_composite_type;
|
||||
|
||||
CREATE TYPE other_composite_type AS (
|
||||
i integer,
|
||||
i2 integer
|
||||
);
|
||||
|
||||
-- Check that casts are correctly done on partition columns
|
||||
SELECT run_command_on_coordinator_and_workers($cf$
|
||||
CREATE CAST (other_composite_type AS test_composite_type) WITH INOUT AS IMPLICIT;
|
||||
$cf$);
|
||||
|
||||
INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type);
|
||||
SELECT * FROM composite_type_partitioned_table WHERE id = 123;
|
||||
|
||||
EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE)
|
||||
INSERT INTO composite_type_partitioned_table VALUES (123, '(123, 456)'::other_composite_type);
|
||||
|
||||
SELECT run_command_on_coordinator_and_workers($cf$
|
||||
DROP CAST (other_composite_type as test_composite_type);
|
||||
$cf$);
|
||||
SELECT run_command_on_coordinator_and_workers($cf$
|
||||
CREATE FUNCTION to_test_composite_type(arg other_composite_type) RETURNS test_composite_type
|
||||
AS 'select arg::text::test_composite_type;'
|
||||
LANGUAGE SQL
|
||||
IMMUTABLE
|
||||
RETURNS NULL ON NULL INPUT;
|
||||
$cf$);
|
||||
|
||||
SELECT run_command_on_coordinator_and_workers($cf$
|
||||
CREATE CAST (other_composite_type AS test_composite_type) WITH FUNCTION to_test_composite_type(other_composite_type) AS IMPLICIT;
|
||||
$cf$);
|
||||
INSERT INTO composite_type_partitioned_table VALUES (456, '(456, 678)'::other_composite_type);
|
||||
|
||||
EXPLAIN (ANALYZE TRUE, COSTS FALSE, VERBOSE TRUE, TIMING FALSE, SUMMARY FALSE)
|
||||
INSERT INTO composite_type_partitioned_table VALUES (123, '(456, 678)'::other_composite_type);
|
||||
|
||||
|
||||
|
||||
-- create and distribute a table on enum type column
|
||||
CREATE TYPE bug_status AS ENUM ('new', 'open', 'closed');
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
CREATE SCHEMA sqlancer_failures;
|
||||
SET search_path TO sqlancer_failures;
|
||||
SET citus.shard_count TO 4;
|
||||
SET citus.shard_replication_factor TO 1;
|
||||
SET citus.next_shard_id TO 92862400;
|
||||
|
||||
CREATE TABLE t0 (c0 int, c1 MONEY);
|
||||
SELECT create_distributed_table('t0', 'c0');
|
||||
UPDATE t0 SET c1 = ((0.43107963)::MONEY) WHERE ((upper('-14295774') COLLATE "de_CH
|
||||
.utf8") SIMILAR TO '');
|
||||
UPDATE t0 SET c1 = 1 WHERE '' COLLATE "C" = '';
|
||||
|
||||
CREATE TABLE t1 (c0 text);
|
||||
SELECT create_distributed_table('t1', 'c0');
|
||||
INSERT INTO t1 VALUES ('' COLLATE "C");
|
||||
|
||||
CREATE TABLE t2 (c0 text, c1 bool, c2 timestamptz default now());
|
||||
SELECT create_distributed_table('t2', 'c0');
|
||||
INSERT INTO t2 VALUES ('key', '' COLLATE "C" = '');
|
||||
|
||||
CREATE TABLE t3 (c0 text, c1 text, c2 timestamptz default now());
|
||||
SELECT create_distributed_table('t3', 'c0');
|
||||
INSERT INTO t3 VALUES ('key', '' COLLATE "C");
|
||||
|
||||
CREATE TABLE t4(c0 real, c1 boolean);
|
||||
SELECT create_distributed_table('t4', 'c1');
|
||||
INSERT INTO t4 VALUES (1.0, 2 BETWEEN 1 AND 3);
|
||||
-- NOTE: For some reason shard pruning doesn't happen correctly here. It does
|
||||
-- work for non boolean const expressions. See explain plans for t5 below that
|
||||
-- show that. The query still works though. So doesn't seem important enough to
|
||||
-- fix, since boolean partition columns should not happen much/at all for
|
||||
-- actual users.
|
||||
EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = 2 BETWEEN 1 AND 3;
|
||||
EXPLAIN (COSTS FALSE) SELECT FROM t4 WHERE c1 = true;
|
||||
|
||||
CREATE TABLE t5(c0 int);
|
||||
SELECT create_distributed_table('t5', 'c0');
|
||||
INSERT INTO t5 VALUES (CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END);
|
||||
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = 2;
|
||||
EXPLAIN (COSTS FALSE) SELECT FROM t5 WHERE c0 = CASE WHEN 2 BETWEEN 1 AND 3 THEN 2 ELSE 1 END;
|
||||
|
||||
SET client_min_messages TO WARNING;
|
||||
DROP SCHEMA sqlancer_failures CASCADE;
|
Loading…
Reference in New Issue