Merge pull request #23219 from azat/optimize_skip_unused_shards_rewrite_in-fix

Add type conversion for optimize_skip_unused_shards_rewrite_in
This commit is contained in:
alexey-milovidov 2021-04-19 22:45:35 +03:00 committed by GitHub
commit 5b66086bc9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 38 additions and 10 deletions

View File

@ -4,6 +4,7 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <DataTypes/FieldToDataType.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/OptimizeShardingKeyRewriteInVisitor.h>
namespace
@ -13,7 +14,7 @@ using namespace DB;
Field executeFunctionOnField(
const Field & field, const std::string & name,
const ExpressionActionsPtr & expr,
const ExpressionActionsPtr & sharding_expr,
const std::string & sharding_key_column_name)
{
DataTypePtr type = applyVisitor(FieldToDataType{}, field);
@ -25,17 +26,23 @@ Field executeFunctionOnField(
Block block{column};
size_t num_rows = 1;
expr->execute(block, num_rows);
sharding_expr->execute(block, num_rows);
ColumnWithTypeAndName & ret = block.getByName(sharding_key_column_name);
return (*ret.column)[0];
}
/// Return true if shard may contain such value (or it is unknown), otherwise false.
/// @param sharding_column_value - one of values from IN
/// @param sharding_column_name - name of that column
/// @param sharding_expr - expression of sharding_key for the Distributed() table
/// @param sharding_key_column_name - name of the column for sharding_expr
/// @param shard_info - info for the current shard (to compare shard_num with calculated)
/// @param slots - weight -> shard mapping
/// @return true if shard may contain such value (or it is unknown), otherwise false.
bool shardContains(
const Field & sharding_column_value,
const std::string & sharding_column_name,
const ExpressionActionsPtr & expr,
const ExpressionActionsPtr & sharding_expr,
const std::string & sharding_key_column_name,
const Cluster::ShardInfo & shard_info,
const Cluster::SlotToShard & slots)
@ -45,7 +52,14 @@ bool shardContains(
if (sharding_column_value.isNull())
return false;
Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, expr, sharding_key_column_name);
Field sharding_value = executeFunctionOnField(sharding_column_value, sharding_column_name, sharding_expr, sharding_key_column_name);
/// The value from IN can be non-numeric,
/// but in this case it should be convertible to numeric type, let's try.
sharding_value = convertFieldToType(sharding_value, DataTypeUInt64());
/// In case of conversion is not possible (NULL), shard cannot contain the value anyway.
if (sharding_value.isNull())
return false;
UInt64 value = sharding_value.get<UInt64>();
const auto shard_num = slots[value % slots.size()] + 1;
return shard_info.shard_num == shard_num;
@ -78,10 +92,10 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d
if (!identifier)
return;
const auto & expr = data.sharding_key_expr;
const auto & sharding_expr = data.sharding_key_expr;
const auto & sharding_key_column_name = data.sharding_key_column_name;
if (!expr->getRequiredColumnsWithTypes().contains(identifier->name()))
if (!sharding_expr->getRequiredColumnsWithTypes().contains(identifier->name()))
return;
/// NOTE: that we should not take care about empty tuple,
@ -93,7 +107,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d
std::erase_if(tuple_elements->children, [&](auto & child)
{
auto * literal = child->template as<ASTLiteral>();
return literal && !shardContains(literal->value, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots);
return literal && !shardContains(literal->value, identifier->name(), sharding_expr, sharding_key_column_name, data.shard_info, data.slots);
});
}
else if (auto * tuple_literal = right->as<ASTLiteral>();
@ -102,7 +116,7 @@ void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & d
auto & tuple = tuple_literal->value.get<Tuple &>();
std::erase_if(tuple, [&](auto & child)
{
return !shardContains(child, identifier->name(), expr, sharding_key_column_name, data.shard_info, data.slots);
return !shardContains(child, identifier->name(), sharding_expr, sharding_key_column_name, data.shard_info, data.slots);
});
}
}

View File

@ -17,6 +17,9 @@ others
0
0
0
different types -- prohibited
different types -- conversion
0
optimize_skip_unused_shards_limit
0
0

View File

@ -5,6 +5,7 @@
drop table if exists dist_01756;
drop table if exists dist_01756_str;
drop table if exists dist_01756_column;
drop table if exists data_01756_str;
-- SELECT
@ -90,8 +91,10 @@ select * from dist_01756 where dummy in (0); -- { serverError 507 }
-- optimize_skip_unused_shards does not support non-constants
select * from dist_01756 where dummy in (select * from system.one); -- { serverError 507 }
select * from dist_01756 where dummy in (toUInt8(0)); -- { serverError 507 }
-- wrong type
-- wrong type (tuple)
select * from dist_01756 where dummy in ('0'); -- { serverError 507 }
-- intHash64 does not accept string
select * from dist_01756 where dummy in ('0', '2'); -- { serverError 43 }
-- NOT IN does not supported
select * from dist_01756 where dummy not in (0, 2); -- { serverError 507 }
@ -110,6 +113,7 @@ select (2 IN (2,)), * from dist_01756 where dummy in (0, 2) format Null;
select (dummy IN (toUInt8(2),)), * from dist_01756 where dummy in (0, 2) format Null;
-- different type
select 'different types -- prohibited';
create table data_01756_str (key String) engine=Memory();
create table dist_01756_str as data_01756_str engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01756_str, cityHash64(key));
select * from dist_01756_str where key in ('0', '2');
@ -117,6 +121,12 @@ select * from dist_01756_str where key in ('0', Null); -- { serverError 507 }
select * from dist_01756_str where key in (0, 2); -- { serverError 53 }
select * from dist_01756_str where key in (0, Null); -- { serverError 53 }
-- different type #2
select 'different types -- conversion';
create table dist_01756_column as system.one engine=Distributed(test_cluster_two_shards, system, one, dummy);
select * from dist_01756_column where dummy in (0, '255');
select * from dist_01756_column where dummy in (0, '255foo'); -- { serverError 53 }
-- optimize_skip_unused_shards_limit
select 'optimize_skip_unused_shards_limit';
select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_shards_limit=1; -- { serverError 507 }
@ -124,4 +134,5 @@ select * from dist_01756 where dummy in (0, 2) settings optimize_skip_unused_sha
drop table dist_01756;
drop table dist_01756_str;
drop table dist_01756_column;
drop table data_01756_str;