ClickHouse/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

167 lines
5.4 KiB
C++
Raw Normal View History

#include <Interpreters/ExpressionActions.h>
#include <Interpreters/convertFieldToType.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <DataTypes/DataTypesNumber.h>
#include <Interpreters/OptimizeShardingKeyRewriteInVisitor.h>
2024-02-27 18:17:34 +00:00
#include "Analyzer/ColumnNode.h"
#include "Analyzer/ConstantNode.h"
#include "Analyzer/FunctionNode.h"
#include "Analyzer/IQueryTreeNode.h"
#include "Analyzer/InDepthQueryTreeVisitor.h"
#include "DataTypes/IDataType.h"
namespace
{
using namespace DB;
Field executeFunctionOnField(
const Field & field,
const std::string & name,
const ExpressionActionsPtr & sharding_expr,
const DataTypePtr & type,
const std::string & sharding_key_column_name)
{
ColumnWithTypeAndName column;
column.column = type->createColumnConst(1, field);
column.name = name;
column.type = type;
Block block{column};
size_t num_rows = 1;
sharding_expr->execute(block, num_rows);
ColumnWithTypeAndName & ret = block.getByName(sharding_key_column_name);
return (*ret.column)[0];
}
/// @param sharding_column_value - one of values from IN
/// @param sharding_column_name - name of that column
/// @return true if shard may contain such value (or it is unknown), otherwise false.
bool shardContains(
Field sharding_column_value,
const std::string & sharding_column_name,
const OptimizeShardingKeyRewriteInMatcher::Data & data)
{
/// Implicit conversion.
sharding_column_value = convertFieldToType(sharding_column_value, *data.sharding_key_type);
/// NULL is not allowed in sharding key,
/// so it should be safe to assume that shard cannot contain it.
if (sharding_column_value.isNull())
return false;
Field sharding_value = executeFunctionOnField(
sharding_column_value, sharding_column_name,
data.sharding_key_expr, data.sharding_key_type,
data.sharding_key_column_name);
/// The value from IN can be non-numeric,
/// but in this case it should be convertible to numeric type, let's try.
///
/// NOTE: that conversion should not be done for signed types,
/// since it uses accurate cast, that will return Null,
/// but we need static_cast<> (as createBlockSelector()).
if (!isInt64OrUInt64FieldType(sharding_value.getType()))
sharding_value = convertFieldToType(sharding_value, DataTypeUInt64());
/// In case of conversion is not possible (NULL), shard cannot contain the value anyway.
if (sharding_value.isNull())
return false;
UInt64 value = sharding_value.get<UInt64>();
const auto shard_num = data.slots[value % data.slots.size()] + 1;
return data.shard_info.shard_num == shard_num;
}
}
namespace DB
{
bool OptimizeShardingKeyRewriteInMatcher::needChildVisit(ASTPtr & /*node*/, const ASTPtr & /*child*/)
{
return true;
}
void OptimizeShardingKeyRewriteInMatcher::visit(ASTPtr & node, Data & data)
{
if (auto * function = node->as<ASTFunction>())
visit(*function, data);
}
void OptimizeShardingKeyRewriteInMatcher::visit(ASTFunction & function, Data & data)
{
if (function.name != "in")
return;
auto * left = function.arguments->children.front().get();
auto * right = function.arguments->children.back().get();
auto * identifier = left->as<ASTIdentifier>();
if (!identifier)
return;
2023-08-10 18:41:57 +00:00
auto name = identifier->shortName();
if (!data.sharding_key_expr->getRequiredColumnsWithTypes().contains(name))
return;
if (auto * tuple_func = right->as<ASTFunction>(); tuple_func && tuple_func->name == "tuple")
{
auto * tuple_elements = tuple_func->children.front()->as<ASTExpressionList>();
std::erase_if(tuple_elements->children, [&](auto & child)
{
auto * literal = child->template as<ASTLiteral>();
2023-08-10 18:41:57 +00:00
return tuple_elements->children.size() > 1 && literal && !shardContains(literal->value, name, data);
});
}
else if (auto * tuple_literal = right->as<ASTLiteral>();
tuple_literal && tuple_literal->value.getType() == Field::Types::Tuple)
{
auto & tuple = tuple_literal->value.get<Tuple &>();
std::erase_if(tuple, [&](auto & child)
{
2023-08-10 18:41:57 +00:00
return tuple.size() > 1 && !shardContains(child, name, data);
});
}
}
2024-02-27 18:17:34 +00:00
class OptimizeShardingKeyRewriteIn : InDepthQueryTreeVisitorWithContext<OptimizeShardingKeyRewriteIn>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<OptimizeShardingKeyRewriteIn>;
using Base::Base;
void enterImpl(QueryTreeNodePtr & node)
{
auto * function_node = node->as<FunctionNode>();
if (!function_node || function_node->getFunctionName() != "in")
return;
auto & arguments = function_node->getArguments().getNodes();
auto * column = arguments[0]->as<ColumnNode>();
if (!column)
return;
if (!data.sharding_key_expr->getRequiredColumnsWithTypes().contains(column->getColumnName()))
return;
if (auto * constant = arguments[1]->as<ConstantNode>())
{
if (isTuple(constant->getResultType()))
{
auto & tuple = constant->getValue().get<Tuple &>();
std::erase_if(tuple, [&](auto & child)
{
return tuple.size() > 1 && !shardContains(child, name, data);
});
}
}
}
OptimizeShardingKeyRewriteInMatcher::Data data;
};
}