ClickHouse/dbms/src/Functions/FunctionsConditional.cpp
Alexey Milovidov e97ecb6cf7 Merge branch 'master' into METR-19266
Conflicts:
	dbms/CMakeLists.txt
	dbms/include/DB/Functions/FunctionsArray.h
	dbms/include/DB/Functions/FunctionsString.h
	dbms/include/DB/Parsers/ExpressionListParsers.h
	dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h
	dbms/include/DB/Storages/MergeTree/MergeTreeReader.h
	dbms/include/DB/Storages/MergeTree/MergeTreeThreadBlockInputStream.h
	dbms/include/DB/Storages/MergeTree/MergedBlockOutputStream.h
	dbms/src/Core/ErrorCodes.cpp
	dbms/src/Parsers/ASTFunction.cpp
	dbms/src/Storages/MergeTree/MergeTreeData.cpp
	dbms/src/Storages/MergeTree/MergeTreeDataPart.cpp
	dbms/src/Storages/MergeTree/MergeTreeReader.cpp
	dbms/src/Storages/StorageLog.cpp
2016-12-10 07:51:36 +03:00

557 lines
17 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <DB/Functions/FunctionsConditional.h>
#include <DB/Functions/FunctionsArray.h>
#include <DB/Functions/FunctionsTransform.h>
#include <DB/Functions/FunctionFactory.h>
#include <DB/Functions/Conditional/common.h>
#include <DB/Functions/Conditional/NullMapBuilder.h>
#include <DB/Functions/Conditional/ArgsInfo.h>
#include <DB/Functions/Conditional/CondSource.h>
#include <DB/Functions/Conditional/NumericPerformer.h>
#include <DB/Functions/Conditional/StringEvaluator.h>
#include <DB/Functions/Conditional/StringArrayEvaluator.h>
#include <DB/Functions/Conditional/CondException.h>
#include <DB/Columns/ColumnNullable.h>
namespace DB
{
void registerFunctionsConditional(FunctionFactory & factory)
{
factory.registerFunction<FunctionIf>();
factory.registerFunction<FunctionMultiIf>();
factory.registerFunction<FunctionCaseWithExpr>();
factory.registerFunction<FunctionCaseWithoutExpr>();
}
namespace
{
/// Check whether at least one of the specified branches of the function multiIf
/// is either a nullable column or a null column inside a given block.
bool blockHasSpecialBranches(const Block & block, const ColumnNumbers & args)
{
auto check = [](const Block & block, size_t arg)
{
const auto & elem = block.unsafeGetByPosition(arg);
return elem.column->isNullable() || elem.column->isNull();
};
size_t else_arg = Conditional::elseArg(args);
for (size_t i = Conditional::firstThen(); i < else_arg; i = Conditional::nextThen(i))
{
if (check(block, args[i]))
return true;
}
if (check(block, args[else_arg]))
return true;
return false;
}
/// Check whether at least one of the specified datatypes is either nullable or null.
bool hasSpecialDataTypes(const DataTypes & args)
{
size_t else_arg = Conditional::elseArg(args);
for (size_t i = Conditional::firstThen(); i < else_arg; i = Conditional::nextThen(i))
{
if (args[i]->isNullable() || args[i]->isNull())
return true;
}
return args[else_arg]->isNullable() || args[else_arg]->isNull();
}
/// Return the type of the first non-null branch. Make it nullable
/// if there is at least one nullable branch or one null branch.
/// This function is used in a very few number of cases in getReturnTypeImpl().
DataTypePtr getReturnTypeFromFirstNonNullBranch(const DataTypes & args, bool has_special_types)
{
auto get_type_to_return = [has_special_types](const DataTypePtr & arg) -> DataTypePtr
{
if (arg->isNullable())
return arg;
else if (has_special_types)
return std::make_shared<DataTypeNullable>(arg);
else
return arg;
};
for (size_t i = Conditional::firstThen(); i < Conditional::elseArg(args); i = Conditional::nextThen(i))
{
if (!args[i]->isNull())
return get_type_to_return(args[i]);
}
size_t i = Conditional::elseArg(args);
if (!args[i]->isNull())
return get_type_to_return(args[i]);
return {};
}
}
/// Implementation of FunctionMultiIf.
FunctionPtr FunctionMultiIf::create(const Context & context)
{
return std::make_shared<FunctionMultiIf>();
}
String FunctionMultiIf::getName() const
{
return name;
}
bool FunctionMultiIf::hasSpecialSupportForNulls() const
{
return true;
}
DataTypePtr FunctionMultiIf::getReturnTypeImpl(const DataTypes & args) const
{
return getReturnTypeInternal(args);
}
void FunctionMultiIf::executeImpl(Block & block, const ColumnNumbers & args, size_t result)
{
try
{
if (!blockHasSpecialBranches(block, args))
{
/// All the branch types are ordinary. No special processing required.
Conditional::NullMapBuilder builder;
perform(block, args, result, builder);
return;
}
/// From the block to be processed, deduce a block whose specified
/// columns are not nullable. We accept null columns because they
/// are processed independently later.
ColumnNumbers args_to_transform;
size_t else_arg = Conditional::elseArg(args);
for (size_t i = Conditional::firstThen(); i < else_arg; i = Conditional::nextThen(i))
args_to_transform.push_back(args[i]);
args_to_transform.push_back(args[else_arg]);
Block block_with_nested_cols = createBlockWithNestedColumns(block, args_to_transform);
/// Create an object that will incrementally build the null map of the
/// result column to be returned.
Conditional::NullMapBuilder builder{block};
/// Now perform multiIf.
perform(block_with_nested_cols, args, result, builder);
/// Store the result.
const ColumnWithTypeAndName & source_col = block_with_nested_cols.unsafeGetByPosition(result);
ColumnWithTypeAndName & dest_col = block.unsafeGetByPosition(result);
if (source_col.column->isNull())
dest_col.column = source_col.column;
else
dest_col.column = std::make_shared<ColumnNullable>(source_col.column, builder.getNullMap());
}
catch (const Conditional::CondException & ex)
{
rethrowContextually(ex);
}
}
DataTypePtr FunctionMultiIf::getReturnTypeInternal(const DataTypes & args) const
{
if (!Conditional::hasValidArgCount(args))
throw Exception{"Invalid number of arguments for function " + getName(),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH};
/// Check that conditions have valid types.
for (size_t i = Conditional::firstCond(); i < Conditional::elseArg(args); i = Conditional::nextCond(i))
{
const IDataType * observed_type;
if (args[i]->isNullable())
{
const DataTypeNullable & nullable_type = static_cast<const DataTypeNullable &>(*args[i]);
observed_type = nullable_type.getNestedType().get();
}
else
observed_type = args[i].get();
if (!typeid_cast<const DataTypeUInt8 *>(observed_type) && !observed_type->isNull())
throw Exception{"Illegal type of argument " + toString(i) + " (condition) "
"of function " + getName() + ". Must be UInt8.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
bool has_special_types = hasSpecialDataTypes(args);
if (Conditional::hasArithmeticBranches(args))
return Conditional::getReturnTypeForArithmeticArgs(args);
else if (Conditional::hasArrayBranches(args))
{
/// NOTE Сообщения об ошибках будут относится к типам элементов массивов, что немного некорректно.
DataTypes new_args;
new_args.reserve(args.size());
auto push_branch_arg = [&args, &new_args](size_t i)
{
if (args[i]->isNull())
new_args.push_back(args[i]);
else
{
const IDataType * observed_type;
if (args[i]->isNullable())
{
const auto & nullable_type = static_cast<const DataTypeNullable &>(*args[i]);
observed_type = nullable_type.getNestedType().get();
}
else
observed_type = args[i].get();
const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(observed_type);
if (type_arr == nullptr)
throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR};
new_args.push_back(type_arr->getNestedType());
}
};
for (size_t i = 0; i < Conditional::elseArg(args); ++i)
{
if (Conditional::isCond(i))
new_args.push_back(args[i]);
else
push_branch_arg(i);
}
push_branch_arg(Conditional::elseArg(args));
/// NOTE: in a future release, this code will be rewritten. Indeed
/// the current approach is flawed since it cannot appropriately
/// deal with null arguments and arrays that contain null elements.
/// For now we assume that arrays do not contain any such elements.
DataTypePtr elt_type = getReturnTypeImpl(new_args);
if (elt_type->isNullable())
{
DataTypeNullable & nullable_type = static_cast<DataTypeNullable &>(*elt_type);
elt_type = nullable_type.getNestedType();
}
DataTypePtr type = std::make_shared<DataTypeArray>(elt_type);
if (has_special_types)
type = std::make_shared<DataTypeNullable>(type);
return type;
}
else if (!Conditional::hasIdenticalTypes(args))
{
if (Conditional::hasFixedStrings(args))
{
if (!Conditional::hasFixedStringsOfIdenticalLength(args))
throw Exception{"Branch (then, else) arguments of function " + getName() +
" have FixedString type and different sizes",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
auto type = getReturnTypeFromFirstNonNullBranch(args, has_special_types);
if (type)
return type;
else
{
/// This cannot happen: at least one fixed string is not null.
throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR};
}
}
else if (Conditional::hasStrings(args))
{
DataTypePtr type = std::make_shared<DataTypeString>();
if (has_special_types)
type = std::make_shared<DataTypeNullable>(type);
return type;
}
else
throw Exception{
"Incompatible branch (then, else) arguments for function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}
else
{
auto type = getReturnTypeFromFirstNonNullBranch(args, has_special_types);
if (type)
return type;
/// All the branches are null.
return std::make_shared<DataTypeNull>();
}
}
void FunctionMultiIf::perform(Block & block, const ColumnNumbers & args, size_t result, Conditional::NullMapBuilder & builder)
{
if (performTrivialCase(block, args, result, builder))
return;
if (Conditional::NumericPerformer::perform(block, args, result, builder))
return;
if (Conditional::StringEvaluator::perform(block, args, result, builder))
return;
if (Conditional::StringArrayEvaluator::perform(block, args, result, builder))
return;
throw Exception{"One or more branch (then, else) columns of function "
+ getName() + " have illegal or incompatible types",
ErrorCodes::ILLEGAL_COLUMN};
}
bool FunctionMultiIf::performTrivialCase(Block & block, const ColumnNumbers & args,
size_t result, Conditional::NullMapBuilder & builder)
{
/// Check that all the branches have the same type. Moreover
/// some or all these branches may be null.
std::string first_type_name;
DataTypePtr type;
size_t else_arg = Conditional::elseArg(args);
for (size_t i = Conditional::firstThen(); i < else_arg; i = Conditional::nextThen(i))
{
if (!block.getByPosition(args[i]).type->isNull())
{
const auto & name = block.getByPosition(args[i]).type->getName();
if (first_type_name.empty())
{
first_type_name = name;
type = block.getByPosition(args[i]).type;
}
else
{
if (name != first_type_name)
return false;
}
}
}
if (!block.getByPosition(args[else_arg]).type->isNull())
{
if (first_type_name.empty())
type = block.getByPosition(args[else_arg]).type;
else
{
const auto & name = block.getByPosition(args[else_arg]).type->getName();
if (name != first_type_name)
return false;
}
}
size_t row_count = block.rowsInFirstColumn();
auto & res_col = block.getByPosition(result).column;
if (!type)
{
/// Degenerate case: all the branches are null.
res_col = std::make_shared<ColumnNull>(row_count, Null());
return true;
}
/// Check that all the conditions are constants.
for (size_t i = Conditional::firstCond(); i < else_arg; i = Conditional::nextCond(i))
{
const IColumn * col = block.getByPosition(args[i]).column.get();
if (!col->isConst())
return false;
}
/// Initialize readers for the conditions.
Conditional::CondSources conds;
conds.reserve(Conditional::getCondCount(args));
for (size_t i = Conditional::firstCond(); i < else_arg; i = Conditional::nextCond(i))
conds.emplace_back(block, args, i);
/// Perform multiIf.
auto make_result = [&](size_t index)
{
res_col = block.getByPosition(index).column;
if (res_col->isNull())
{
/// The return type of multiIf is Nullable(T). Therefore we create
/// a constant column whose type is T with a default value.
/// Subsequently the null map builder will mark it as null.
res_col = type->createConstColumn(row_count, type->getDefault());
}
if (builder)
builder.build(index);
};
size_t i = Conditional::firstCond();
for (const auto & cond : conds)
{
if (cond.get(0))
{
make_result(args[Conditional::thenFromCond(i)]);
return true;
}
i = Conditional::nextCond(i);
}
make_result(args[else_arg]);
return true;
}
/// Translate a context-free error into a contextual error.
void FunctionMultiIf::rethrowContextually(const Conditional::CondException & ex) const
{
if (ex.getCode() == Conditional::CondErrorCodes::TYPE_DEDUCER_ILLEGAL_COLUMN_TYPE)
throw Exception{"Illegal type of column " + ex.getMsg1() +
" of function multiIf", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
else if (ex.getCode() == Conditional::CondErrorCodes::TYPE_DEDUCER_UPSCALING_ERROR)
throw Exception{"Arguments of function multiIf are not upscalable to a "
"common type without loss of precision: " + ex.getMsg1(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
else if (ex.getCode() == Conditional::CondErrorCodes::NUMERIC_PERFORMER_ILLEGAL_COLUMN)
throw Exception{"Illegal argument " + ex.getMsg1() + " of function multiIf",
ErrorCodes::ILLEGAL_COLUMN};
else if (ex.getCode() == Conditional::CondErrorCodes::COND_SOURCE_ILLEGAL_COLUMN)
throw Exception{"Illegal column " + ex.getMsg1() + " of argument "
+ ex.getMsg2() + " of function multiIf. "
"Must be ColumnUInt8 or ColumnConstUInt8.", ErrorCodes::ILLEGAL_COLUMN};
else if (ex.getCode() == Conditional::CondErrorCodes::NUMERIC_EVALUATOR_ILLEGAL_ARGUMENT)
throw Exception{"Illegal type of argument " + ex.getMsg1() + " of function multiIf",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
else if (ex.getCode() == Conditional::CondErrorCodes::ARRAY_EVALUATOR_INVALID_TYPES)
throw Exception{"Internal logic error: one or more arguments of function "
"multiIf have invalid types", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
else
throw Exception{"An unexpected error has occurred while performing multiIf",
ErrorCodes::LOGICAL_ERROR};
}
/// Implementation of FunctionCaseWithExpr.
FunctionPtr FunctionCaseWithExpr::create(const Context & context_)
{
return std::make_shared<FunctionCaseWithExpr>(context_);
}
FunctionCaseWithExpr::FunctionCaseWithExpr(const Context & context_)
: context{context_}
{
}
String FunctionCaseWithExpr::getName() const
{
return name;
}
DataTypePtr FunctionCaseWithExpr::getReturnTypeImpl(const DataTypes & args) const
{
/// See the comments in executeImpl() to understand why we actually have to
/// get the return type of a transform function.
/// Get the return types of the arrays that we pass to the transform function.
DataTypes src_array_types;
DataTypes dst_array_types;
for (size_t i = 1; i < (args.size() - 1); ++i)
{
if ((i % 2) != 0)
src_array_types.push_back(args[i]);
else
dst_array_types.push_back(args[i]);
}
FunctionArray fun_array{context};
DataTypePtr src_array_type = fun_array.getReturnTypeImpl(src_array_types);
DataTypePtr dst_array_type = fun_array.getReturnTypeImpl(dst_array_types);
/// Finally get the return type of the transform function.
FunctionTransform fun_transform;
return fun_transform.getReturnTypeImpl({args.front(), src_array_type, dst_array_type, args.back()});
}
void FunctionCaseWithExpr::executeImpl(Block & block, const ColumnNumbers & args, size_t result)
{
/// In the following code, we turn the construction:
/// CASE expr WHEN val[0] THEN branch[0] ... WHEN val[N-1] then branch[N-1] ELSE branchN
/// into the construction transform(expr, src, dest, branchN)
/// where:
/// src = [val[0], val[1], ..., val[N-1]]
/// dest = [branch[0], ..., branch[N-1]]
/// then we perform it.
/// Create the arrays required by the transform function.
ColumnNumbers src_array_args;
DataTypes src_array_types;
ColumnNumbers dst_array_args;
DataTypes dst_array_types;
for (size_t i = 1; i < (args.size() - 1); ++i)
{
if ((i % 2) != 0)
{
src_array_args.push_back(args[i]);
src_array_types.push_back(block.getByPosition(args[i]).type);
}
else
{
dst_array_args.push_back(args[i]);
dst_array_types.push_back(block.getByPosition(args[i]).type);
}
}
FunctionArray fun_array{context};
DataTypePtr src_array_type = fun_array.getReturnTypeImpl(src_array_types);
DataTypePtr dst_array_type = fun_array.getReturnTypeImpl(dst_array_types);
Block temp_block = block;
size_t src_array_pos = temp_block.columns();
temp_block.insert({nullptr, src_array_type, ""});
size_t dst_array_pos = temp_block.columns();
temp_block.insert({nullptr, dst_array_type, ""});
fun_array.executeImpl(temp_block, src_array_args, src_array_pos);
fun_array.executeImpl(temp_block, dst_array_args, dst_array_pos);
/// Execute transform.
FunctionTransform fun_transform;
ColumnNumbers transform_args{args.front(), src_array_pos, dst_array_pos, args.back()};
fun_transform.executeImpl(temp_block, transform_args, result);
/// Put the result into the original block.
block.getByPosition(result).column = std::move(temp_block.getByPosition(result).column);
}
/// Implementation of FunctionCaseWithoutExpr.
FunctionPtr FunctionCaseWithoutExpr::create(const Context & context_)
{
return std::make_shared<FunctionCaseWithoutExpr>();
}
String FunctionCaseWithoutExpr::getName() const
{
return name;
}
bool FunctionCaseWithoutExpr::hasSpecialSupportForNulls() const
{
return true;
}
DataTypePtr FunctionCaseWithoutExpr::getReturnTypeImpl(const DataTypes & args) const
{
FunctionMultiIf fun_multi_if;
return fun_multi_if.getReturnTypeImpl(args);
}
void FunctionCaseWithoutExpr::executeImpl(Block & block, const ColumnNumbers & args, size_t result)
{
/// A CASE construction without any expression is a straightforward multiIf.
FunctionMultiIf fun_multi_if;
fun_multi_if.executeImpl(block, args, result);
}
}