Merge pull request #60129 from bigo-sg/short_circut_func

Short circuit optimization for functions executed over Nullable arguments
This commit is contained in:
Pavel Kruglov 2024-11-19 13:31:55 +00:00 committed by GitHub
commit e3e4e45278
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 237 additions and 71 deletions

View File

@ -24,7 +24,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
ssize_t from = data.size() - 1;
ssize_t index = mask.size() - 1;
data.resize(mask.size());
data.resize_exact(mask.size());
while (index >= 0)
{
if (!!mask[index] ^ inverted)

View File

@ -61,6 +61,8 @@
M(ArenaAllocBytes, "Number of bytes allocated for memory Arena (used for GROUP BY and similar operations)", ValueType::Bytes) \
M(FunctionExecute, "Number of SQL ordinary function calls (SQL functions are called on per-block basis, so this number represents the number of blocks).", ValueType::Number) \
M(TableFunctionExecute, "Number of table function calls.", ValueType::Number) \
M(DefaultImplementationForNullsRows, "Number of rows processed by default implementation for nulls in function execution", ValueType::Number) \
M(DefaultImplementationForNullsRowsWithNulls, "Number of rows which contain null values processed by default implementation for nulls in function execution", ValueType::Number) \
M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.", ValueType::Number) \
M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \

View File

@ -5520,6 +5520,13 @@ Only available in ClickHouse Cloud. Number of background threads for speculative
)", 0) \
DECLARE(Int64, ignore_cold_parts_seconds, 0, R"(
Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.
)", 0) \
DECLARE(Bool, short_circuit_function_evaluation_for_nulls, true, R"(
Allows to execute functions with Nullable arguments only on rows with non-NULL values in all arguments when ratio of NULL values in arguments exceeds short_circuit_function_evaluation_for_nulls_threshold. Applies only to functions that return NULL value for rows with at least one NULL value in arguments.
)", 0) \
DECLARE(Double, short_circuit_function_evaluation_for_nulls_threshold, 1.0, R"(
Ratio threshold of NULL values to execute functions with Nullable arguments only on rows with non-NULL values in all arguments. Applies when setting short_circuit_function_evaluation_for_nulls is enabled.
When the ratio of rows containing NULL values to the total number of rows exceeds this threshold, these rows containing NULL values will not be evaluated.
)", 0) \
DECLARE(Int64, prefer_warmed_unmerged_parts_seconds, 0, R"(
Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.

View File

@ -64,6 +64,8 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
},
{"24.11",
{
{"short_circuit_function_evaluation_for_nulls", false, true, "Allow to execute functions with Nullable arguments only on rows with non-NULL values in all arguments"},
{"short_circuit_function_evaluation_for_nulls_threshold", 1.0, 1.0, "Ratio threshold of NULL values to execute functions with Nullable arguments only on rows with non-NULL values in all arguments. Applies when setting short_circuit_function_evaluation_for_nulls is enabled."},
{"enable_http_compression", false, true, "Improvement for read-only clients since they can't change settings"},
{"validate_mutation_query", false, true, "New setting to validate mutation queries by default."},
{"enable_job_stack_trace", false, true, "Enable by default collecting stack traces from job's scheduling."},

View File

@ -1,10 +1,10 @@
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Common/assert_cast.h>
@ -13,11 +13,11 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ILLEGAL_COLUMN;
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * column)
@ -27,8 +27,7 @@ const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * co
const ColumnConst * res = assert_cast<const ColumnConst *>(column);
if (checkColumn<ColumnString>(&res->getDataColumn())
|| checkColumn<ColumnFixedString>(&res->getDataColumn()))
if (checkColumn<ColumnString>(&res->getDataColumn()) || checkColumn<ColumnFixedString>(&res->getDataColumn()))
return res;
return {};
@ -101,18 +100,22 @@ String withOrdinalEnding(size_t i)
{
switch (i)
{
case 0: return "1st";
case 1: return "2nd";
case 2: return "3rd";
default: return std::to_string(i) + "th";
case 0:
return "1st";
case 1:
return "2nd";
case 2:
return "3rd";
default:
return std::to_string(i) + "th";
}
}
void validateArgumentsImpl(const IFunction & func,
const ColumnsWithTypeAndName & arguments,
size_t argument_offset,
const FunctionArgumentDescriptors & descriptors)
void validateArgumentsImpl(
const IFunction & func,
const ColumnsWithTypeAndName & arguments,
size_t argument_offset,
const FunctionArgumentDescriptors & descriptors)
{
for (size_t i = 0; i < descriptors.size(); ++i)
{
@ -123,13 +126,14 @@ void validateArgumentsImpl(const IFunction & func,
const auto & arg = arguments[i + argument_offset];
const auto & descriptor = descriptors[i];
if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0)
throw Exception(error_code,
"A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}",
withOrdinalEnding(argument_offset + i),
descriptor.name,
func.getName(),
descriptor.type_name,
arg.type ? arg.type->getName() : "<?>");
throw Exception(
error_code,
"A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}",
withOrdinalEnding(argument_offset + i),
descriptor.name,
func.getName(),
descriptor.type_name,
arg.type ? arg.type->getName() : "<?>");
}
}
@ -149,26 +153,35 @@ int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const Col
return 0;
}
void validateFunctionArguments(const IFunction & func,
const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args)
void validateFunctionArguments(
const IFunction & func,
const ColumnsWithTypeAndName & arguments,
const FunctionArgumentDescriptors & mandatory_args,
const FunctionArgumentDescriptors & optional_args)
{
if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size())
{
auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; };
auto argument_singular_or_plural
= [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; };
String expected_args_string;
if (!mandatory_args.empty() && !optional_args.empty())
expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args));
expected_args_string = fmt::format(
"{} mandatory {} and {} optional {}",
mandatory_args.size(),
argument_singular_or_plural(mandatory_args),
optional_args.size(),
argument_singular_or_plural(optional_args));
else if (!mandatory_args.empty() && optional_args.empty())
expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments"
expected_args_string = fmt::format(
"{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments"
else if (mandatory_args.empty() && !optional_args.empty())
expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args));
else
expected_args_string = "0 arguments";
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"An incorrect number of arguments was specified for function '{}'. Expected {}, got {}",
func.getName(),
expected_args_string,
@ -204,7 +217,8 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
return {nested_columns, offsets->data()};
}
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count)
ColumnPtr
wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count)
{
ColumnPtr result_null_map_column;
@ -262,6 +276,39 @@ ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & a
return ColumnNullable::create(src_not_nullable->convertToFullColumnIfConst(), result_null_map_column);
}
/// Wraps `src` into a Nullable column; row i of the result is NULL when `null_map[i]` is non-zero.
///  - If `src` consists only of NULLs, it is returned unchanged.
///  - If `src` is already Nullable, its own null map is OR-ed with `null_map`.
///  - If `src` is constant and `null_map` holds the same flag in every row, the result stays
///    ColumnConst(ColumnNullable). Otherwise the constant is materialized, so that per-row NULL
///    flags are not collapsed into the flag of the first row.
///  - Any other column is wrapped together with `null_map` as is.
/// `null_map` must be a ColumnUInt8 (enforced by assert_cast).
/// NOTE(review): presumably `null_map` has as many rows as `src` - confirm against callers.
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnPtr & null_map)
{
    if (src->onlyNull())
        return src;

    if (const auto * nullable = checkAndGetColumn<ColumnNullable>(src.get()))
    {
        ColumnPtr src_not_nullable = nullable->getNestedColumnPtr();
        ColumnPtr result_null_map_column = nullable->getNullMapColumnPtr();

        /// The null map of `src` may be shared, so get a mutable version before merging into it.
        MutableColumnPtr mutable_result_null_map_column = IColumn::mutate(std::move(result_null_map_column));
        NullMap & result_null_map = assert_cast<ColumnUInt8 &>(*mutable_result_null_map_column).getData();
        const NullMap & null_map_data = assert_cast<const ColumnUInt8 &>(*null_map).getData();
        for (size_t i = 0; i < result_null_map.size(); ++i)
            result_null_map[i] |= null_map_data[i];

        result_null_map_column = std::move(mutable_result_null_map_column);
        return ColumnNullable::create(src_not_nullable->convertToFullColumnIfConst(), result_null_map_column);
    }

    if (const auto * const_src = checkAndGetColumn<ColumnConst>(src.get()))
    {
        const NullMap & null_map_data = assert_cast<const ColumnUInt8 &>(*null_map).getData();

        /// A constant result can describe all rows only if every row has the same NULL flag.
        /// An empty null map is treated as non-uniform to avoid reading null_map_data[0] out of range.
        bool null_map_is_uniform = !null_map_data.empty();
        for (size_t i = 1; null_map_is_uniform && i < null_map_data.size(); ++i)
            null_map_is_uniform = (!!null_map_data[i] == !!null_map_data[0]);

        /// Mixed NULL flags: materialize the constant and wrap the full column instead.
        if (!null_map_is_uniform)
            return wrapInNullable(src->convertToFullColumnIfConst(), null_map);

        ColumnPtr result_null_map = ColumnUInt8::create(1, null_map_data[0] || const_src->isNullAt(0));
        const auto * nullable_data = checkAndGetColumn<ColumnNullable>(&const_src->getDataColumn());
        auto data_not_nullable = nullable_data ? nullable_data->getNestedColumnPtr() : const_src->getDataColumnPtr();
        return ColumnConst::create(ColumnNullable::create(data_not_nullable, result_null_map), const_src->size());
    }

    return ColumnNullable::create(src->convertToFullColumnIfConst(), null_map);
}
NullPresence getNullPresense(const ColumnsWithTypeAndName & args)
{
NullPresence res;

View File

@ -169,6 +169,11 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
/// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count);
/** Wrap src into a Nullable column using the given null map (a non-zero byte marks the row as NULL).
  * If src consists only of NULLs, it is returned as is.
  * If src is already Nullable, its own null map is combined (bitwise OR) with null_map.
  * If src is constant, the result may be ColumnConst(ColumnNullable); otherwise it is a full ColumnNullable.
  * null_map is expected to be a ColumnUInt8.
  */
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnPtr & null_map);
struct NullPresence
{
bool has_nullable = false;

View File

@ -1,5 +1,5 @@
#include <Functions/IFunctionAdaptors.h>
#include <Functions/FunctionDynamicAdaptor.h>
#include <Functions/IFunctionAdaptors.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnLowCardinality.h>
@ -7,13 +7,18 @@
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnSparse.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/MaskOperations.h>
#include <Core/Block.h>
#include <Core/Settings.h>
#include <Core/TypeId.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/Native.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/Context.h>
#include <Common/CurrentThread.h>
#include <Common/SipHash.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
@ -27,15 +32,26 @@
# include <llvm/IR/IRBuilder.h>
#endif
/// Profile events incremented by the default implementation for nulls in function execution.
namespace ProfileEvents
{
/// Number of rows processed by the default implementation for nulls.
extern const Event DefaultImplementationForNullsRows;
/// Number of those rows that contain a NULL in at least one argument.
extern const Event DefaultImplementationForNullsRowsWithNulls;
}
namespace DB
{
/// Settings read by the IExecutableFunction constructor to configure short-circuit evaluation over NULLs.
namespace Setting
{
/// Enables executing functions only on rows without NULLs in any argument.
extern const SettingsBool short_circuit_function_evaluation_for_nulls;
/// Ratio of rows with NULLs at which the short-circuit path is taken.
extern const SettingsDouble short_circuit_function_evaluation_for_nulls_threshold;
}
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int ILLEGAL_COLUMN;
}
namespace
@ -67,9 +83,7 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
if (!low_cardinality_type)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible type for LowCardinality column: {}",
column.type->getName());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", column.type->getName());
if (can_be_executed_on_default_arguments)
{
@ -122,10 +136,7 @@ ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
/// Check that these arguments are really constant.
for (auto arg_num : arguments_to_remain_constants)
if (arg_num < args.size() && !isColumnConst(*args[arg_num].column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Argument at index {} for function {} must be constant",
arg_num,
getName());
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument at index {} for function {} must be constant", arg_num, getName());
if (args.empty() || !useDefaultImplementationForConstants() || !allArgumentsAreConstants(args))
return nullptr;
@ -139,14 +150,16 @@ ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
{
const ColumnWithTypeAndName & column = args[arg_num];
if (arguments_to_remain_constants.end() != std::find(arguments_to_remain_constants.begin(), arguments_to_remain_constants.end(), arg_num))
if (arguments_to_remain_constants.end()
!= std::find(arguments_to_remain_constants.begin(), arguments_to_remain_constants.end(), arg_num))
{
temporary_columns.emplace_back(ColumnWithTypeAndName{column.column->cloneResized(1), column.type, column.name});
}
else
{
have_converted_columns = true;
temporary_columns.emplace_back(ColumnWithTypeAndName{ assert_cast<const ColumnConst *>(column.column.get())->getDataColumnPtr(), column.type, column.name });
temporary_columns.emplace_back(
ColumnWithTypeAndName{assert_cast<const ColumnConst *>(column.column.get())->getDataColumnPtr(), column.type, column.name});
}
}
@ -154,7 +167,8 @@ ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
* not in "arguments_to_remain_constants" set. Otherwise we get infinite recursion.
*/
if (!have_converted_columns)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: the function requires more arguments",
getName());
@ -177,7 +191,7 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
NullPresence null_presence = getNullPresense(args);
if (null_presence.has_null_constant)
if (null_presence.has_null_constant || null_presence.has_nullable)
{
// Default implementation for nulls returns null result for null arguments,
// so the result type must be nullable.
@ -188,17 +202,87 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
"is expected to return Nullable result, got {}",
getName(),
result_type->getName());
}
if (null_presence.has_null_constant)
{
/// If any of the input arguments is null literal, the result is null constant.
return result_type->createColumnConstWithDefaultValue(input_rows_count);
}
if (null_presence.has_nullable)
{
/// Usually happens during analyzing. We should return non-const column to avoid wrong constant folding.
if (input_rows_count == 0)
return result_type->createColumn();
auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
auto & result_null_map_data = result_null_map->getData();
bool all_columns_constant = true;
for (const auto & arg : args)
{
if (!isColumnConst(*arg.column))
all_columns_constant = false;
if (arg.type->isNullable())
{
if (isColumnConst(*arg.column))
{
if (arg.column->onlyNull())
{
/// If any of input columns contains a null constant, the result is null constant.
return result_type->createColumnConstWithDefaultValue(input_rows_count);
}
}
else
{
const auto & null_map = assert_cast<const ColumnNullable &>(*arg.column).getNullMapData();
for (size_t i = 0; i < input_rows_count; ++i)
result_null_map_data[i] |= null_map[i];
}
}
}
size_t rows_with_nulls = countBytesInFilter(result_null_map_data.data(), 0, input_rows_count);
size_t rows_without_nulls = input_rows_count - rows_with_nulls;
ProfileEvents::increment(ProfileEvents::DefaultImplementationForNullsRows, input_rows_count);
ProfileEvents::increment(ProfileEvents::DefaultImplementationForNullsRowsWithNulls, rows_with_nulls);
if (rows_without_nulls == 0 && !all_columns_constant)
{
/// Don't need to evaluate function if each row contains at least one null value and not all input columns are constant.
return result_type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst();
}
double null_ratio = rows_with_nulls / static_cast<double>(result_null_map_data.size());
bool should_short_circuit = short_circuit_function_evaluation_for_nulls && !all_columns_constant
&& null_ratio >= short_circuit_function_evaluation_for_nulls_threshold;
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
auto temporary_result_type = removeNullable(result_type);
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run);
return wrapInNullable(res, args, result_type, input_rows_count);
if (!should_short_circuit)
{
/// Each row should be evaluated if there are no nulls or short circuiting is disabled.
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run);
auto new_res = wrapInNullable(res, std::move(result_null_map));
return new_res;
}
else
{
/// If short circuit is enabled, we only execute the function on rows with all arguments not null
/// Filter every column by mask
for (auto & col : temporary_columns)
col.column = col.column->filter(result_null_map_data, rows_without_nulls);
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, rows_without_nulls, dry_run);
auto mutable_res = IColumn::mutate(std::move(res));
mutable_res->expand(result_null_map_data, false);
auto new_res = wrapInNullable(std::move(mutable_res), std::move(result_null_map));
return new_res;
}
}
return nullptr;
@ -260,7 +344,21 @@ static void convertSparseColumnsToFull(ColumnsWithTypeAndName & args)
column.column = recursiveRemoveSparse(column.column);
}
ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
/// Captures the NULL short-circuit settings of the current query, when there is one.
/// Both members keep their in-class defaults if the current thread is not initialized,
/// has no query context, or the setting `short_circuit_function_evaluation_for_nulls` is off.
IExecutableFunction::IExecutableFunction()
{
    if (!CurrentThread::isInitialized())
        return;

    const auto query_context = CurrentThread::get().getQueryContext();
    if (!query_context)
        return;

    const auto & settings = query_context->getSettingsRef();
    if (!settings[Setting::short_circuit_function_evaluation_for_nulls])
        return;

    /// The threshold is only read when the feature itself is enabled.
    short_circuit_function_evaluation_for_nulls = true;
    short_circuit_function_evaluation_for_nulls_threshold = settings[Setting::short_circuit_function_evaluation_for_nulls_threshold];
}
ColumnPtr IExecutableFunction::executeWithoutSparseColumns(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
ColumnPtr result;
if (useDefaultImplementationForLowCardinalityColumns())
@ -273,19 +371,16 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
const auto & dictionary_type = res_low_cardinality_type->getDictionaryType();
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);
size_t new_input_rows_count = columns_without_low_cardinality.empty()
? input_rows_count
: columns_without_low_cardinality.front().column->size();
size_t new_input_rows_count
= columns_without_low_cardinality.empty() ? input_rows_count : columns_without_low_cardinality.front().column->size();
checkFunctionArgumentSizes(columns_without_low_cardinality, new_input_rows_count);
auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run);
bool res_is_constant = isColumnConst(*res);
auto keys = res_is_constant
? res->cloneResized(1)->convertToFullColumnIfConst()
: res;
auto keys = res_is_constant ? res->cloneResized(1)->convertToFullColumnIfConst() : res;
auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType());
ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size());
@ -311,7 +406,8 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
return result;
}
ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
ColumnPtr IExecutableFunction::execute(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
{
checkFunctionArgumentSizes(arguments, input_rows_count);
@ -372,7 +468,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments,
if (!result_type->canBeInsideSparseColumns() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1)
{
const auto & offsets_data = assert_cast<const ColumnVector<UInt64> &>(*sparse_offsets).getData();
return res->createWithOffsets(offsets_data, *createColumnConst(res, 0), input_rows_count, /*shift=*/ 1);
return res->createWithOffsets(offsets_data, *createColumnConst(res, 0), input_rows_count, /*shift=*/1);
}
return ColumnSparse::create(res, sparse_offsets, input_rows_count);
@ -398,7 +494,8 @@ void IFunctionOverloadResolver::checkNumberOfArguments(size_t number_of_argument
size_t expected_number_of_arguments = getNumberOfArguments();
if (number_of_arguments != expected_number_of_arguments)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Number of arguments for function {} doesn't match: passed {}, should be {}",
getName(),
number_of_arguments,
@ -437,9 +534,8 @@ DataTypePtr IFunctionOverloadResolver::getReturnType(const ColumnsWithTypeAndNam
auto type_without_low_cardinality = getReturnTypeWithoutLowCardinality(args_without_low_cardinality);
if (canBeExecutedOnLowCardinalityDictionary() && has_low_cardinality
&& num_full_low_cardinality_columns <= 1 && num_full_ordinary_columns == 0
&& type_without_low_cardinality->canBeInsideLowCardinality())
if (canBeExecutedOnLowCardinalityDictionary() && has_low_cardinality && num_full_low_cardinality_columns <= 1
&& num_full_ordinary_columns == 0 && type_without_low_cardinality->canBeInsideLowCardinality())
return std::make_shared<DataTypeLowCardinality>(type_without_low_cardinality);
return type_without_low_cardinality;
}
@ -561,7 +657,7 @@ llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const ValuesWith
ValuesWithType unwrapped_arguments;
unwrapped_arguments.reserve(arguments.size());
std::vector<llvm::Value*> is_null_values;
std::vector<llvm::Value *> is_null_values;
for (size_t i = 0; i < arguments.size(); ++i)
{

View File

@ -45,6 +45,7 @@ using OptionalFieldInterval = std::optional<FieldInterval>;
class IExecutableFunction
{
public:
IExecutableFunction();
virtual ~IExecutableFunction() = default;
@ -120,6 +121,9 @@ private:
ColumnPtr executeWithoutSparseColumns(
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const;
bool short_circuit_function_evaluation_for_nulls = false;
double short_circuit_function_evaluation_for_nulls_threshold = 0.0;
};
using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>;

View File

@ -7,6 +7,7 @@
#include <thrift/transport/TBufferTransports.h>
#include <thrift/transport/TSocket.h>
#include <Storages/Hive/HiveFile.h>
#include <Poco/URI.h>
namespace DB

View File

@ -8,6 +8,7 @@
#include <Poco/URI.h>
#include <Common/logger_useful.h>
#include <Common/CurrentMetrics.h>
#include <Common/RemoteHostFilter.h>
#include <Columns/IColumn.h>
#include <Core/Block.h>

View File

@ -2,6 +2,7 @@
#if USE_HDFS
#include <Common/logger_useful.h>
#include <Common/RemoteHostFilter.h>
#include <Core/Settings.h>
#include <Parsers/IAST.h>
#include <Formats/FormatFactory.h>

View File

@ -4,7 +4,7 @@ DROP TABLE IF EXISTS null_in__fuzz_6;
set allow_suspicious_low_cardinality_types = 1;
CREATE TABLE null_in__fuzz_6 (`dt` LowCardinality(UInt16), `idx` Int32, `i` Nullable(Int256), `s` Int32) ENGINE = MergeTree PARTITION BY dt ORDER BY idx;
insert into null_in__fuzz_6 select * from generateRandom() limit 1;
insert into null_in__fuzz_6 select * from generateRandom() where i is not null limit 1;
SET transform_null_in = 0;