mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Merge pull request #60129 from bigo-sg/short_circut_func
Short circuit optimization for functions executed over Nullable arguments
This commit is contained in:
commit
e3e4e45278
@ -24,7 +24,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
|
||||
|
||||
ssize_t from = data.size() - 1;
|
||||
ssize_t index = mask.size() - 1;
|
||||
data.resize(mask.size());
|
||||
data.resize_exact(mask.size());
|
||||
while (index >= 0)
|
||||
{
|
||||
if (!!mask[index] ^ inverted)
|
||||
|
@ -61,6 +61,8 @@
|
||||
M(ArenaAllocBytes, "Number of bytes allocated for memory Arena (used for GROUP BY and similar operations)", ValueType::Bytes) \
|
||||
M(FunctionExecute, "Number of SQL ordinary function calls (SQL functions are called on per-block basis, so this number represents the number of blocks).", ValueType::Number) \
|
||||
M(TableFunctionExecute, "Number of table function calls.", ValueType::Number) \
|
||||
M(DefaultImplementationForNullsRows, "Number of rows processed by default implementation for nulls in function execution", ValueType::Number) \
|
||||
M(DefaultImplementationForNullsRowsWithNulls, "Number of rows which contain null values processed by default implementation for nulls in function execution", ValueType::Number) \
|
||||
M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.", ValueType::Number) \
|
||||
M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \
|
||||
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \
|
||||
|
@ -5520,6 +5520,13 @@ Only available in ClickHouse Cloud. Number of background threads for speculative
|
||||
)", 0) \
|
||||
DECLARE(Int64, ignore_cold_parts_seconds, 0, R"(
|
||||
Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.
|
||||
)", 0) \
|
||||
DECLARE(Bool, short_circuit_function_evaluation_for_nulls, true, R"(
|
||||
Allows to execute functions with Nullable arguments only on rows with non-NULL values in all arguments when ratio of NULL values in arguments exceeds short_circuit_function_evaluation_for_nulls_threshold. Applies only to functions that return NULL value for rows with at least one NULL value in arguments.
|
||||
)", 0) \
|
||||
DECLARE(Double, short_circuit_function_evaluation_for_nulls_threshold, 1.0, R"(
|
||||
Ratio threshold of NULL values to execute functions with Nullable arguments only on rows with non-NULL values in all arguments. Applies when setting short_circuit_function_evaluation_for_nulls is enabled.
|
||||
When the ratio of rows containing NULL values to the total number of rows exceeds this threshold, these rows containing NULL values will not be evaluated.
|
||||
)", 0) \
|
||||
DECLARE(Int64, prefer_warmed_unmerged_parts_seconds, 0, R"(
|
||||
Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.
|
||||
|
@ -64,6 +64,8 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
|
||||
},
|
||||
{"24.11",
|
||||
{
|
||||
{"short_circuit_function_evaluation_for_nulls", false, true, "Allow to execute functions with Nullable arguments only on rows with non-NULL values in all arguments"},
|
||||
{"short_circuit_function_evaluation_for_nulls_threshold", 1.0, 1.0, "Ratio threshold of NULL values to execute functions with Nullable arguments only on rows with non-NULL values in all arguments. Applies when setting short_circuit_function_evaluation_for_nulls is enabled."},
|
||||
{"enable_http_compression", false, true, "Improvement for read-only clients since they can't change settings"},
|
||||
{"validate_mutation_query", false, true, "New setting to validate mutation queries by default."},
|
||||
{"enable_job_stack_trace", false, true, "Enable by default collecting stack traces from job's scheduling."},
|
||||
|
@ -1,10 +1,10 @@
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
#include <Common/assert_cast.h>
|
||||
|
||||
|
||||
@ -13,11 +13,11 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int SIZES_OF_ARRAYS_DONT_MATCH;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * column)
|
||||
@ -27,8 +27,7 @@ const ColumnConst * checkAndGetColumnConstStringOrFixedString(const IColumn * co
|
||||
|
||||
const ColumnConst * res = assert_cast<const ColumnConst *>(column);
|
||||
|
||||
if (checkColumn<ColumnString>(&res->getDataColumn())
|
||||
|| checkColumn<ColumnFixedString>(&res->getDataColumn()))
|
||||
if (checkColumn<ColumnString>(&res->getDataColumn()) || checkColumn<ColumnFixedString>(&res->getDataColumn()))
|
||||
return res;
|
||||
|
||||
return {};
|
||||
@ -101,18 +100,22 @@ String withOrdinalEnding(size_t i)
|
||||
{
|
||||
switch (i)
|
||||
{
|
||||
case 0: return "1st";
|
||||
case 1: return "2nd";
|
||||
case 2: return "3rd";
|
||||
default: return std::to_string(i) + "th";
|
||||
case 0:
|
||||
return "1st";
|
||||
case 1:
|
||||
return "2nd";
|
||||
case 2:
|
||||
return "3rd";
|
||||
default:
|
||||
return std::to_string(i) + "th";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void validateArgumentsImpl(const IFunction & func,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t argument_offset,
|
||||
const FunctionArgumentDescriptors & descriptors)
|
||||
void validateArgumentsImpl(
|
||||
const IFunction & func,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
size_t argument_offset,
|
||||
const FunctionArgumentDescriptors & descriptors)
|
||||
{
|
||||
for (size_t i = 0; i < descriptors.size(); ++i)
|
||||
{
|
||||
@ -123,13 +126,14 @@ void validateArgumentsImpl(const IFunction & func,
|
||||
const auto & arg = arguments[i + argument_offset];
|
||||
const auto & descriptor = descriptors[i];
|
||||
if (int error_code = descriptor.isValid(arg.type, arg.column); error_code != 0)
|
||||
throw Exception(error_code,
|
||||
"A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}",
|
||||
withOrdinalEnding(argument_offset + i),
|
||||
descriptor.name,
|
||||
func.getName(),
|
||||
descriptor.type_name,
|
||||
arg.type ? arg.type->getName() : "<?>");
|
||||
throw Exception(
|
||||
error_code,
|
||||
"A value of illegal type was provided as {} argument '{}' to function '{}'. Expected: {}, got: {}",
|
||||
withOrdinalEnding(argument_offset + i),
|
||||
descriptor.name,
|
||||
func.getName(),
|
||||
descriptor.type_name,
|
||||
arg.type ? arg.type->getName() : "<?>");
|
||||
}
|
||||
}
|
||||
|
||||
@ -149,26 +153,35 @@ int FunctionArgumentDescriptor::isValid(const DataTypePtr & data_type, const Col
|
||||
return 0;
|
||||
}
|
||||
|
||||
void validateFunctionArguments(const IFunction & func,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
const FunctionArgumentDescriptors & mandatory_args,
|
||||
const FunctionArgumentDescriptors & optional_args)
|
||||
void validateFunctionArguments(
|
||||
const IFunction & func,
|
||||
const ColumnsWithTypeAndName & arguments,
|
||||
const FunctionArgumentDescriptors & mandatory_args,
|
||||
const FunctionArgumentDescriptors & optional_args)
|
||||
{
|
||||
if (arguments.size() < mandatory_args.size() || arguments.size() > mandatory_args.size() + optional_args.size())
|
||||
{
|
||||
auto argument_singular_or_plural = [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; };
|
||||
auto argument_singular_or_plural
|
||||
= [](const auto & args) -> std::string_view { return args.size() == 1 ? "argument" : "arguments"; };
|
||||
|
||||
String expected_args_string;
|
||||
if (!mandatory_args.empty() && !optional_args.empty())
|
||||
expected_args_string = fmt::format("{} mandatory {} and {} optional {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args), optional_args.size(), argument_singular_or_plural(optional_args));
|
||||
expected_args_string = fmt::format(
|
||||
"{} mandatory {} and {} optional {}",
|
||||
mandatory_args.size(),
|
||||
argument_singular_or_plural(mandatory_args),
|
||||
optional_args.size(),
|
||||
argument_singular_or_plural(optional_args));
|
||||
else if (!mandatory_args.empty() && optional_args.empty())
|
||||
expected_args_string = fmt::format("{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments"
|
||||
expected_args_string = fmt::format(
|
||||
"{} {}", mandatory_args.size(), argument_singular_or_plural(mandatory_args)); /// intentionally not "_mandatory_ arguments"
|
||||
else if (mandatory_args.empty() && !optional_args.empty())
|
||||
expected_args_string = fmt::format("{} optional {}", optional_args.size(), argument_singular_or_plural(optional_args));
|
||||
else
|
||||
expected_args_string = "0 arguments";
|
||||
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"An incorrect number of arguments was specified for function '{}'. Expected {}, got {}",
|
||||
func.getName(),
|
||||
expected_args_string,
|
||||
@ -204,7 +217,8 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments)
|
||||
return {nested_columns, offsets->data()};
|
||||
}
|
||||
|
||||
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count)
|
||||
ColumnPtr
|
||||
wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count)
|
||||
{
|
||||
ColumnPtr result_null_map_column;
|
||||
|
||||
@ -262,6 +276,39 @@ ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & a
|
||||
return ColumnNullable::create(src_not_nullable->convertToFullColumnIfConst(), result_null_map_column);
|
||||
}
|
||||
|
||||
/// Wraps `src` into a Nullable column using an externally supplied `null_map`
/// (which is accessed as a ColumnUInt8).
///
///  - If `src` reports onlyNull(), it is returned as is.
///  - If `src` is a ColumnNullable, a mutable version of its own null map (obtained via IColumn::mutate)
///    is OR-ed row by row with `null_map` and attached to the nested column.
///  - If `src` is a ColumnConst, the result is ColumnConst(ColumnNullable) of the same size;
///    only row 0 of `null_map` is consulted in this case.
///  - Otherwise `src` (converted to a full column) is paired with `null_map` unchanged.
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnPtr & null_map)
{
    if (src->onlyNull())
        return src;

    if (const auto * nullable_src = checkAndGetColumn<ColumnNullable>(src.get()))
    {
        ColumnPtr nested = nullable_src->getNestedColumnPtr();
        ColumnPtr own_null_map = nullable_src->getNullMapColumnPtr();

        /// Combine the source's own null map with the supplied one.
        MutableColumnPtr merged_null_map = IColumn::mutate(std::move(own_null_map));
        NullMap & merged = assert_cast<ColumnUInt8 &>(*merged_null_map).getData();
        const NullMap & extra = assert_cast<const ColumnUInt8 &>(*null_map).getData();

        const size_t rows = merged.size();
        for (size_t row = 0; row < rows; ++row)
            merged[row] |= extra[row];

        ColumnPtr merged_null_map_column = std::move(merged_null_map);
        return ColumnNullable::create(nested->convertToFullColumnIfConst(), merged_null_map_column);
    }

    if (const auto * const_src = checkAndGetColumn<ColumnConst>(src.get()))
    {
        const NullMap & extra = assert_cast<const ColumnUInt8 &>(*null_map).getData();

        /// A constant column holds a single value, so a one-row null map is enough.
        const bool is_null = extra[0] || const_src->isNullAt(0);
        ColumnPtr single_row_null_map = ColumnUInt8::create(1, is_null);

        /// Strip Nullable from the constant's data column if it is already wrapped.
        ColumnPtr plain_data = const_src->getDataColumnPtr();
        if (const auto * nullable_data = checkAndGetColumn<ColumnNullable>(&const_src->getDataColumn()))
            plain_data = nullable_data->getNestedColumnPtr();

        return ColumnConst::create(ColumnNullable::create(plain_data, single_row_null_map), const_src->size());
    }

    return ColumnNullable::create(src->convertToFullColumnIfConst(), null_map);
}
|
||||
|
||||
NullPresence getNullPresense(const ColumnsWithTypeAndName & args)
|
||||
{
|
||||
NullPresence res;
|
||||
|
@ -169,6 +169,11 @@ checkAndGetNestedArrayOffset(const IColumn ** columns, size_t num_arguments);
|
||||
/// Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
|
||||
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count);
|
||||
|
||||
/** Return src wrapped into ColumnNullable using the given null map (OR-ed with src's own null map if src is already Nullable).
|
||||
* Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
|
||||
*/
|
||||
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnPtr & null_map);
|
||||
|
||||
struct NullPresence
|
||||
{
|
||||
bool has_nullable = false;
|
||||
|
@ -1,5 +1,5 @@
|
||||
#include <Functions/IFunctionAdaptors.h>
|
||||
#include <Functions/FunctionDynamicAdaptor.h>
|
||||
#include <Functions/IFunctionAdaptors.h>
|
||||
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
@ -7,13 +7,18 @@
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnSparse.h>
|
||||
#include <Columns/ColumnTuple.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/MaskOperations.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Core/TypeId.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeNothing.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/Native.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Common/CurrentThread.h>
|
||||
#include <Common/SipHash.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -27,15 +32,26 @@
|
||||
# include <llvm/IR/IRBuilder.h>
|
||||
#endif
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event DefaultImplementationForNullsRows;
|
||||
extern const Event DefaultImplementationForNullsRowsWithNulls;
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace Setting
|
||||
{
|
||||
extern const SettingsBool short_circuit_function_evaluation_for_nulls;
|
||||
extern const SettingsDouble short_circuit_function_evaluation_for_nulls_threshold;
|
||||
}
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -67,9 +83,7 @@ ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
|
||||
const auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
|
||||
|
||||
if (!low_cardinality_type)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Incompatible type for LowCardinality column: {}",
|
||||
column.type->getName());
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incompatible type for LowCardinality column: {}", column.type->getName());
|
||||
|
||||
if (can_be_executed_on_default_arguments)
|
||||
{
|
||||
@ -122,10 +136,7 @@ ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
|
||||
/// Check that these arguments are really constant.
|
||||
for (auto arg_num : arguments_to_remain_constants)
|
||||
if (arg_num < args.size() && !isColumnConst(*args[arg_num].column))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Argument at index {} for function {} must be constant",
|
||||
arg_num,
|
||||
getName());
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument at index {} for function {} must be constant", arg_num, getName());
|
||||
|
||||
if (args.empty() || !useDefaultImplementationForConstants() || !allArgumentsAreConstants(args))
|
||||
return nullptr;
|
||||
@ -139,14 +150,16 @@ ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
|
||||
{
|
||||
const ColumnWithTypeAndName & column = args[arg_num];
|
||||
|
||||
if (arguments_to_remain_constants.end() != std::find(arguments_to_remain_constants.begin(), arguments_to_remain_constants.end(), arg_num))
|
||||
if (arguments_to_remain_constants.end()
|
||||
!= std::find(arguments_to_remain_constants.begin(), arguments_to_remain_constants.end(), arg_num))
|
||||
{
|
||||
temporary_columns.emplace_back(ColumnWithTypeAndName{column.column->cloneResized(1), column.type, column.name});
|
||||
}
|
||||
else
|
||||
{
|
||||
have_converted_columns = true;
|
||||
temporary_columns.emplace_back(ColumnWithTypeAndName{ assert_cast<const ColumnConst *>(column.column.get())->getDataColumnPtr(), column.type, column.name });
|
||||
temporary_columns.emplace_back(
|
||||
ColumnWithTypeAndName{assert_cast<const ColumnConst *>(column.column.get())->getDataColumnPtr(), column.type, column.name});
|
||||
}
|
||||
}
|
||||
|
||||
@ -154,7 +167,8 @@ ColumnPtr IExecutableFunction::defaultImplementationForConstantArguments(
|
||||
* not in "arguments_to_remain_constants" set. Otherwise we get infinite recursion.
|
||||
*/
|
||||
if (!have_converted_columns)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: the function requires more arguments",
|
||||
getName());
|
||||
|
||||
@ -177,7 +191,7 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
|
||||
|
||||
NullPresence null_presence = getNullPresense(args);
|
||||
|
||||
if (null_presence.has_null_constant)
|
||||
if (null_presence.has_null_constant || null_presence.has_nullable)
|
||||
{
|
||||
// Default implementation for nulls returns null result for null arguments,
|
||||
// so the result type must be nullable.
|
||||
@ -188,17 +202,87 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
|
||||
"is expected to return Nullable result, got {}",
|
||||
getName(),
|
||||
result_type->getName());
|
||||
}
|
||||
|
||||
if (null_presence.has_null_constant)
|
||||
{
|
||||
/// If any of the input arguments is null literal, the result is null constant.
|
||||
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
||||
}
|
||||
|
||||
if (null_presence.has_nullable)
|
||||
{
|
||||
/// Usually happens during analyzing. We should return non-const column to avoid wrong constant folding.
|
||||
if (input_rows_count == 0)
|
||||
return result_type->createColumn();
|
||||
|
||||
auto result_null_map = ColumnUInt8::create(input_rows_count, 0);
|
||||
auto & result_null_map_data = result_null_map->getData();
|
||||
bool all_columns_constant = true;
|
||||
for (const auto & arg : args)
|
||||
{
|
||||
if (!isColumnConst(*arg.column))
|
||||
all_columns_constant = false;
|
||||
|
||||
if (arg.type->isNullable())
|
||||
{
|
||||
if (isColumnConst(*arg.column))
|
||||
{
|
||||
if (arg.column->onlyNull())
|
||||
{
|
||||
/// If any of input columns contains a null constant, the result is null constant.
|
||||
return result_type->createColumnConstWithDefaultValue(input_rows_count);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
const auto & null_map = assert_cast<const ColumnNullable &>(*arg.column).getNullMapData();
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
result_null_map_data[i] |= null_map[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t rows_with_nulls = countBytesInFilter(result_null_map_data.data(), 0, input_rows_count);
|
||||
size_t rows_without_nulls = input_rows_count - rows_with_nulls;
|
||||
ProfileEvents::increment(ProfileEvents::DefaultImplementationForNullsRows, input_rows_count);
|
||||
ProfileEvents::increment(ProfileEvents::DefaultImplementationForNullsRowsWithNulls, rows_with_nulls);
|
||||
|
||||
if (rows_without_nulls == 0 && !all_columns_constant)
|
||||
{
|
||||
/// Don't need to evaluate function if each row contains at least one null value and not all input columns are constant.
|
||||
return result_type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst();
|
||||
}
|
||||
|
||||
double null_ratio = rows_with_nulls / static_cast<double>(result_null_map_data.size());
|
||||
bool should_short_circuit = short_circuit_function_evaluation_for_nulls && !all_columns_constant
|
||||
&& null_ratio >= short_circuit_function_evaluation_for_nulls_threshold;
|
||||
|
||||
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
|
||||
auto temporary_result_type = removeNullable(result_type);
|
||||
|
||||
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run);
|
||||
return wrapInNullable(res, args, result_type, input_rows_count);
|
||||
if (!should_short_circuit)
|
||||
{
|
||||
/// Each row should be evaluated if there are no nulls or short circuiting is disabled.
|
||||
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run);
|
||||
auto new_res = wrapInNullable(res, std::move(result_null_map));
|
||||
return new_res;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// If short circuit is enabled, we only execute the function on rows with all arguments not null
|
||||
|
||||
/// Filter every column by mask
|
||||
for (auto & col : temporary_columns)
|
||||
col.column = col.column->filter(result_null_map_data, rows_without_nulls);
|
||||
|
||||
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, rows_without_nulls, dry_run);
|
||||
auto mutable_res = IColumn::mutate(std::move(res));
|
||||
mutable_res->expand(result_null_map_data, false);
|
||||
|
||||
auto new_res = wrapInNullable(std::move(mutable_res), std::move(result_null_map));
|
||||
return new_res;
|
||||
}
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
@ -260,7 +344,21 @@ static void convertSparseColumnsToFull(ColumnsWithTypeAndName & args)
|
||||
column.column = recursiveRemoveSparse(column.column);
|
||||
}
|
||||
|
||||
ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
|
||||
/// Reads the NULL short-circuit settings from the query context of the current thread.
/// If the thread is not initialized, there is no query context, or the setting
/// short_circuit_function_evaluation_for_nulls is off, the members are left untouched.
IExecutableFunction::IExecutableFunction()
{
    if (!CurrentThread::isInitialized())
        return;

    const auto context = CurrentThread::get().getQueryContext();
    if (!context)
        return;

    if (!context->getSettingsRef()[Setting::short_circuit_function_evaluation_for_nulls])
        return;

    /// The threshold is only read when the feature itself is enabled.
    short_circuit_function_evaluation_for_nulls = true;
    short_circuit_function_evaluation_for_nulls_threshold
        = context->getSettingsRef()[Setting::short_circuit_function_evaluation_for_nulls_threshold];
}
|
||||
|
||||
ColumnPtr IExecutableFunction::executeWithoutSparseColumns(
|
||||
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
|
||||
{
|
||||
ColumnPtr result;
|
||||
if (useDefaultImplementationForLowCardinalityColumns())
|
||||
@ -273,19 +371,16 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
|
||||
|
||||
const auto & dictionary_type = res_low_cardinality_type->getDictionaryType();
|
||||
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
|
||||
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);
|
||||
columns_without_low_cardinality, can_be_executed_on_default_arguments, input_rows_count);
|
||||
|
||||
size_t new_input_rows_count = columns_without_low_cardinality.empty()
|
||||
? input_rows_count
|
||||
: columns_without_low_cardinality.front().column->size();
|
||||
size_t new_input_rows_count
|
||||
= columns_without_low_cardinality.empty() ? input_rows_count : columns_without_low_cardinality.front().column->size();
|
||||
checkFunctionArgumentSizes(columns_without_low_cardinality, new_input_rows_count);
|
||||
|
||||
auto res = executeWithoutLowCardinalityColumns(columns_without_low_cardinality, dictionary_type, new_input_rows_count, dry_run);
|
||||
bool res_is_constant = isColumnConst(*res);
|
||||
|
||||
auto keys = res_is_constant
|
||||
? res->cloneResized(1)->convertToFullColumnIfConst()
|
||||
: res;
|
||||
auto keys = res_is_constant ? res->cloneResized(1)->convertToFullColumnIfConst() : res;
|
||||
|
||||
auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType());
|
||||
ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size());
|
||||
@ -311,7 +406,8 @@ ColumnPtr IExecutableFunction::executeWithoutSparseColumns(const ColumnsWithType
|
||||
return result;
|
||||
}
|
||||
|
||||
ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
|
||||
ColumnPtr IExecutableFunction::execute(
|
||||
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const
|
||||
{
|
||||
checkFunctionArgumentSizes(arguments, input_rows_count);
|
||||
|
||||
@ -372,7 +468,7 @@ ColumnPtr IExecutableFunction::execute(const ColumnsWithTypeAndName & arguments,
|
||||
if (!result_type->canBeInsideSparseColumns() || !res->isDefaultAt(0) || res->getNumberOfDefaultRows() != 1)
|
||||
{
|
||||
const auto & offsets_data = assert_cast<const ColumnVector<UInt64> &>(*sparse_offsets).getData();
|
||||
return res->createWithOffsets(offsets_data, *createColumnConst(res, 0), input_rows_count, /*shift=*/ 1);
|
||||
return res->createWithOffsets(offsets_data, *createColumnConst(res, 0), input_rows_count, /*shift=*/1);
|
||||
}
|
||||
|
||||
return ColumnSparse::create(res, sparse_offsets, input_rows_count);
|
||||
@ -398,7 +494,8 @@ void IFunctionOverloadResolver::checkNumberOfArguments(size_t number_of_argument
|
||||
size_t expected_number_of_arguments = getNumberOfArguments();
|
||||
|
||||
if (number_of_arguments != expected_number_of_arguments)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: passed {}, should be {}",
|
||||
getName(),
|
||||
number_of_arguments,
|
||||
@ -437,9 +534,8 @@ DataTypePtr IFunctionOverloadResolver::getReturnType(const ColumnsWithTypeAndNam
|
||||
|
||||
auto type_without_low_cardinality = getReturnTypeWithoutLowCardinality(args_without_low_cardinality);
|
||||
|
||||
if (canBeExecutedOnLowCardinalityDictionary() && has_low_cardinality
|
||||
&& num_full_low_cardinality_columns <= 1 && num_full_ordinary_columns == 0
|
||||
&& type_without_low_cardinality->canBeInsideLowCardinality())
|
||||
if (canBeExecutedOnLowCardinalityDictionary() && has_low_cardinality && num_full_low_cardinality_columns <= 1
|
||||
&& num_full_ordinary_columns == 0 && type_without_low_cardinality->canBeInsideLowCardinality())
|
||||
return std::make_shared<DataTypeLowCardinality>(type_without_low_cardinality);
|
||||
return type_without_low_cardinality;
|
||||
}
|
||||
@ -561,7 +657,7 @@ llvm::Value * IFunction::compile(llvm::IRBuilderBase & builder, const ValuesWith
|
||||
ValuesWithType unwrapped_arguments;
|
||||
unwrapped_arguments.reserve(arguments.size());
|
||||
|
||||
std::vector<llvm::Value*> is_null_values;
|
||||
std::vector<llvm::Value *> is_null_values;
|
||||
|
||||
for (size_t i = 0; i < arguments.size(); ++i)
|
||||
{
|
||||
|
@ -45,6 +45,7 @@ using OptionalFieldInterval = std::optional<FieldInterval>;
|
||||
class IExecutableFunction
|
||||
{
|
||||
public:
|
||||
IExecutableFunction();
|
||||
|
||||
virtual ~IExecutableFunction() = default;
|
||||
|
||||
@ -120,6 +121,9 @@ private:
|
||||
|
||||
ColumnPtr executeWithoutSparseColumns(
|
||||
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const;
|
||||
|
||||
bool short_circuit_function_evaluation_for_nulls = false;
|
||||
double short_circuit_function_evaluation_for_nulls_threshold = 0.0;
|
||||
};
|
||||
|
||||
using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>;
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <thrift/transport/TBufferTransports.h>
|
||||
#include <thrift/transport/TSocket.h>
|
||||
#include <Storages/Hive/HiveFile.h>
|
||||
#include <Poco/URI.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <Poco/URI.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/CurrentMetrics.h>
|
||||
#include <Common/RemoteHostFilter.h>
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Core/Block.h>
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#if USE_HDFS
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/RemoteHostFilter.h>
|
||||
#include <Core/Settings.h>
|
||||
#include <Parsers/IAST.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
|
@ -4,7 +4,7 @@ DROP TABLE IF EXISTS null_in__fuzz_6;
|
||||
set allow_suspicious_low_cardinality_types = 1;
|
||||
|
||||
CREATE TABLE null_in__fuzz_6 (`dt` LowCardinality(UInt16), `idx` Int32, `i` Nullable(Int256), `s` Int32) ENGINE = MergeTree PARTITION BY dt ORDER BY idx;
|
||||
insert into null_in__fuzz_6 select * from generateRandom() limit 1;
|
||||
insert into null_in__fuzz_6 select * from generateRandom() where i is not null limit 1;
|
||||
|
||||
SET transform_null_in = 0;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user