add metrics, add settings

This commit is contained in:
taiyang-li 2024-10-18 12:23:16 +08:00
parent a957874f72
commit 9875a50754
5 changed files with 33 additions and 10 deletions

View File

@ -61,6 +61,8 @@
M(ArenaAllocBytes, "Number of bytes allocated for memory Arena (used for GROUP BY and similar operations)", ValueType::Bytes) \
M(FunctionExecute, "Number of SQL ordinary function calls (SQL functions are called on per-block basis, so this number represents the number of blocks).", ValueType::Number) \
M(TableFunctionExecute, "Number of table function calls.", ValueType::Number) \
M(DefaultImplementationForNullsRows, "Number of rows processed by default implementation for nulls in function execution", ValueType::Number) \
M(DefaultImplementationForNullsRowsWithNulls, "Number of rows which contain null values processed by default implementation for nulls in function execution", ValueType::Number) \
M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.", ValueType::Number) \
M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \

View File

@ -5459,8 +5459,12 @@ Only available in ClickHouse Cloud. Number of background threads for speculative
Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.
)", 0) \
M(Bool, short_circuit_default_implementation_for_nulls, true, R"(
Setting for short-circuit default implementations for null in function with useDefaultImplementationForNulls() = true.
Setting for short-circuit default implementations for nulls in function with useDefaultImplementationForNulls() = true.
If true, function will not actually evaluate for rows in which there is at least one argument with null value.
)", 0) \
M(Double, short_circuit_default_implementation_for_nulls_threshold, 1.0, R"(
Ratio threshold for short-circuit default implementations for nulls in function with useDefaultImplementationForNulls() = true. It is only valid when short_circuit_default_implementation_for_nulls is true.
When the ratio of rows containing nulls to the total number of rows is greater than or equal to this threshold, these rows containing nulls would not be evaluated.
)", 0) \
M(Int64, prefer_warmed_unmerged_parts_seconds, 0, R"(
Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.

View File

@ -98,6 +98,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"distributed_cache_max_unacked_inflight_packets", 10, 10, "A setting for ClickHouse Cloud"},
{"distributed_cache_data_packet_ack_window", 5, 5, "A setting for ClickHouse Cloud"},
{"short_circuit_default_implementation_for_nulls", false, true, "Setting for short-circuit default implementations for null in function with useDefaultImplementationForNulls() = true. If true, function will not actually evaluate for rows in which there are at least one argument with null value."},
/// Values mirror the 1.0 default with which this setting is declared in the settings list.
{"short_circuit_default_implementation_for_nulls_threshold", 1.0, 1.0, "Ratio threshold for short-circuit default implementations for nulls in function with useDefaultImplementationForNulls() = true. It is only valid when short_circuit_default_implementation_for_nulls is true."},
{"max_parts_to_move", 1000, 1000, "New setting"},
}
},

View File

@ -31,6 +31,11 @@
# include <llvm/IR/IRBuilder.h>
#endif
/// Profile event counters used by this translation unit; they are only declared here and defined elsewhere.
namespace ProfileEvents
{
/// Incremented by the total number of rows handled by the default implementation for nulls.
extern const Event DefaultImplementationForNullsRows;
/// Incremented by the number of those rows that contain at least one null argument.
extern const Event DefaultImplementationForNullsRowsWithNulls;
}
namespace DB
{
@ -38,6 +43,7 @@ namespace DB
/// Query-level settings read by this translation unit; they are only declared here and defined elsewhere.
namespace Setting
{
/// Enables short-circuit evaluation in the default implementation for nulls.
extern const SettingsBool short_circuit_default_implementation_for_nulls;
/// Ratio of rows with nulls at or above which short-circuit evaluation is applied; only read when the setting above is enabled.
extern const SettingsDouble short_circuit_default_implementation_for_nulls_threshold;
}
namespace ErrorCodes
@ -244,12 +250,21 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
/// Don't need to evaluate function if each row contains at least one null value and not all input columns are constant.
return result_type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst();
}
else if (!mask_info.has_zeros || all_columns_constant || !short_circuit_default_implementation_for_nulls)
/// Decide whether to evaluate the function only on rows without nulls, and record row statistics.
/// NOTE(review): presumably the mask holds 1 for rows in which no argument is null, so the count
/// below is the number of such rows - confirm against the code that builds the mask.
size_t rows_without_nulls = countBytesInFilter(mask.data(), 0, mask.size());
size_t rows_with_nulls = mask.size() - rows_without_nulls;
/// NOTE(review): for an empty mask this is 0 / 0.0; confirm that an empty mask cannot reach this point
/// or that a NaN ratio (which compares false below) is acceptable.
double null_ratio = rows_with_nulls / static_cast<double>(mask.size());
/// Short-circuit only when the setting is enabled, not all columns are constant,
/// and the share of rows with nulls reaches the configured threshold.
bool should_short_circuit = short_circuit_default_implementation_for_nulls && !all_columns_constant
&& null_ratio >= short_circuit_default_implementation_for_nulls_threshold;
/// Counters are updated regardless of which execution path is taken afterwards.
ProfileEvents::increment(ProfileEvents::DefaultImplementationForNullsRows, mask.size());
ProfileEvents::increment(ProfileEvents::DefaultImplementationForNullsRowsWithNulls, rows_with_nulls);
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
auto temporary_result_type = removeNullable(result_type);
if (!should_short_circuit)
{
/// Each row should be evaluated if there are no nulls or short circuiting is disabled.
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
auto temporary_result_type = removeNullable(result_type);
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run);
/// Invert mask as null map
@ -263,15 +278,12 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
else
{
/// If short circuit is enabled, we only execute the function on rows with all arguments not null
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
auto temporary_result_type = removeNullable(result_type);
/// Filter every column by mask
size_t size_hint = countBytesInFilter(mask.data(), 0, mask.size());
for (auto & col : temporary_columns)
col.column = col.column->filter(mask, size_hint);
col.column = col.column->filter(mask, rows_without_nulls);
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, size_hint, dry_run);
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, rows_without_nulls, dry_run);
auto mutable_res = IColumn::mutate(std::move(res));
mutable_res->expand(mask, false);
@ -350,7 +362,10 @@ IExecutableFunction::IExecutableFunction()
{
/// Capture the short-circuit settings from the query context attached to the current thread, if any.
auto query_context = CurrentThread::get().getQueryContext();
if (query_context && query_context->getSettingsRef()[Setting::short_circuit_default_implementation_for_nulls])
{
short_circuit_default_implementation_for_nulls = true;
/// The threshold is read only when short-circuiting is enabled; otherwise the member keeps its initial value.
short_circuit_default_implementation_for_nulls_threshold = query_context->getSettingsRef()[Setting::short_circuit_default_implementation_for_nulls_threshold];
}
}
}

View File

@ -123,6 +123,7 @@ private:
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const;
/// Whether the default implementation for nulls may skip rows that contain nulls.
/// Stays false unless the constructor finds the corresponding query setting enabled.
bool short_circuit_default_implementation_for_nulls = false;
/// Ratio of rows with nulls (compared with `>=`) required before short-circuiting is applied.
/// Assigned from the query setting by the constructor only when short-circuiting is enabled.
double short_circuit_default_implementation_for_nulls_threshold = 0.0;
};
using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>;