add metrics, add settings

This commit is contained in:
taiyang-li 2024-10-18 12:23:16 +08:00
parent a957874f72
commit 9875a50754
5 changed files with 33 additions and 10 deletions

View File

@ -61,6 +61,8 @@
M(ArenaAllocBytes, "Number of bytes allocated for memory Arena (used for GROUP BY and similar operations)", ValueType::Bytes) \ M(ArenaAllocBytes, "Number of bytes allocated for memory Arena (used for GROUP BY and similar operations)", ValueType::Bytes) \
M(FunctionExecute, "Number of SQL ordinary function calls (SQL functions are called on per-block basis, so this number represents the number of blocks).", ValueType::Number) \ M(FunctionExecute, "Number of SQL ordinary function calls (SQL functions are called on per-block basis, so this number represents the number of blocks).", ValueType::Number) \
M(TableFunctionExecute, "Number of table function calls.", ValueType::Number) \ M(TableFunctionExecute, "Number of table function calls.", ValueType::Number) \
M(DefaultImplementationForNullsRows, "Number of rows processed by default implementation for nulls in function execution", ValueType::Number) \
M(DefaultImplementationForNullsRowsWithNulls, "Number of rows which contain null values processed by default implementation for nulls in function execution", ValueType::Number) \
M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.", ValueType::Number) \ M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.", ValueType::Number) \
M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \ M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.", ValueType::Number) \
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \ M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.", ValueType::Number) \

View File

@ -5459,8 +5459,12 @@ Only available in ClickHouse Cloud. Number of background threads for speculative
Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree. Only available in ClickHouse Cloud. Exclude new data parts from SELECT queries until they're either pre-warmed (see cache_populated_by_fetch) or this many seconds old. Only for Replicated-/SharedMergeTree.
)", 0) \ )", 0) \
M(Bool, short_circuit_default_implementation_for_nulls, true, R"( M(Bool, short_circuit_default_implementation_for_nulls, true, R"(
Setting for short-circuit default implementations for null in function with useDefaultImplementationForNulls() = true. Setting for short-circuit default implementations for nulls in function with useDefaultImplementationForNulls() = true.
If true, function will not actually evaluate for rows in which there are at least one argument with null value. If true, function will not actually evaluate for rows in which there are at least one argument with null value.
)", 0) \
M(Double, short_circuit_default_implementation_for_nulls_threshold, 1.0, R"(
Ratio threshold for short-circuit default implementations for nulls in function with useDefaultImplementationForNulls() = true. It only takes effect when short_circuit_default_implementation_for_nulls is true.
When the ratio of rows containing nulls to the total number of rows reaches or exceeds this threshold, the rows containing nulls are not evaluated.
)", 0) \ )", 0) \
M(Int64, prefer_warmed_unmerged_parts_seconds, 0, R"( M(Int64, prefer_warmed_unmerged_parts_seconds, 0, R"(
Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm. Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.

View File

@ -98,6 +98,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"distributed_cache_max_unacked_inflight_packets", 10, 10, "A setting for ClickHouse Cloud"}, {"distributed_cache_max_unacked_inflight_packets", 10, 10, "A setting for ClickHouse Cloud"},
{"distributed_cache_data_packet_ack_window", 5, 5, "A setting for ClickHouse Cloud"}, {"distributed_cache_data_packet_ack_window", 5, 5, "A setting for ClickHouse Cloud"},
{"short_circuit_default_implementation_for_nulls", false, true, "Setting for short-circuit default implementations for null in function with useDefaultImplementationForNulls() = true. If true, function will not actually evaluate for rows in which there are at least one argument with null value."}, {"short_circuit_default_implementation_for_nulls", false, true, "Setting for short-circuit default implementations for null in function with useDefaultImplementationForNulls() = true. If true, function will not actually evaluate for rows in which there are at least one argument with null value."},
/// New setting: the previous and the new value both equal its declared default (1.0).
{"short_circuit_default_implementation_for_nulls_threshold", 1.0, 1.0, "Ratio threshold for short-circuit default implementations for nulls in function with useDefaultImplementationForNulls() = true. It is only valid when short_circuit_default_implementation_for_nulls is true."},
{"max_parts_to_move", 1000, 1000, "New setting"}, {"max_parts_to_move", 1000, 1000, "New setting"},
} }
}, },

View File

@ -31,6 +31,11 @@
# include <llvm/IR/IRBuilder.h> # include <llvm/IR/IRBuilder.h>
#endif #endif
/// Profile event counters incremented by the default implementation for nulls.
/// Only declared here (extern); the definitions live elsewhere.
namespace ProfileEvents
{
/// Number of rows processed by the default implementation for nulls in function execution.
extern const Event DefaultImplementationForNullsRows;
/// Number of those rows that contain null values.
extern const Event DefaultImplementationForNullsRowsWithNulls;
}
namespace DB namespace DB
{ {
@ -38,6 +43,7 @@ namespace DB
namespace Setting namespace Setting
{ {
extern const SettingsBool short_circuit_default_implementation_for_nulls; extern const SettingsBool short_circuit_default_implementation_for_nulls;
extern const SettingsDouble short_circuit_default_implementation_for_nulls_threshold;
} }
namespace ErrorCodes namespace ErrorCodes
@ -244,12 +250,21 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
/// Don't need to evaluate function if each row contains at least one null value and not all input columns are constant. /// Don't need to evaluate function if each row contains at least one null value and not all input columns are constant.
return result_type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst(); return result_type->createColumnConstWithDefaultValue(input_rows_count)->convertToFullColumnIfConst();
} }
else if (!mask_info.has_zeros || all_columns_constant || !short_circuit_default_implementation_for_nulls)
size_t rows_without_nulls = countBytesInFilter(mask.data(), 0, mask.size());
size_t rows_with_nulls = mask.size() - rows_without_nulls;
double null_ratio = rows_with_nulls / static_cast<double>(mask.size());
bool should_short_circuit = short_circuit_default_implementation_for_nulls && !all_columns_constant
&& null_ratio >= short_circuit_default_implementation_for_nulls_threshold;
ProfileEvents::increment(ProfileEvents::DefaultImplementationForNullsRows, mask.size());
ProfileEvents::increment(ProfileEvents::DefaultImplementationForNullsRowsWithNulls, rows_with_nulls);
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
auto temporary_result_type = removeNullable(result_type);
if (!should_short_circuit)
{ {
/// Each row should be evaluated if there are no nulls or short circuiting is disabled. /// Each row should be evaluated if there are no nulls or short circuiting is disabled.
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
auto temporary_result_type = removeNullable(result_type);
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run); auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, input_rows_count, dry_run);
/// Invert mask as null map /// Invert mask as null map
@ -263,15 +278,12 @@ ColumnPtr IExecutableFunction::defaultImplementationForNulls(
else else
{ {
/// If short circuit is enabled, we only execute the function on rows with all arguments not null /// If short circuit is enabled, we only execute the function on rows with all arguments not null
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(args);
auto temporary_result_type = removeNullable(result_type);
/// Filter every column by mask /// Filter every column by mask
size_t size_hint = countBytesInFilter(mask.data(), 0, mask.size());
for (auto & col : temporary_columns) for (auto & col : temporary_columns)
col.column = col.column->filter(mask, size_hint); col.column = col.column->filter(mask, rows_without_nulls);
auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, size_hint, dry_run); auto res = executeWithoutLowCardinalityColumns(temporary_columns, temporary_result_type, rows_without_nulls, dry_run);
auto mutable_res = IColumn::mutate(std::move(res)); auto mutable_res = IColumn::mutate(std::move(res));
mutable_res->expand(mask, false); mutable_res->expand(mask, false);
@ -350,7 +362,10 @@ IExecutableFunction::IExecutableFunction()
{ {
auto query_context = CurrentThread::get().getQueryContext(); auto query_context = CurrentThread::get().getQueryContext();
if (query_context && query_context->getSettingsRef()[Setting::short_circuit_default_implementation_for_nulls]) if (query_context && query_context->getSettingsRef()[Setting::short_circuit_default_implementation_for_nulls])
{
short_circuit_default_implementation_for_nulls = true; short_circuit_default_implementation_for_nulls = true;
short_circuit_default_implementation_for_nulls_threshold = query_context->getSettingsRef()[Setting::short_circuit_default_implementation_for_nulls_threshold];
}
} }
} }

View File

@ -123,6 +123,7 @@ private:
const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const; const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run) const;
bool short_circuit_default_implementation_for_nulls = false; bool short_circuit_default_implementation_for_nulls = false;
double short_circuit_default_implementation_for_nulls_threshold = 0.0;
}; };
using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>; using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>;