diff --git a/dbms/src/Interpreters/ExpressionJIT.cpp b/dbms/src/Interpreters/ExpressionJIT.cpp index 2d9a953352a..0bc5c683d04 100644 --- a/dbms/src/Interpreters/ExpressionJIT.cpp +++ b/dbms/src/Interpreters/ExpressionJIT.cpp @@ -705,8 +705,6 @@ void compileFunctions(ExpressionActions::Actions & actions, const Names & output static LLVMTargetInitializer initializer; auto dependents = getActionsDependents(actions, output_columns); - /// Initialize context as late as possible and only if needed - std::shared_ptr context; std::vector fused(actions.size()); for (size_t i = 0; i < actions.size(); ++i) { @@ -722,7 +720,7 @@ void compileFunctions(ExpressionActions::Actions & actions, const Names & output auto hash_key = ExpressionActions::ActionsHash{}(fused[i]); { - std::lock_guard lock(mutex); + std::lock_guard lock(mutex); if (counter[hash_key]++ < min_count_to_compile) continue; } @@ -730,26 +728,24 @@ void compileFunctions(ExpressionActions::Actions & actions, const Names & output std::shared_ptr fn; if (compilation_cache) { - /// Lock here, to be sure, that all functions will be compiled - std::lock_guard lock(mutex); - /// Don't use getOrSet here, because sometimes we need to initialize context - fn = compilation_cache->get(hash_key); - if (!fn) + std::tie(fn, std::ignore) = compilation_cache->getOrSet(hash_key, [&inlined_func=std::as_const(fused[i]), &sample_block] () { - if (!context) - context = std::make_shared(); Stopwatch watch; - fn = std::make_shared(fused[i], context, sample_block); + std::shared_ptr context = std::make_shared(); + auto result_fn = std::make_shared(inlined_func, context, sample_block); + size_t used_memory = context->compileAllFunctionsToNativeCode(); + ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, used_memory); ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds()); - compilation_cache->set(hash_key, fn); - } + return result_fn; + }); } else { - if (!context) - context = 
std::make_shared(); + std::shared_ptr context = std::make_shared(); Stopwatch watch; fn = std::make_shared(fused[i], context, sample_block); + size_t used_memory = context->compileAllFunctionsToNativeCode(); + ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, used_memory); ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds()); } @@ -765,20 +761,10 @@ void compileFunctions(ExpressionActions::Actions & actions, const Names & output fused[*dep].insert(fused[*dep].end(), fused[i].begin(), fused[i].end()); } - if (context) - { - /// Lock here, because other threads can get uncompilted functions from cache - std::lock_guard lock(mutex); - size_t used_memory = context->compileAllFunctionsToNativeCode(); - ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, used_memory); - } - for (size_t i = 0; i < actions.size(); ++i) { if (actions[i].type == ExpressionAction::APPLY_FUNCTION && actions[i].is_function_compiled) - { actions[i].function = actions[i].function_base->prepare({}, {}, 0); /// Arguments are not used for LLVMFunction. - } } } diff --git a/dbms/src/Interpreters/Settings.h b/dbms/src/Interpreters/Settings.h index db066ce6d72..1cc816e245e 100644 --- a/dbms/src/Interpreters/Settings.h +++ b/dbms/src/Interpreters/Settings.h @@ -74,7 +74,7 @@ struct Settings M(SettingFloat, totals_auto_threshold, 0.5, "The threshold for totals_mode = 'auto'.") \ \ M(SettingBool, compile, false, "Whether query compilation is enabled.") \ - M(SettingBool, compile_expressions, false, "Compile some scalar functions and operators to native code.") \ + M(SettingBool, compile_expressions, true, "Compile some scalar functions and operators to native code.") \ M(SettingUInt64, min_count_to_compile, 3, "The number of structurally identical queries before they are compiled.") \ M(SettingUInt64, group_by_two_level_threshold, 100000, "From what number of keys, a two-level aggregation starts. 
0 - the threshold is not set.") \ M(SettingUInt64, group_by_two_level_threshold_bytes, 100000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.") \