/// JIT compilation of chains of expression actions into native code via LLVM.
///
/// NOTE(review): in this copy of the file every `#include` has lost its header name and most
/// template argument lists are missing (e.g. `reinterpret_cast(column)`, `std::shared_ptr context`).
/// Presumably an extraction artifact - restore the `<...>` parts from version control before building.
#include
#if USE_EMBEDDED_COMPILER

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"

/** The Y_IGNORE marker means that the header is not analyzed by the Arcadia build system.
  * "Arcadia" is the name of the internal Yandex source code repository.
  * ClickHouse has limited support for being built in Arcadia
  * (ClickHouse source code is used in other Yandex products as a library).
  * Some libraries are not enabled when building inside Arcadia;
  * that is what Y_IGNORE indicates.
  */
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE
#include // Y_IGNORE

#pragma GCC diagnostic pop


/// Profiling counters incremented by this file (defined elsewhere).
namespace ProfileEvents
{
    extern const Event CompileFunction;
    extern const Event CompileExpressionsMicroseconds;
    extern const Event CompileExpressionsBytes;
}

namespace DB
{

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int CANNOT_COMPILE_CODE;
}

namespace
{
    /// Raw description of one column as handed to a compiled function.
    /// The field order mirrors the LLVM struct type built in `compileFunctionToLLVMByteCode`
    /// (two byte pointers followed by a size_t).
    struct ColumnData
    {
        const char * data = nullptr;    /// raw values of the (nested, non-const) column
        const char * null = nullptr;    /// raw null map; stays nullptr when the column is not nullable
        size_t stride;                  /// 0 for constant columns, otherwise `sizeOfValueIfFixed()` (see `getColumnData`)
    };

    /// LLVM values that stand for one column while the loop of a compiled function is being generated.
    struct ColumnDataPlaceholder
    {
        llvm::Value * data_init; /// first row
        llvm::Value * null_init; /// null map of the first row; nullptr when the type is not nullable
        llvm::Value * stride;
        llvm::PHINode * data; /// current row
        llvm::PHINode * null; /// null map of the current row; only created when `null_init` is set
    };
}

/// Extracts raw data pointer, null map pointer and stride from `column`.
/// A constant column is unwrapped to its data column and gets stride 0;
/// a nullable column contributes its null map and is unwrapped to the nested column.
static ColumnData getColumnData(const IColumn * column)
{
    ColumnData result;
    const bool is_const = column->isColumnConst();
    if (is_const)
        column = &reinterpret_cast(column)->getDataColumn();
    if (auto * nullable = typeid_cast(column))
    {
        result.null = nullable->getNullMapColumn().getRawData().data;
        column = &nullable->getNestedColumn();
    }
    result.data = column->getRawData().data;
    result.stride = is_const ? 0 : column->sizeOfValueIfFixed();
    return result;
}

/// Evaluates a unary `function` on the single value `value` and stores the result back into `value`.
/// A two-column block is built for that: a one-row constant column "x" holding `value` and an empty result column "y".
static void applyFunction(IFunctionBase & function, Field & value)
{
    const auto & type = function.getArgumentTypes().at(0);
    Block block = {{ type->createColumnConst(1, value), type, "x" }, { nullptr, function.getReturnType(), "y" }};
    /// presumably (block, argument positions, result position, row count) - confirm against IFunctionBase::execute
    function.execute(block, {0}, 1, 1);
    block.safeGetByPosition(1).column->get(0, value);
}

/// Creates a target machine for the host: process triple, host CPU name and, when they can be detected, host CPU features.
/// Throws CANNOT_COMPILE_CODE if the target cannot be looked up.
/// Returns a raw pointer; the only caller in this file stores it in `LLVMContext::machine`.
static llvm::TargetMachine * getNativeMachine()
{
    std::string error;
    auto cpu = llvm::sys::getHostCPUName();
    auto triple = llvm::sys::getProcessTriple();
    auto target = llvm::TargetRegistry::lookupTarget(triple, error);
    if (!target)
        throw Exception("Could not initialize native target: " + error, ErrorCodes::CANNOT_COMPILE_CODE);
    llvm::SubtargetFeatures features;
    llvm::StringMap feature_map;
    if (llvm::sys::getHostCPUFeatures(feature_map))
        for (auto & f : feature_map)
            features.AddFeature(f.first(), f.second);
    llvm::TargetOptions options;
    /// The trailing arguments of createTargetMachine differ between LLVM versions.
    return target->createTargetMachine(
        triple, cpu, features.getString(), options, llvm::None,
#if LLVM_VERSION_MAJOR >= 6
        llvm::None, llvm::CodeGenOpt::Default, /*jit=*/true
#else
        llvm::CodeModel::Default, llvm::CodeGenOpt::Default
#endif
    );
}

#if LLVM_VERSION_MAJOR >= 7
/// Adapts a `llvm::JITSymbolResolver` to the resolver object produced by `llvm::orc::createSymbolResolver`.
/// Both lambdas capture `jsr` by reference, so `jsr` has to outlive the returned resolver.
auto wrapJITSymbolResolver(llvm::JITSymbolResolver & jsr)
{
    /// Fills `flags` for every symbol `jsr` knows about; returns the set of symbols it does not know.
    auto flags = [&](llvm::orc::SymbolFlagsMap & flags, const llvm::orc::SymbolNameSet & symbols)
    {
        llvm::orc::SymbolNameSet missing;
        for (const auto & symbol : symbols)
        {
            auto resolved = jsr.lookupFlags({*symbol});
            if (resolved && resolved->size())
                flags.emplace(symbol, resolved->begin()->second);
            else
                missing.emplace(symbol);
        }
        return missing;
    };
    /// Resolves every symbol `jsr` can find through `query`; returns the set of symbols left unresolved.
    auto symbols = [&](std::shared_ptr query, llvm::orc::SymbolNameSet symbols)
    {
        llvm::orc::SymbolNameSet missing;
        for (const auto & symbol : symbols)
        {
            auto resolved = jsr.lookup({*symbol});
            if (resolved && resolved->size())
                query->resolve(symbol, resolved->begin()->second);
            else
                missing.emplace(symbol);
        }
        return missing;
    };
    return llvm::orc::createSymbolResolver(flags, symbols);
}
#endif

#if LLVM_VERSION_MAJOR >= 6
/// Memory mapper that forwards to `llvm::sys::Memory` and records mapped/unmapped sizes in `memory_tracker`.
struct CountingMMapper final : public llvm::SectionMemoryManager::MemoryMapper
{
    MemoryTracker memory_tracker{VariableContext::Global};

    /// The requested size is registered in the tracker before the mapping is attempted.
    llvm::sys::MemoryBlock allocateMappedMemory(llvm::SectionMemoryManager::AllocationPurpose /*purpose*/,
                                                size_t num_bytes,
                                                const llvm::sys::MemoryBlock * const near_block,
                                                unsigned flags,
                                                std::error_code & error_code) override
    {
        memory_tracker.alloc(num_bytes);
        return llvm::sys::Memory::allocateMappedMemory(num_bytes, near_block, flags, error_code);
    }

    std::error_code protectMappedMemory(const llvm::sys::MemoryBlock & block, unsigned flags) override
    {
        return llvm::sys::Memory::protectMappedMemory(block, flags);
    }

    std::error_code releaseMappedMemory(llvm::sys::MemoryBlock & block) override
    {
        memory_tracker.free(block.size());
        return llvm::sys::Memory::releaseMappedMemory(block);
    }
};
#endif

/// Everything needed to generate and compile one LLVM module: the LLVM context, the module ("jit"),
/// the target machine, the memory manager, the object/compile layers, an IR builder
/// and the table of addresses of already compiled functions.
struct LLVMContext
{
    /// Source of the per-instance `id` values.
    static inline std::atomic id_counter{0};
    llvm::LLVMContext context;
#if LLVM_VERSION_MAJOR >= 7
    llvm::orc::ExecutionSession execution_session;
    std::unique_ptr module;
#else
    std::shared_ptr module;
#endif
    std::unique_ptr machine;
#if LLVM_VERSION_MAJOR >= 6
    std::unique_ptr memory_mapper;
#endif
    std::shared_ptr memory_manager;
    llvm::orc::RTDyldObjectLinkingLayer object_layer;
    llvm::orc::IRCompileLayer compile_layer;
    llvm::DataLayout layout;
    llvm::IRBuilder<> builder;
    /// Function name -> address; filled by `compileAllFunctionsToNativeCode`.
    std::unordered_map symbols;
    /// Taken from `id_counter`; used by `CompiledExpressionCache::weight` to count each context once.
    size_t id;

    /// Members are initialized in declaration order, which the initializers rely on
    /// (e.g. `memory_manager` uses `memory_mapper`, the layers use `memory_manager` and `machine`).
    LLVMContext()
#if LLVM_VERSION_MAJOR >= 7
        : module(std::make_unique("jit", context))
#else
        : module(std::make_shared("jit", context))
#endif
        , machine(getNativeMachine())
#if LLVM_VERSION_MAJOR >= 6
        , memory_mapper(std::make_unique())
        , memory_manager(std::make_shared(memory_mapper.get()))
#else
        , memory_manager(std::make_shared())
#endif
#if LLVM_VERSION_MAJOR >= 7
        , object_layer(execution_session, [this](llvm::orc::VModuleKey)
        {
            return llvm::orc::RTDyldObjectLinkingLayer::Resources{memory_manager, wrapJITSymbolResolver(*memory_manager)};
        })
#else
        , object_layer([this]() { return memory_manager; })
#endif
        , compile_layer(object_layer, llvm::orc::SimpleCompiler(*machine))
        , layout(machine->createDataLayout())
        , builder(context)
        , id(id_counter++)
    {
        module->setDataLayout(layout);
        module->setTargetTriple(machine->getTargetTriple().getTriple());
    }

    /// Optimizes the module, hands it to the compile layer and records the address of every function in `symbols`.
    /// Returns the used memory: the value of the mapper's memory tracker on LLVM >= 6, otherwise 0.
    /// Returns 0 without doing anything if the module contains no functions.
    /// Throws CANNOT_COMPILE_CODE if the module cannot be added or a function has no address.
    size_t compileAllFunctionsToNativeCode()
    {
        if (!module->size())
            return 0;
        /// NOTE: this local `builder` shadows the member `builder` (the IR builder) until the end of the method.
        llvm::PassManagerBuilder builder;
        llvm::legacy::PassManager mpm;
        llvm::legacy::FunctionPassManager fpm(module.get());
        builder.OptLevel = 3;
        builder.SLPVectorize = true;
        builder.LoopVectorize = true;
        builder.RerollLoops = true;
        builder.VerifyInput = true;
        builder.VerifyOutput = true;
        machine->adjustPassManager(builder);
        fpm.add(llvm::createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
        mpm.add(llvm::createTargetTransformInfoWrapperPass(machine->getTargetIRAnalysis()));
        builder.populateFunctionPassManager(fpm);
        builder.populateModulePassManager(mpm);
        fpm.doInitialization();
        for (auto & function : *module)
            fpm.run(function);
        fpm.doFinalization();
        mpm.run(*module);

        /// Names are collected before the module is added: on LLVM >= 7 `module` is moved from below.
        std::vector functions;
        functions.reserve(module->size());
        for (const auto & function : *module)
            functions.emplace_back(function.getName());

#if LLVM_VERSION_MAJOR >= 7
        llvm::orc::VModuleKey module_key = execution_session.allocateVModule();
        if (compile_layer.addModule(module_key, std::move(module)))
            throw Exception("Cannot add module to compile layer", ErrorCodes::CANNOT_COMPILE_CODE);
#else
        if (!compile_layer.addModule(module, memory_manager))
            throw Exception("Cannot add module to compile layer", ErrorCodes::CANNOT_COMPILE_CODE);
#endif

        for (const auto & name : functions)
        {
            std::string mangled_name;
            llvm::raw_string_ostream mangled_name_stream(mangled_name);
            llvm::Mangler::getNameWithPrefix(mangled_name_stream, name, layout);
            mangled_name_stream.flush();
            auto symbol = compile_layer.findSymbol(mangled_name, false);
            if (!symbol)
                continue; /// external function (e.g. an intrinsic that calls into libc)
            auto address = symbol.getAddress();
            if (!address)
                throw Exception("Function " + name + " failed to link", ErrorCodes::CANNOT_COMPILE_CODE);
            symbols[name] = reinterpret_cast(*address);
        }
#if LLVM_VERSION_MAJOR >= 6
        return memory_mapper->memory_tracker.get();
#else
        return 0;
#endif
    }
};

/// Executable form of a compiled function: calls the native code found in `context->symbols` under `name`.
class LLVMPreparedFunction : public PreparedFunctionImpl
{
    std::string name;
    std::shared_ptr context;    /// held as a member alongside the code address taken from it
    void * function;

public:
    /// `symbols.at(name)` uses the member `name`, which is declared (and therefore initialized) first.
    /// The lookup throws if the function has not been compiled into `context` yet.
    LLVMPreparedFunction(std::string name_, std::shared_ptr context)
        : name(std::move(name_)), context(context), function(context->symbols.at(name))
    {}

    String getName() const override { return name; }

    /// Nulls are handled by the generated code itself (null maps are passed through `ColumnData::null`).
    bool useDefaultImplementationForNulls() const override { return false; }

    bool useDefaultImplementationForConstants() const override { return true; }

    /// Builds a `ColumnData` array (all arguments followed by the result column) and calls the compiled function.
    /// Throws LOGICAL_ERROR if an argument column is missing from the block.
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t block_size) override
    {
        auto col_res = block.getByPosition(result).type->createColumn()->cloneResized(block_size);
        /// The generated loop assumes a nonzero row count, so empty blocks never reach the native code.
        if (block_size)
        {
            std::vector columns(arguments.size() + 1);
            for (size_t i = 0; i < arguments.size(); ++i)
            {
                auto * column = block.getByPosition(arguments[i]).column.get();
                if (!column)
                    throw Exception("Column " + block.getByPosition(arguments[i]).name + " is missing", ErrorCodes::LOGICAL_ERROR);
                columns[i] = getColumnData(column);
            }
            columns[arguments.size()] = getColumnData(col_res.get());
            reinterpret_cast(function)(block_size, columns.data());
        }
        block.getByPosition(result).column = std::move(col_res);
    }
};

/// Emits into `context->module` an LLVM function named `f.getName()` with the shape
/// `void (size_t row_count, ColumnData * columns)`, where `columns` holds the arguments of `f` followed by its result.
/// The body is a single loop (blocks "entry" -> "loop" -> "end") that evaluates `f.compile(...)` once per row
/// and stores the value (and the null flag, for a nullable result) into the result column.
static void compileFunctionToLLVMByteCode(std::shared_ptr & context, const IFunctionBase & f)
{
    ProfileEvents::increment(ProfileEvents::CompileFunction);
    auto & arg_types = f.getArgumentTypes();
    auto & b = context->builder;
    auto * size_type = b.getIntNTy(sizeof(size_t) * 8);
    /// LLVM counterpart of `ColumnData`: { data, null, stride }.
    auto * data_type = llvm::StructType::get(b.getInt8PtrTy(), b.getInt8PtrTy(), size_type);
    auto * func_type = llvm::FunctionType::get(b.getVoidTy(), { size_type, data_type->getPointerTo() }, /*isVarArg=*/false);
    auto * func = llvm::Function::Create(func_type, llvm::Function::ExternalLinkage, f.getName(), context->module.get());
    auto args = func->args().begin();
    llvm::Value * counter_arg = &*args++;
    llvm::Value * columns_arg = &*args++;

    /// Entry block: load every `ColumnData` struct and split it into its fields.
    auto * entry = llvm::BasicBlock::Create(b.getContext(), "entry", func);
    b.SetInsertPoint(entry);
    std::vector columns(arg_types.size() + 1);
    for (size_t i = 0; i <= arg_types.size(); ++i)
    {
        /// The last slot describes the result column.
        auto & type = i == arg_types.size() ? f.getReturnType() : arg_types[i];
        auto * data = b.CreateLoad(b.CreateConstInBoundsGEP1_32(data_type, columns_arg, i));
        columns[i].data_init = b.CreatePointerCast(b.CreateExtractValue(data, {0}), toNativeType(b, removeNullable(type))->getPointerTo());
        columns[i].null_init = type->isNullable() ? b.CreateExtractValue(data, {1}) : nullptr;
        columns[i].stride = b.CreateExtractValue(data, {2});
    }

    /// assume nonzero initial value in `counter_arg`
    /// (the exit test below compares the counter with 1 after the body has run, so the body runs at least once)
    auto * loop = llvm::BasicBlock::Create(b.getContext(), "loop", func);
    b.CreateBr(loop);
    b.SetInsertPoint(loop);
    auto * counter_phi = b.CreatePHI(counter_arg->getType(), 2);
    counter_phi->addIncoming(counter_arg, entry);
    /// One PHI node per data pointer (and per null map pointer, if any); the back-edge values are added after the body.
    for (auto & col : columns)
    {
        col.data = b.CreatePHI(col.data_init->getType(), 2);
        col.data->addIncoming(col.data_init, entry);
        if (col.null_init)
        {
            col.null = b.CreatePHI(col.null_init->getType(), 2);
            col.null->addIncoming(col.null_init, entry);
        }
    }
    /// Each argument is a callback, so a load is only emitted when `f.compile` actually asks for the value.
    ValuePlaceholders arguments(arg_types.size());
    for (size_t i = 0; i < arguments.size(); ++i)
    {
        arguments[i] = [&b, &col = columns[i], &type = arg_types[i]]() -> llvm::Value *
        {
            auto * value = b.CreateLoad(col.data);
            if (!col.null)
                return value;
            /// Nullable values are passed as a { value, is_null } aggregate.
            auto * is_null = b.CreateICmpNE(b.CreateLoad(col.null), b.getInt8(0));
            auto * nullable = llvm::Constant::getNullValue(toNativeType(b, type));
            return b.CreateInsertValue(b.CreateInsertValue(nullable, value, {0}), is_null, {1});
        };
    }
    auto * result = f.compile(b, std::move(arguments));
    if (columns.back().null)
    {
        b.CreateStore(b.CreateExtractValue(result, {0}), columns.back().data);
        b.CreateStore(b.CreateSelect(b.CreateExtractValue(result, {1}), b.getInt8(1), b.getInt8(0)), columns.back().null);
    }
    else
    {
        b.CreateStore(result, columns.back().data);
    }
    /// `f.compile` may have created additional basic blocks, so the back edge starts at the current insert block.
    auto * cur_block = b.GetInsertBlock();
    for (auto & col : columns)
    {
        /// stride is either 0 or size of native type; output column is never constant; neither is at least one input
        /// (the condition parses as `(is_output || columns.size() <= 2) ? false : stride == 0`)
        auto * is_const = &col == &columns.back() || columns.size() <= 2 ? b.getFalse() : b.CreateICmpEQ(col.stride, llvm::ConstantInt::get(size_type, 0));
        col.data->addIncoming(b.CreateSelect(is_const, col.data, b.CreateConstInBoundsGEP1_32(nullptr, col.data, 1)), cur_block);
        if (col.null)
            col.null->addIncoming(b.CreateSelect(is_const, col.null, b.CreateConstInBoundsGEP1_32(nullptr, col.null, 1)), cur_block);
    }
    counter_phi->addIncoming(b.CreateSub(counter_phi, llvm::ConstantInt::get(size_type, 1)), cur_block);

    /// Leave the loop once the counter of the iteration that has just finished was 1.
    auto * end = llvm::BasicBlock::Create(b.getContext(), "end", func);
    b.CreateCondBr(b.CreateICmpNE(counter_phi, llvm::ConstantInt::get(size_type, 1)), loop, end);
    b.SetInsertPoint(end);
    b.CreateRetVoid();
}

/// Converts row `i` of `column` into an LLVM constant of type `type`.
/// Returns nullptr if `type` is null, the row does not exist, or the type is not supported
/// (only float, double, integer types and nullable/constant wrappers around them are handled).
static llvm::Constant * getNativeValue(llvm::Type * type, const IColumn & column, size_t i)
{
    if (!type || column.size() <= i)
        return nullptr;
    if (auto * constant = typeid_cast(&column))
        return getNativeValue(type, constant->getDataColumn(), 0);
    if (auto * nullable = typeid_cast(&column))
    {
        /// For a nullable column `type` is treated as a struct: element 0 is the value, element 1 the null flag.
        auto * value = getNativeValue(type->getContainedType(0), nullable->getNestedColumn(), i);
        auto * is_null = llvm::ConstantInt::get(type->getContainedType(1), nullable->isNullAt(i));
        return value ? llvm::ConstantStruct::get(static_cast(type), value, is_null) : nullptr;
    }
    if (type->isFloatTy())
        return llvm::ConstantFP::get(type, static_cast &>(column).getElement(i));
    if (type->isDoubleTy())
        return llvm::ConstantFP::get(type, static_cast &>(column).getElement(i));
    if (type->isIntegerTy())
        return llvm::ConstantInt::get(type, column.getUInt(i));
    /// TODO: if (type->isVectorTy())
    return nullptr;
}

/// Same as IFunctionBase::compile, but also for constants and input columns.
using CompilableExpression = std::function;

/// Expression that yields row 0 of the column `c` as an LLVM constant.
static CompilableExpression subexpression(ColumnPtr c, DataTypePtr type)
{
    return [=](llvm::IRBuilderBase & b, const ValuePlaceholders &) { return getNativeValue(toNativeType(b, type), *c, 0); };
}

/// Expression that yields the `i`-th input of the compiled function.
static CompilableExpression subexpression(size_t i)
{
    return [=](llvm::IRBuilderBase &, const ValuePlaceholders & inputs) { return inputs[i](); };
}

/// Expression that applies `f` to the results of `args`.
/// `f` is captured by reference, so it must stay alive for as long as the returned expression is used.
/// Throws LOGICAL_ERROR if the value produced by `f.compile` does not have the native type of `f`'s return type.
static CompilableExpression subexpression(const IFunctionBase & f, std::vector args)
{
    return [&, args = std::move(args)](llvm::IRBuilderBase & builder, const ValuePlaceholders & inputs)
    {
        ValuePlaceholders input;
        for (const auto & arg : args)
            input.push_back([&]() { return arg(builder, inputs); });
        auto * result = f.compile(builder, input);
        if (result->getType() != toNativeType(builder, f.getReturnType()))
            throw Exception("Function " + f.getName() + " generated an llvm::Value of invalid type", ErrorCodes::LOGICAL_ERROR);
        return result;
    };
}

/// Builds one fused function out of the chain `actions`; the function is named after the result of the last action.
/// Columns of `sample_block` that already have a convertible value become constants; every other name that is
/// used as an argument before being produced by an earlier action becomes an input of the fused function.
/// The IR is emitted into `context` immediately; native code is produced later by
/// `LLVMContext::compileAllFunctionsToNativeCode`.
LLVMFunction::LLVMFunction(const ExpressionActions::Actions & actions, std::shared_ptr context, const Block & sample_block)
    : name(actions.back().result_name), context(context)
{
    for (const auto & c : sample_block)
        /// TODO: implement `getNativeValue` for all types & replace the check with `c.column && toNativeType(...)`
        if (c.column && getNativeValue(toNativeType(context->builder, c.type), *c.column, 0))
            subexpressions[c.name] = subexpression(c.column, c.type);
    for (const auto & action : actions)
    {
        const auto & names = action.argument_names;
        const auto & types = action.function->getArgumentTypes();
        std::vector args;
        for (size_t i = 0; i < names.size(); ++i)
        {
            /// `emplace` inserts only if the name is not known yet; in that case it becomes a new input.
            auto inserted = subexpressions.emplace(names[i], subexpression(arg_names.size()));
            if (inserted.second)
            {
                arg_names.push_back(names[i]);
                arg_types.push_back(types[i]);
            }
            args.push_back(inserted.first->second);
        }
        subexpressions[action.result_name] = subexpression(*action.function, std::move(args));
        originals.push_back(action.function);
    }
    compileFunctionToLLVMByteCode(context, *this);
}

/// Requires the function to be present in `context->symbols` already (see the LLVMPreparedFunction constructor).
PreparedFunctionPtr LLVMFunction::prepare(const Block &) const { return std::make_shared(name, context); }

/// The following properties hold for the fused function only if they hold for every original function.
bool LLVMFunction::isDeterministic() const
{
    for (const auto & f : originals)
        if (!f->isDeterministic())
            return false;
    return true;
}

bool LLVMFunction::isDeterministicInScopeOfQuery() const
{
    for (const auto & f : originals)
        if (!f->isDeterministicInScopeOfQuery())
            return false;
    return true;
}

bool LLVMFunction::isSuitableForConstantFolding() const
{
    for (const auto & f : originals)
        if (!f->isSuitableForConstantFolding())
            return false;
    return true;
}

bool LLVMFunction::isInjective(const Block & sample_block)
{
    for (const auto & f : originals)
        if (!f->isInjective(sample_block))
            return false;
    return true;
}

bool LLVMFunction::hasInformationAboutMonotonicity() const
{
    for (const auto & f : originals)
        if (!f->hasInformationAboutMonotonicity())
            return false;
    return true;
}

/// Combines the monotonicity of the original functions, treating them as a chain of nested unary calls.
/// Returns the first non-monotonic result as is; otherwise the direction flips with every decreasing function,
/// and `is_always_monotonic` is kept only if it holds for every function.
LLVMFunction::Monotonicity LLVMFunction::getMonotonicityForRange(const IDataType & type, const Field & left, const Field & right) const
{
    const IDataType * type_ = &type;
    Field left_ = left;
    Field right_ = right;
    Monotonicity result(true, true, true);
    /// monotonicity is only defined for unary functions, so the chain must describe a sequence of nested calls
    for (size_t i = 0; i < originals.size(); ++i)
    {
        Monotonicity m = originals[i]->getMonotonicityForRange(*type_, left_, right_);
        if (!m.is_monotonic)
            return m;
        result.is_positive ^= !m.is_positive;
        result.is_always_monotonic &= m.is_always_monotonic;
        if (i + 1 < originals.size())
        {
            /// Map the range through the current function to obtain the range seen by the next one;
            /// bounds equal to a default-constructed Field are left untouched.
            if (left_ != Field())
                applyFunction(*originals[i], left_);
            if (right_ != Field())
                applyFunction(*originals[i], right_);
            /// A decreasing function swaps the bounds.
            if (!m.is_positive)
                std::swap(left_, right_);
            type_ = originals[i]->getReturnType().get();
        }
    }
    return result;
}

/// A function can be compiled if its return type and all argument types have native representations
/// and the function itself reports that it is compilable.
static bool isCompilable(const IFunctionBase & function)
{
    if (!canBeNativeType(*function.getReturnType()))
        return false;
    for (const auto & type : function.getArgumentTypes())
        if (!canBeNativeType(*type))
            return false;
    return function.isCompilable();
}

/// On LLVM >= 6: the sum of the tracked memory of all distinct contexts referenced by cached functions
/// (several functions may share one context, hence the `seen` set keyed by context id).
/// Otherwise falls back to the base class implementation.
size_t CompiledExpressionCache::weight() const
{
#if LLVM_VERSION_MAJOR >= 6
    std::lock_guard lock(mutex);
    size_t result{0};
    std::unordered_set seen;
    for (const auto & cell : cells)
    {
        auto function_context = cell.second.value->getContext();
        if (!seen.count(function_context->id))
        {
            result += function_context->memory_mapper->memory_tracker.get();
            seen.insert(function_context->id);
        }
    }
    return result;
#else
    return Base::weight();
#endif
}

/// For every APPLY_FUNCTION action, computes the set of later actions that consume its result.
/// The actions are walked backwards; entries of non-function actions stay empty.
std::vector>> getActionsDependents(const ExpressionActions::Actions & actions, const Names & output_columns)
{
    /// an empty optional is a poisoned value prohibiting the column's producer from being removed
    /// (which it could be, if it was inlined into every dependent function).
    std::unordered_map>> current_dependents;
    for (const auto & name : output_columns)
        current_dependents[name].emplace();
    /// a snapshot of each compilable function's dependents at the time of its execution.
    std::vector>> dependents(actions.size());
    for (size_t i = actions.size(); i--;)
    {
        switch (actions[i].type)
        {
            case ExpressionAction::REMOVE_COLUMN:
                current_dependents.erase(actions[i].source_name);
                /// poison every other column used after this point so that inlining chains do not cross it.
                for (auto & dep : current_dependents)
                    dep.second.emplace();
                break;

            case ExpressionAction::PROJECT:
                /// Only the projected columns are needed before this point.
                current_dependents.clear();
                for (const auto & proj : actions[i].projection)
                    current_dependents[proj.first].emplace();
                break;

            case ExpressionAction::ADD_ALIASES:
                for (const auto & proj : actions[i].projection)
                    current_dependents[proj.first].emplace();
                break;

            case ExpressionAction::ADD_COLUMN:
            case ExpressionAction::COPY_COLUMN:
            case ExpressionAction::ARRAY_JOIN:
            case ExpressionAction::JOIN:
            {
                /// Columns needed by these actions have to be materialized.
                Names columns = actions[i].getNeededColumns();
                for (const auto & column : columns)
                    current_dependents[column].emplace();
                break;
            }

            case ExpressionAction::APPLY_FUNCTION:
            {
                dependents[i] = current_dependents[actions[i].result_name];
                /// Arguments of a compilable function depend on it by index; arguments of any other function are poisoned.
                const bool compilable = isCompilable(*actions[i].function);
                for (const auto & name : actions[i].argument_names)
                {
                    if (compilable)
                        current_dependents[name].emplace(i);
                    else
                        current_dependents[name].emplace();
                }
                break;
            }
        }
    }
    return dependents;
}

/// Replaces chains of compilable functions in `actions` with fused, JIT-compiled functions.
/// A chain is compiled only if it consists of more than one function and has been seen
/// more than `min_count_to_compile` times (counted per chain hash across all calls).
/// If `compilation_cache` is set, compiled functions are looked up in it and stored into it.
void compileFunctions(ExpressionActions::Actions & actions, const Names & output_columns, const Block & sample_block, std::shared_ptr compilation_cache, size_t min_count_to_compile)
{
    /// Function-local statics: shared by all calls and guarded by `mutex`.
    static std::unordered_map counter;
    static std::mutex mutex;

    /// Initializes the LLVM native target once, when the static `initializer` below is constructed.
    struct LLVMTargetInitializer
    {
        LLVMTargetInitializer()
        {
            llvm::InitializeNativeTarget();
            llvm::InitializeNativeTargetAsmPrinter();
            llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
        }
    };

    static LLVMTargetInitializer initializer;

    auto dependents = getActionsDependents(actions, output_columns);
    /// Initialize context as late as possible and only if needed
    std::shared_ptr context;
    /// fused[i] accumulates the chain of actions that would be compiled together with action i as its last step.
    std::vector fused(actions.size());
    for (size_t i = 0; i < actions.size(); ++i)
    {
        if (actions[i].type != ExpressionAction::APPLY_FUNCTION || !isCompilable(*actions[i].function))
            continue;

        fused[i].push_back(actions[i]);
        /// A poisoned (empty) dependent means the result has to exist as a column, so the chain ends here.
        if (dependents[i].find({}) != dependents[i].end())
        {
            /// the result of compiling one function in isolation is pretty much the same as its `execute` method.
            if (fused[i].size() == 1)
                continue;

            auto hash_key = ExpressionActions::ActionsHash{}(fused[i]);
            {
                std::lock_guard lock(mutex);
                if (counter[hash_key]++ < min_count_to_compile)
                    continue;
            }

            std::shared_ptr fn;
            if (compilation_cache)
            {
                /// Lock here to be sure that all functions will be compiled
                std::lock_guard lock(mutex);
                /// Don't use getOrSet here, because sometimes we need to initialize context
                fn = compilation_cache->get(hash_key);
                if (!fn)
                {
                    if (!context)
                        context = std::make_shared();
                    Stopwatch watch;
                    fn = std::make_shared(fused[i], context, sample_block);
                    ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds());
                    compilation_cache->set(hash_key, fn);
                }
            }
            else
            {
                if (!context)
                    context = std::make_shared();
                Stopwatch watch;
                fn = std::make_shared(fused[i], context, sample_block);
                ProfileEvents::increment(ProfileEvents::CompileExpressionsMicroseconds, watch.elapsedMicroseconds());
            }

            actions[i].function = fn;
            actions[i].argument_names = fn->getArgumentNames();
            actions[i].is_function_compiled = true;
            continue;
        }

        /// TODO: determine whether it's profitable to inline the function if there's more than one dependent.
        /// No poisoned dependent was found above, so every `dep` holds an action index here.
        for (const auto & dep : dependents[i])
            fused[*dep].insert(fused[*dep].end(), fused[i].begin(), fused[i].end());
    }

    /// A context exists only if at least one new function was generated by this call.
    if (context)
    {
        /// Lock here, because other threads can get uncompiled functions from cache
        std::lock_guard lock(mutex);
        size_t used_memory = context->compileAllFunctionsToNativeCode();
        ProfileEvents::increment(ProfileEvents::CompileExpressionsBytes, used_memory);
    }
}

}

#endif