#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "formatString.h" namespace DB { namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_COLUMN; } using namespace GatherUtils; template class ConcatImpl : public IFunction { public: static constexpr auto name = Name::name; explicit ConcatImpl(const Context & context_) : context(context_) {} static FunctionPtr create(const Context & context) { return std::make_shared(context); } String getName() const override { return name; } bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool isInjective(const Block &) override { return is_injective; } bool useDefaultImplementationForConstants() const override { return true; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() < 2) throw Exception( "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be at least 2.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (arguments.size() > FormatImpl::argument_threshold) throw Exception( "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be at most " + std::to_string(FormatImpl::argument_threshold), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (const auto arg_idx : ext::range(0, arguments.size())) { const auto arg = arguments[arg_idx].get(); if (!isStringOrFixedString(arg)) throw Exception{"Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; } return std::make_shared(); } void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override { /// Format function is not proven to be faster for two arguments. /// Actually there is overhead of 2 to 5 extra instructions for each string for checking empty strings in FormatImpl. /// Though, benchmarks are really close, for most examples we saw executeBinary is slightly faster (0-3%). /// For 3 and more arguments FormatImpl is much faster (up to 50-60%). if (arguments.size() == 2) executeBinary(block, arguments, result, input_rows_count); else executeFormatImpl(block, arguments, result, input_rows_count); } private: const Context & context; void executeBinary(Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { const IColumn * c0 = block.getByPosition(arguments[0]).column.get(); const IColumn * c1 = block.getByPosition(arguments[1]).column.get(); const ColumnString * c0_string = checkAndGetColumn(c0); const ColumnString * c1_string = checkAndGetColumn(c1); const ColumnConst * c0_const_string = checkAndGetColumnConst(c0); const ColumnConst * c1_const_string = checkAndGetColumnConst(c1); auto c_res = ColumnString::create(); if (c0_string && c1_string) concat(StringSource(*c0_string), StringSource(*c1_string), StringSink(*c_res, c0->size())); else if (c0_string && c1_const_string) concat(StringSource(*c0_string), ConstSource(*c1_const_string), StringSink(*c_res, c0->size())); else if (c0_const_string && c1_string) concat(ConstSource(*c0_const_string), StringSource(*c1_string), StringSink(*c_res, c0->size())); else { /// Fallback: use generic implementation for not very important cases. executeFormatImpl(block, arguments, result, input_rows_count); return; } block.getByPosition(result).column = std::move(c_res); } void executeFormatImpl(Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count) { auto c_res = ColumnString::create(); std::vector data(arguments.size()); std::vector offsets(arguments.size()); std::vector fixed_string_sizes(arguments.size()); std::vector constant_strings(arguments.size()); bool has_column_string = false; bool has_column_fixed_string = false; for (size_t i = 0; i < arguments.size(); ++i) { const ColumnPtr & column = block.getByPosition(arguments[i]).column; if (const ColumnString * col = checkAndGetColumn(column.get())) { has_column_string = true; data[i] = &col->getChars(); offsets[i] = &col->getOffsets(); } else if (const ColumnFixedString * fixed_col = checkAndGetColumn(column.get())) { has_column_fixed_string = true; data[i] = &fixed_col->getChars(); fixed_string_sizes[i] = fixed_col->getN(); } else if (const ColumnConst * const_col = checkAndGetColumnConstStringOrFixedString(column.get())) { constant_strings[i] = const_col->getValue(); } else throw Exception( "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); } String pattern; pattern.reserve(2 * arguments.size()); for (size_t i = 0; i < arguments.size(); ++i) pattern += "{}"; FormatImpl::formatExecute( has_column_string, has_column_fixed_string, std::move(pattern), data, offsets, fixed_string_sizes, constant_strings, c_res->getChars(), c_res->getOffsets(), input_rows_count); block.getByPosition(result).column = std::move(c_res); } }; struct NameConcat { static constexpr auto name = "concat"; }; struct NameConcatAssumeInjective { static constexpr auto name = "concatAssumeInjective"; }; using FunctionConcat = ConcatImpl; using FunctionConcatAssumeInjective = ConcatImpl; /// Also works with arrays. class ConcatOverloadResolver : public IFunctionOverloadResolverImpl { public: static constexpr auto name = "concat"; static FunctionOverloadResolverImplPtr create(const Context & context) { return std::make_unique(context); } explicit ConcatOverloadResolver(const Context & context_) : context(context_) {} String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } FunctionBaseImplPtr build(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { if (isArray(arguments.at(0).type)) { return FunctionOverloadResolverAdaptor(FunctionFactory::instance().getImpl("arrayConcat", context)).buildImpl(arguments); } else return std::make_unique( FunctionConcat::create(context), ext::map(arguments, [](const auto & elem) { return elem.type; }), return_type); } DataTypePtr getReturnType(const DataTypes & arguments) const override { if (arguments.size() < 2) throw Exception( "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be at least 2.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// We always return Strings from concat, even if arguments were fixed strings. return std::make_shared(); } private: const Context & context; }; void registerFunctionsConcat(FunctionFactory & factory) { factory.registerFunction(FunctionFactory::CaseInsensitive); factory.registerFunction(); } }