2019-05-20 21:06:43 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
2024-03-09 07:10:59 +00:00
|
|
|
#include <Columns/ColumnStringHelpers.h>
|
2018-09-09 23:36:06 +00:00
|
|
|
#include <DataTypes/DataTypeString.h>
|
|
|
|
#include <Functions/FunctionFactory.h>
|
|
|
|
#include <Functions/FunctionHelpers.h>
|
2019-05-20 21:06:43 +00:00
|
|
|
#include <Functions/GatherUtils/Algorithms.h>
|
2018-09-09 23:47:56 +00:00
|
|
|
#include <Functions/GatherUtils/Sinks.h>
|
2019-05-20 21:06:43 +00:00
|
|
|
#include <Functions/GatherUtils/Sources.h>
|
2021-05-17 07:30:42 +00:00
|
|
|
#include <Functions/IFunction.h>
|
2023-12-05 20:14:11 +00:00
|
|
|
#include <Functions/formatString.h>
|
2018-09-09 23:36:06 +00:00
|
|
|
#include <IO/WriteHelpers.h>
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/map.h>
|
2018-09-09 23:36:06 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
2024-05-06 09:35:48 +00:00
|
|
|
/// Error code thrown by ConcatImpl::getReturnTypeImpl() when exactly one argument is passed.
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
|
2018-09-09 23:36:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Allows the unqualified GatherUtils names used below
/// (presumably `concat`, `StringSource`, `ConstSource`, `StringSink` - confirm against the GatherUtils headers).
using namespace GatherUtils;
|
|
|
|
|
2020-09-07 18:00:37 +00:00
|
|
|
namespace
|
|
|
|
{
|
2018-09-09 23:36:06 +00:00
|
|
|
|
|
|
|
template <typename Name, bool is_injective>
|
|
|
|
class ConcatImpl : public IFunction
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
static constexpr auto name = Name::name;
|
2023-11-15 12:53:52 +00:00
|
|
|
explicit ConcatImpl(ContextPtr context_) : context(context_) { }
|
2021-06-01 12:20:52 +00:00
|
|
|
static FunctionPtr create(ContextPtr context) { return std::make_shared<ConcatImpl>(context); }
|
2018-09-09 23:36:06 +00:00
|
|
|
|
2019-05-20 21:06:43 +00:00
|
|
|
String getName() const override { return name; }
|
2018-09-09 23:36:06 +00:00
|
|
|
|
2019-05-20 21:06:43 +00:00
|
|
|
bool isVariadic() const override { return true; }
|
2018-09-09 23:36:06 +00:00
|
|
|
|
2019-05-20 21:06:43 +00:00
|
|
|
size_t getNumberOfArguments() const override { return 0; }
|
2018-09-09 23:36:06 +00:00
|
|
|
|
2020-10-09 07:41:28 +00:00
|
|
|
bool isInjective(const ColumnsWithTypeAndName &) const override { return is_injective; }
|
2018-09-09 23:36:06 +00:00
|
|
|
|
2021-06-22 16:21:23 +00:00
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
2021-04-29 14:48:26 +00:00
|
|
|
|
2018-09-09 23:36:06 +00:00
|
|
|
bool useDefaultImplementationForConstants() const override { return true; }
|
|
|
|
|
|
|
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
|
|
|
{
|
2024-07-01 07:29:32 +00:00
|
|
|
if (arguments.size() == 1)
|
|
|
|
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Number of arguments for function {} should not be 1", getName());
|
2019-05-20 21:12:25 +00:00
|
|
|
|
2018-09-09 23:36:06 +00:00
|
|
|
return std::make_shared<DataTypeString>();
|
|
|
|
}
|
|
|
|
|
2020-11-17 13:24:45 +00:00
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
2018-09-09 23:36:06 +00:00
|
|
|
{
|
2024-07-01 07:29:32 +00:00
|
|
|
if (arguments.empty())
|
|
|
|
{
|
|
|
|
auto res_data = ColumnString::create();
|
|
|
|
res_data->insertDefault();
|
|
|
|
return ColumnConst::create(std::move(res_data), input_rows_count);
|
|
|
|
}
|
|
|
|
else if (arguments.size() == 1)
|
|
|
|
return arguments[0].column;
|
2019-05-20 21:06:43 +00:00
|
|
|
/// Format function is not proven to be faster for two arguments.
|
|
|
|
/// Actually there is overhead of 2 to 5 extra instructions for each string for checking empty strings in FormatImpl.
|
|
|
|
/// Though, benchmarks are really close, for most examples we saw executeBinary is slightly faster (0-3%).
|
2023-11-09 16:54:45 +00:00
|
|
|
/// For 3 and more arguments FormatStringImpl is much faster (up to 50-60%).
|
2024-07-01 07:29:32 +00:00
|
|
|
else if (arguments.size() == 2)
|
2020-10-17 16:48:53 +00:00
|
|
|
return executeBinary(arguments, input_rows_count);
|
2024-07-01 07:29:32 +00:00
|
|
|
else
|
|
|
|
return executeFormatImpl(arguments, input_rows_count);
|
2018-09-09 23:36:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2021-06-01 12:20:52 +00:00
|
|
|
ContextWeakPtr context;
|
2018-09-09 23:36:06 +00:00
|
|
|
|
2020-11-17 13:24:45 +00:00
|
|
|
ColumnPtr executeBinary(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
|
2018-09-09 23:36:06 +00:00
|
|
|
{
|
2020-10-17 16:48:53 +00:00
|
|
|
const IColumn * c0 = arguments[0].column.get();
|
|
|
|
const IColumn * c1 = arguments[1].column.get();
|
2018-09-09 23:36:06 +00:00
|
|
|
|
|
|
|
const ColumnString * c0_string = checkAndGetColumn<ColumnString>(c0);
|
|
|
|
const ColumnString * c1_string = checkAndGetColumn<ColumnString>(c1);
|
|
|
|
const ColumnConst * c0_const_string = checkAndGetColumnConst<ColumnString>(c0);
|
|
|
|
const ColumnConst * c1_const_string = checkAndGetColumnConst<ColumnString>(c1);
|
|
|
|
|
2024-01-29 13:35:19 +00:00
|
|
|
auto col_res = ColumnString::create();
|
2018-09-09 23:36:06 +00:00
|
|
|
|
|
|
|
if (c0_string && c1_string)
|
2024-01-29 13:35:19 +00:00
|
|
|
concat(StringSource(*c0_string), StringSource(*c1_string), StringSink(*col_res, c0->size()));
|
2018-09-09 23:36:06 +00:00
|
|
|
else if (c0_string && c1_const_string)
|
2024-01-29 13:35:19 +00:00
|
|
|
concat(StringSource(*c0_string), ConstSource<StringSource>(*c1_const_string), StringSink(*col_res, c0->size()));
|
2018-09-09 23:36:06 +00:00
|
|
|
else if (c0_const_string && c1_string)
|
2024-01-29 13:35:19 +00:00
|
|
|
concat(ConstSource<StringSource>(*c0_const_string), StringSource(*c1_string), StringSink(*col_res, c0->size()));
|
2018-09-09 23:36:06 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
/// Fallback: use generic implementation for not very important cases.
|
2020-10-17 16:48:53 +00:00
|
|
|
return executeFormatImpl(arguments, input_rows_count);
|
2018-09-09 23:36:06 +00:00
|
|
|
}
|
|
|
|
|
2024-01-29 13:35:19 +00:00
|
|
|
return col_res;
|
2018-09-09 23:36:06 +00:00
|
|
|
}
|
|
|
|
|
2020-11-17 13:24:45 +00:00
|
|
|
ColumnPtr executeFormatImpl(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
|
2018-09-09 23:36:06 +00:00
|
|
|
{
|
2020-05-15 18:55:30 +00:00
|
|
|
const size_t num_arguments = arguments.size();
|
2020-05-16 20:15:19 +00:00
|
|
|
assert(num_arguments >= 2);
|
2020-05-15 15:48:19 +00:00
|
|
|
|
2024-01-29 13:35:19 +00:00
|
|
|
auto col_res = ColumnString::create();
|
2020-05-15 18:55:30 +00:00
|
|
|
std::vector<const ColumnString::Chars *> data(num_arguments);
|
|
|
|
std::vector<const ColumnString::Offsets *> offsets(num_arguments);
|
|
|
|
std::vector<size_t> fixed_string_sizes(num_arguments);
|
2022-03-28 04:58:22 +00:00
|
|
|
std::vector<std::optional<String>> constant_strings(num_arguments);
|
2023-11-15 15:42:00 +00:00
|
|
|
std::vector<ColumnString::MutablePtr> converted_col_ptrs(num_arguments);
|
2019-05-20 21:06:43 +00:00
|
|
|
bool has_column_string = false;
|
|
|
|
bool has_column_fixed_string = false;
|
2020-05-15 18:55:30 +00:00
|
|
|
for (size_t i = 0; i < num_arguments; ++i)
|
2019-05-20 21:06:43 +00:00
|
|
|
{
|
2020-10-17 16:48:53 +00:00
|
|
|
const ColumnPtr & column = arguments[i].column;
|
2019-05-20 21:06:43 +00:00
|
|
|
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column.get()))
|
|
|
|
{
|
|
|
|
has_column_string = true;
|
|
|
|
data[i] = &col->getChars();
|
|
|
|
offsets[i] = &col->getOffsets();
|
|
|
|
}
|
|
|
|
else if (const ColumnFixedString * fixed_col = checkAndGetColumn<ColumnFixedString>(column.get()))
|
|
|
|
{
|
|
|
|
has_column_fixed_string = true;
|
|
|
|
data[i] = &fixed_col->getChars();
|
2020-03-23 02:12:31 +00:00
|
|
|
fixed_string_sizes[i] = fixed_col->getN();
|
2019-05-20 21:06:43 +00:00
|
|
|
}
|
|
|
|
else if (const ColumnConst * const_col = checkAndGetColumnConstStringOrFixedString(column.get()))
|
|
|
|
{
|
|
|
|
constant_strings[i] = const_col->getValue<String>();
|
|
|
|
}
|
|
|
|
else
|
2023-11-09 16:54:45 +00:00
|
|
|
{
|
2023-11-15 15:42:00 +00:00
|
|
|
/// A non-String/non-FixedString-type argument: use the default serialization to convert it to String
|
2023-11-15 15:53:38 +00:00
|
|
|
auto full_column = column->convertToFullIfNeeded();
|
|
|
|
auto serialization = arguments[i].type->getDefaultSerialization();
|
2023-11-15 15:42:00 +00:00
|
|
|
auto converted_col_str = ColumnString::create();
|
2023-11-15 12:53:52 +00:00
|
|
|
ColumnStringHelpers::WriteHelper write_helper(*converted_col_str, column->size());
|
|
|
|
auto & write_buffer = write_helper.getWriteBuffer();
|
2023-11-15 15:42:00 +00:00
|
|
|
FormatSettings format_settings;
|
2023-11-15 15:53:38 +00:00
|
|
|
for (size_t row = 0; row < column->size(); ++row)
|
2023-11-15 12:53:52 +00:00
|
|
|
{
|
2023-11-15 15:53:38 +00:00
|
|
|
serialization->serializeText(*full_column, row, write_buffer, format_settings);
|
2023-11-15 12:53:52 +00:00
|
|
|
write_helper.rowWritten();
|
|
|
|
}
|
|
|
|
write_helper.finalize();
|
|
|
|
|
2023-11-15 15:53:38 +00:00
|
|
|
/// Keep the pointer alive
|
2023-11-15 12:53:52 +00:00
|
|
|
converted_col_ptrs[i] = std::move(converted_col_str);
|
2023-12-23 07:23:15 +00:00
|
|
|
|
|
|
|
/// Same as the normal `ColumnString` branch
|
|
|
|
has_column_string = true;
|
|
|
|
data[i] = &converted_col_ptrs[i]->getChars();
|
|
|
|
offsets[i] = &converted_col_ptrs[i]->getOffsets();
|
2023-11-09 16:54:45 +00:00
|
|
|
}
|
2019-05-20 21:06:43 +00:00
|
|
|
}
|
2018-09-09 23:36:06 +00:00
|
|
|
|
2019-05-20 21:06:43 +00:00
|
|
|
String pattern;
|
2020-05-15 18:55:30 +00:00
|
|
|
pattern.reserve(2 * num_arguments);
|
2019-05-20 21:06:43 +00:00
|
|
|
|
2020-05-15 18:55:30 +00:00
|
|
|
for (size_t i = 0; i < num_arguments; ++i)
|
2019-05-20 21:06:43 +00:00
|
|
|
pattern += "{}";
|
|
|
|
|
2023-11-09 16:54:45 +00:00
|
|
|
FormatStringImpl::formatExecute(
|
2019-05-20 21:06:43 +00:00
|
|
|
has_column_string,
|
|
|
|
has_column_fixed_string,
|
|
|
|
std::move(pattern),
|
|
|
|
data,
|
|
|
|
offsets,
|
2020-03-23 02:12:31 +00:00
|
|
|
fixed_string_sizes,
|
2019-05-20 21:06:43 +00:00
|
|
|
constant_strings,
|
2024-01-29 13:35:19 +00:00
|
|
|
col_res->getChars(),
|
|
|
|
col_res->getOffsets(),
|
2019-05-20 21:06:43 +00:00
|
|
|
input_rows_count);
|
2018-09-09 23:36:06 +00:00
|
|
|
|
2024-01-29 13:35:19 +00:00
|
|
|
return col_res;
|
2018-09-09 23:36:06 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2018-09-10 00:51:08 +00:00
|
|
|
|
2018-09-09 23:36:06 +00:00
|
|
|
/// Name holder passed as the `Name` template argument of ConcatImpl for the plain `concat` function.
struct NameConcat
{
    static constexpr const char * name = "concat";
};
|
|
|
|
/// Name holder passed as the `Name` template argument of ConcatImpl for `concatAssumeInjective`.
struct NameConcatAssumeInjective
{
    static constexpr const char * name = "concatAssumeInjective";
};
|
|
|
|
|
|
|
|
/// `concat` implementation; its isInjective() reports false.
using FunctionConcat = ConcatImpl<NameConcat, false>;
|
|
|
|
/// Same implementation under the name `concatAssumeInjective`; its isInjective() reports true.
using FunctionConcatAssumeInjective = ConcatImpl<NameConcatAssumeInjective, true>;
|
|
|
|
|
2018-09-10 00:51:08 +00:00
|
|
|
|
2023-11-09 16:54:45 +00:00
|
|
|
/// Works with arrays via `arrayConcat`, maps via `mapConcat`, and tuples via `tupleConcat`.
|
|
|
|
/// Additionally, allows concatenation of arbitrary types that can be cast to string using the corresponding default serialization.
|
2021-05-15 17:33:15 +00:00
|
|
|
class ConcatOverloadResolver : public IFunctionOverloadResolver
|
2018-09-10 00:51:08 +00:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
static constexpr auto name = "concat";
|
2021-06-01 12:20:52 +00:00
|
|
|
static FunctionOverloadResolverPtr create(ContextPtr context) { return std::make_unique<ConcatOverloadResolver>(context); }
|
2018-09-10 00:51:08 +00:00
|
|
|
|
2023-11-15 12:53:52 +00:00
|
|
|
explicit ConcatOverloadResolver(ContextPtr context_) : context(context_) { }
|
2018-09-10 00:51:08 +00:00
|
|
|
|
|
|
|
String getName() const override { return name; }
|
2018-09-10 01:19:14 +00:00
|
|
|
size_t getNumberOfArguments() const override { return 0; }
|
2018-09-10 01:39:06 +00:00
|
|
|
bool isVariadic() const override { return true; }
|
2018-09-10 00:51:08 +00:00
|
|
|
|
2021-05-15 17:33:15 +00:00
|
|
|
FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override
|
2018-09-10 00:51:08 +00:00
|
|
|
{
|
2023-11-20 15:17:31 +00:00
|
|
|
if (arguments.size() == 1)
|
|
|
|
return FunctionFactory::instance().getImpl("toString", context)->build(arguments);
|
2024-07-01 07:29:32 +00:00
|
|
|
if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isArray(elem.type); }))
|
2021-05-15 17:33:15 +00:00
|
|
|
return FunctionFactory::instance().getImpl("arrayConcat", context)->build(arguments);
|
2024-07-01 07:29:32 +00:00
|
|
|
if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isMap(elem.type); }))
|
2023-04-26 14:39:59 +00:00
|
|
|
return FunctionFactory::instance().getImpl("mapConcat", context)->build(arguments);
|
2024-07-01 07:29:32 +00:00
|
|
|
if (!arguments.empty() && std::ranges::all_of(arguments, [](const auto & elem) { return isTuple(elem.type); }))
|
2023-08-09 20:52:09 +00:00
|
|
|
return FunctionFactory::instance().getImpl("tupleConcat", context)->build(arguments);
|
2023-11-15 12:53:52 +00:00
|
|
|
return std::make_unique<FunctionToFunctionBaseAdaptor>(
|
|
|
|
FunctionConcat::create(context),
|
|
|
|
collections::map<DataTypes>(arguments, [](const auto & elem) { return elem.type; }),
|
|
|
|
return_type);
|
2018-09-10 00:51:08 +00:00
|
|
|
}
|
|
|
|
|
2024-07-01 07:29:32 +00:00
|
|
|
DataTypePtr getReturnTypeImpl(const DataTypes &) const override
|
2018-09-10 00:51:08 +00:00
|
|
|
{
|
2019-11-05 13:23:09 +00:00
|
|
|
/// We always return Strings from concat, even if arguments were fixed strings.
|
|
|
|
return std::make_shared<DataTypeString>();
|
2018-09-10 00:51:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
2021-06-01 12:20:52 +00:00
|
|
|
ContextPtr context;
|
2018-09-10 00:51:08 +00:00
|
|
|
};
|
|
|
|
|
2020-09-07 18:00:37 +00:00
|
|
|
}
|
2018-09-10 00:51:08 +00:00
|
|
|
|
2022-07-04 07:01:39 +00:00
|
|
|
/// Registers both functions defined in this file with the function factory.
REGISTER_FUNCTION(Concat)
{
    /// `concat` goes through the overload resolver and is registered with the CaseInsensitive flag.
    const auto concat_case_sensitiveness = FunctionFactory::CaseInsensitive;
    factory.registerFunction<ConcatOverloadResolver>({}, concat_case_sensitiveness);

    /// `concatAssumeInjective` is registered directly, with the factory defaults.
    factory.registerFunction<FunctionConcatAssumeInjective>();
}
|
|
|
|
|
|
|
|
}
|