2020-10-10 18:37:02 +00:00
|
|
|
#pragma once
|
2022-02-07 06:21:24 +00:00
|
|
|
#include <base/map.h>
|
|
|
|
|
2018-09-09 23:36:06 +00:00
|
|
|
#include <Functions/FunctionHelpers.h>
|
|
|
|
#include <Functions/GatherUtils/GatherUtils.h>
|
2018-09-09 23:47:56 +00:00
|
|
|
#include <Functions/GatherUtils/Sources.h>
|
2021-05-17 07:30:42 +00:00
|
|
|
#include <Functions/IFunction.h>
|
2020-04-06 07:44:54 +00:00
|
|
|
#include <Functions/PerformanceAdaptors.h>
|
|
|
|
#include <Functions/TargetSpecific.h>
|
2018-09-09 23:36:06 +00:00
|
|
|
#include <DataTypes/DataTypeString.h>
|
2018-09-09 23:47:56 +00:00
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2022-02-07 06:21:24 +00:00
|
|
|
#include <DataTypes/getLeastSupertype.h>
|
2018-09-09 23:36:06 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
2022-02-07 06:21:24 +00:00
|
|
|
#include <Interpreters/castColumn.h>
|
2018-09-09 23:36:06 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
using namespace GatherUtils;
|
|
|
|
|
|
|
|
/// Error codes used by the exceptions thrown in this header.
/// They are only declared here (extern); the definitions live in another translation unit.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int LOGICAL_ERROR;
}
|
|
|
|
|
|
|
|
/// Tag type selecting the "startsWith" flavour of FunctionStartsEndsWith.
/// Carries the function name exposed through `name`.
struct NameStartsWith
{
    static constexpr const char * name = "startsWith";
};
|
|
|
|
/// Tag type selecting the "endsWith" flavour of FunctionStartsEndsWith.
/// Carries the function name exposed through `name`.
struct NameEndsWith
{
    static constexpr const char * name = "endsWith";
};
|
|
|
|
|
2020-04-02 13:48:14 +00:00
|
|
|
DECLARE_MULTITARGET_CODE(
|
2018-09-09 23:36:06 +00:00
|
|
|
|
2020-04-02 13:48:14 +00:00
|
|
|
template <typename Name>
|
2020-04-05 12:01:33 +00:00
|
|
|
class FunctionStartsEndsWith : public IFunction
|
|
|
|
{
|
2020-04-02 13:48:14 +00:00
|
|
|
public:
|
2020-04-05 12:01:33 +00:00
|
|
|
static constexpr auto name = Name::name;
|
|
|
|
|
|
|
|
String getName() const override
|
|
|
|
{
|
|
|
|
return name;
|
|
|
|
}
|
|
|
|
|
2021-06-22 16:21:23 +00:00
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override
|
2021-04-29 14:48:26 +00:00
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-04-05 12:01:33 +00:00
|
|
|
size_t getNumberOfArguments() const override
|
|
|
|
{
|
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool useDefaultImplementationForConstants() const override
|
|
|
|
{
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
|
|
|
{
|
2022-02-07 06:21:24 +00:00
|
|
|
if (isStringOrFixedString(arguments[0]) && isStringOrFixedString(arguments[1]))
|
|
|
|
return std::make_shared<DataTypeUInt8>();
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2022-02-07 06:21:24 +00:00
|
|
|
if (isArray(arguments[0]) && isArray(arguments[1]))
|
|
|
|
return std::make_shared<DataTypeUInt8>();
|
2020-04-05 12:01:33 +00:00
|
|
|
|
2022-02-07 06:21:24 +00:00
|
|
|
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
2020-04-05 12:01:33 +00:00
|
|
|
}
|
|
|
|
|
2020-11-17 13:24:45 +00:00
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
2022-02-07 06:21:24 +00:00
|
|
|
{
|
|
|
|
auto data_type = arguments[0].type;
|
|
|
|
if (isStringOrFixedString(*data_type))
|
|
|
|
return executeImplString(arguments, {}, input_rows_count);
|
|
|
|
if (isArray(data_type))
|
|
|
|
return executeImplArray(arguments, {}, input_rows_count);
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
|
|
|
private:
|
|
|
|
ColumnPtr executeImplArray(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const
|
|
|
|
{
|
|
|
|
DataTypePtr common_type = getLeastSupertype(collections::map(arguments, [](auto & arg) { return arg.type; }));
|
|
|
|
|
|
|
|
Columns preprocessed_columns(2);
|
|
|
|
for (size_t i = 0; i < 2; ++i)
|
|
|
|
preprocessed_columns[i] = castColumn(arguments[i], common_type);
|
|
|
|
|
|
|
|
std::vector<std::unique_ptr<GatherUtils::IArraySource>> sources;
|
|
|
|
for (auto & argument_column : preprocessed_columns)
|
|
|
|
{
|
|
|
|
bool is_const = false;
|
|
|
|
|
|
|
|
if (const auto * argument_column_const = typeid_cast<const ColumnConst *>(argument_column.get()))
|
|
|
|
{
|
|
|
|
is_const = true;
|
|
|
|
argument_column = argument_column_const->getDataColumnPtr();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (const auto * argument_column_array = typeid_cast<const ColumnArray *>(argument_column.get()))
|
|
|
|
sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, input_rows_count));
|
|
|
|
else
|
|
|
|
throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR};
|
|
|
|
}
|
|
|
|
|
|
|
|
auto result_column = ColumnUInt8::create(input_rows_count);
|
|
|
|
auto * result_column_ptr = typeid_cast<ColumnUInt8 *>(result_column.get());
|
|
|
|
|
|
|
|
if constexpr (std::is_same_v<Name, NameStartsWith>)
|
|
|
|
GatherUtils::sliceHas(*sources[0], *sources[1], GatherUtils::ArraySearchType::StartsWith, *result_column_ptr);
|
|
|
|
else
|
|
|
|
GatherUtils::sliceHas(*sources[0], *sources[1], GatherUtils::ArraySearchType::EndsWith, *result_column_ptr);
|
|
|
|
|
|
|
|
return result_column;
|
|
|
|
}
|
|
|
|
|
|
|
|
ColumnPtr executeImplString(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const
|
2018-09-09 23:36:06 +00:00
|
|
|
{
|
2020-10-17 21:41:50 +00:00
|
|
|
const IColumn * haystack_column = arguments[0].column.get();
|
|
|
|
const IColumn * needle_column = arguments[1].column.get();
|
2018-09-09 23:36:06 +00:00
|
|
|
|
|
|
|
auto col_res = ColumnVector<UInt8>::create();
|
|
|
|
typename ColumnVector<UInt8>::Container & vec_res = col_res->getData();
|
|
|
|
|
|
|
|
vec_res.resize(input_rows_count);
|
|
|
|
|
|
|
|
if (const ColumnString * haystack = checkAndGetColumn<ColumnString>(haystack_column))
|
|
|
|
dispatch<StringSource>(StringSource(*haystack), needle_column, vec_res);
|
2019-01-04 12:10:00 +00:00
|
|
|
else if (const ColumnFixedString * haystack_fixed = checkAndGetColumn<ColumnFixedString>(haystack_column))
|
|
|
|
dispatch<FixedStringSource>(FixedStringSource(*haystack_fixed), needle_column, vec_res);
|
|
|
|
else if (const ColumnConst * haystack_const = checkAndGetColumnConst<ColumnString>(haystack_column))
|
|
|
|
dispatch<ConstSource<StringSource>>(ConstSource<StringSource>(*haystack_const), needle_column, vec_res);
|
|
|
|
else if (const ColumnConst * haystack_const_fixed = checkAndGetColumnConst<ColumnFixedString>(haystack_column))
|
|
|
|
dispatch<ConstSource<FixedStringSource>>(ConstSource<FixedStringSource>(*haystack_const_fixed), needle_column, vec_res);
|
2018-09-09 23:36:06 +00:00
|
|
|
else
|
2020-04-05 12:01:33 +00:00
|
|
|
throw Exception("Illegal combination of columns as arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
|
2018-09-09 23:36:06 +00:00
|
|
|
|
2020-10-17 21:41:50 +00:00
|
|
|
return col_res;
|
2018-09-09 23:36:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename HaystackSource>
|
2020-04-05 12:01:33 +00:00
|
|
|
void dispatch(HaystackSource haystack_source, const IColumn * needle_column, PaddedPODArray<UInt8> & res_data) const
|
2018-09-09 23:36:06 +00:00
|
|
|
{
|
|
|
|
if (const ColumnString * needle = checkAndGetColumn<ColumnString>(needle_column))
|
|
|
|
execute<HaystackSource, StringSource>(haystack_source, StringSource(*needle), res_data);
|
2019-01-04 12:10:00 +00:00
|
|
|
else if (const ColumnFixedString * needle_fixed = checkAndGetColumn<ColumnFixedString>(needle_column))
|
|
|
|
execute<HaystackSource, FixedStringSource>(haystack_source, FixedStringSource(*needle_fixed), res_data);
|
|
|
|
else if (const ColumnConst * needle_const = checkAndGetColumnConst<ColumnString>(needle_column))
|
|
|
|
execute<HaystackSource, ConstSource<StringSource>>(haystack_source, ConstSource<StringSource>(*needle_const), res_data);
|
|
|
|
else if (const ColumnConst * needle_const_fixed = checkAndGetColumnConst<ColumnFixedString>(needle_column))
|
|
|
|
execute<HaystackSource, ConstSource<FixedStringSource>>(haystack_source, ConstSource<FixedStringSource>(*needle_const_fixed), res_data);
|
2018-09-09 23:36:06 +00:00
|
|
|
else
|
2020-04-05 12:01:33 +00:00
|
|
|
throw Exception("Illegal combination of columns as arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);
|
2018-09-09 23:36:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename HaystackSource, typename NeedleSource>
|
|
|
|
static void execute(HaystackSource haystack_source, NeedleSource needle_source, PaddedPODArray<UInt8> & res_data)
|
|
|
|
{
|
|
|
|
size_t row_num = 0;
|
|
|
|
|
|
|
|
while (!haystack_source.isEnd())
|
|
|
|
{
|
|
|
|
auto haystack = haystack_source.getWhole();
|
|
|
|
auto needle = needle_source.getWhole();
|
|
|
|
|
|
|
|
if (needle.size > haystack.size)
|
|
|
|
{
|
|
|
|
res_data[row_num] = false;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if constexpr (std::is_same_v<Name, NameStartsWith>)
|
|
|
|
{
|
|
|
|
res_data[row_num] = StringRef(haystack.data, needle.size) == StringRef(needle.data, needle.size);
|
|
|
|
}
|
|
|
|
else /// endsWith
|
|
|
|
{
|
|
|
|
res_data[row_num] = StringRef(haystack.data + haystack.size - needle.size, needle.size) == StringRef(needle.data, needle.size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
haystack_source.next();
|
|
|
|
needle_source.next();
|
|
|
|
++row_num;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2020-04-02 13:48:14 +00:00
|
|
|
) // DECLARE_MULTITARGET_CODE
|
|
|
|
|
|
|
|
template <typename Name>
|
2020-05-18 20:07:24 +00:00
|
|
|
class FunctionStartsEndsWith : public TargetSpecific::Default::FunctionStartsEndsWith<Name>
|
2020-04-02 13:48:14 +00:00
|
|
|
{
|
|
|
|
public:
|
2021-06-01 12:20:52 +00:00
|
|
|
explicit FunctionStartsEndsWith(ContextPtr context) : selector(context)
|
2020-04-02 13:48:14 +00:00
|
|
|
{
|
2020-05-18 20:07:24 +00:00
|
|
|
selector.registerImplementation<TargetArch::Default,
|
|
|
|
TargetSpecific::Default::FunctionStartsEndsWith<Name>>();
|
|
|
|
|
2020-05-26 15:56:46 +00:00
|
|
|
#if USE_MULTITARGET_CODE
|
|
|
|
selector.registerImplementation<TargetArch::SSE42,
|
|
|
|
TargetSpecific::SSE42::FunctionStartsEndsWith<Name>>();
|
|
|
|
selector.registerImplementation<TargetArch::AVX,
|
|
|
|
TargetSpecific::AVX::FunctionStartsEndsWith<Name>>();
|
|
|
|
selector.registerImplementation<TargetArch::AVX2,
|
|
|
|
TargetSpecific::AVX2::FunctionStartsEndsWith<Name>>();
|
|
|
|
selector.registerImplementation<TargetArch::AVX512F,
|
|
|
|
TargetSpecific::AVX512F::FunctionStartsEndsWith<Name>>();
|
|
|
|
#endif
|
2020-04-02 13:48:14 +00:00
|
|
|
}
|
2020-05-15 10:10:34 +00:00
|
|
|
|
2020-11-17 13:24:45 +00:00
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
2020-05-18 20:07:24 +00:00
|
|
|
{
|
2020-10-17 21:41:50 +00:00
|
|
|
return selector.selectAndExecute(arguments, result_type, input_rows_count);
|
2020-05-18 20:07:24 +00:00
|
|
|
}
|
|
|
|
|
2021-06-01 12:20:52 +00:00
|
|
|
static FunctionPtr create(ContextPtr context)
|
2020-04-02 13:48:14 +00:00
|
|
|
{
|
2020-04-05 13:14:59 +00:00
|
|
|
return std::make_shared<FunctionStartsEndsWith<Name>>(context);
|
2020-04-02 13:48:14 +00:00
|
|
|
}
|
2020-05-18 20:07:24 +00:00
|
|
|
|
|
|
|
private:
|
|
|
|
ImplementationSelector<IFunction> selector;
|
2020-04-02 13:48:14 +00:00
|
|
|
};
|
|
|
|
|
2020-04-06 07:44:54 +00:00
|
|
|
}
|