2023-07-06 13:16:31 +00:00
|
|
|
#pragma once
|
2023-07-07 13:15:26 +00:00
|
|
|
|
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <Functions/GatherUtils/Sources.h>
|
|
|
|
#include <Functions/GatherUtils/Sinks.h>
|
2023-07-10 09:18:09 +00:00
|
|
|
|
2023-07-06 13:16:31 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2023-07-10 09:18:09 +00:00
|
|
|
|
|
|
|
/// Error codes thrown by the function below. They are only declared here (`extern`);
/// the definitions live outside this header.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
|
2023-07-06 13:16:31 +00:00
|
|
|
namespace
|
|
|
|
{
|
2023-07-07 13:15:26 +00:00
|
|
|
|
|
|
|
/// Makes the names from GatherUtils usable without qualification in this scope.
/// NOTE(review): presumably this is where the unqualified StringSource / ConstSource
/// used by the class below come from - confirm against Functions/GatherUtils/Sources.h.
using namespace GatherUtils;
|
|
|
|
|
2023-07-06 13:16:31 +00:00
|
|
|
template <typename Name, typename Impl>
|
2023-07-10 09:18:09 +00:00
|
|
|
class HasSubsequenceImpl : public IFunction
|
2023-07-06 13:16:31 +00:00
|
|
|
{
|
2023-07-07 13:15:26 +00:00
|
|
|
public:
|
2023-07-06 13:16:31 +00:00
|
|
|
static constexpr auto name = Name::name;
|
|
|
|
|
2023-07-10 09:18:09 +00:00
|
|
|
static FunctionPtr create(ContextPtr) { return std::make_shared<HasSubsequenceImpl>(); }
|
2023-07-06 13:16:31 +00:00
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
String getName() const override { return name; }
|
2023-07-06 19:43:37 +00:00
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
bool isVariadic() const override { return false; }
|
2023-07-06 13:16:31 +00:00
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
2023-07-06 13:16:31 +00:00
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
size_t getNumberOfArguments() const override { return 2; }
|
2023-07-06 13:16:31 +00:00
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
bool useDefaultImplementationForConstants() const override { return false; }
|
|
|
|
|
|
|
|
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {};}
|
2023-07-06 13:16:31 +00:00
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
2023-07-06 13:16:31 +00:00
|
|
|
{
|
2023-07-07 13:15:26 +00:00
|
|
|
if (!isString(arguments[0]))
|
|
|
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
|
|
|
"Illegal type {} of argument of function {}",
|
|
|
|
arguments[0]->getName(), getName());
|
2023-07-06 19:43:37 +00:00
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
if (!isString(arguments[1]))
|
|
|
|
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
|
|
|
"Illegal type {} of argument of function {}",
|
|
|
|
arguments[1]->getName(), getName());
|
2023-07-06 13:16:31 +00:00
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
return std::make_shared<DataTypeNumber<UInt8>>();
|
2023-07-06 13:16:31 +00:00
|
|
|
}
|
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & /*result_type*/, size_t input_rows_count) const override
|
2023-07-06 13:16:31 +00:00
|
|
|
{
|
2023-07-07 13:15:26 +00:00
|
|
|
const ColumnPtr & column_haystack = arguments[0].column;
|
|
|
|
const ColumnPtr & column_needle = arguments[1].column;
|
|
|
|
|
|
|
|
const ColumnConst * haystack_const_string = checkAndGetColumnConst<ColumnString>(column_haystack.get());
|
|
|
|
const ColumnConst * needle_const_string = checkAndGetColumnConst<ColumnString>(column_needle.get());
|
|
|
|
const ColumnString * haystack_string = checkAndGetColumn<ColumnString>(&*column_haystack);
|
|
|
|
const ColumnString * needle_string = checkAndGetColumn<ColumnString>(&*column_needle);
|
|
|
|
|
|
|
|
auto col_res = ColumnVector<UInt8>::create();
|
|
|
|
typename ColumnVector<UInt8>::Container & vec_res = col_res->getData();
|
|
|
|
vec_res.resize(input_rows_count);
|
|
|
|
|
|
|
|
if (haystack_string && needle_string)
|
|
|
|
execute(StringSource{*haystack_string}, StringSource{*needle_string}, vec_res);
|
|
|
|
else if (haystack_string && needle_const_string)
|
|
|
|
execute(StringSource{*haystack_string}, ConstSource<StringSource>{*needle_const_string}, vec_res);
|
|
|
|
else if (haystack_const_string && needle_string)
|
|
|
|
execute(ConstSource<StringSource>{*haystack_const_string}, StringSource{*needle_string}, vec_res);
|
|
|
|
else if (haystack_const_string && needle_const_string)
|
|
|
|
execute(ConstSource<StringSource>{*haystack_const_string}, ConstSource<StringSource>{*needle_const_string}, vec_res);
|
|
|
|
else
|
|
|
|
throw Exception(
|
|
|
|
ErrorCodes::ILLEGAL_COLUMN,
|
2023-07-10 09:18:09 +00:00
|
|
|
"Illegal columns {} and {} of arguments of function {}",
|
2023-07-07 13:15:26 +00:00
|
|
|
arguments[0].column->getName(),
|
2023-07-10 09:18:09 +00:00
|
|
|
arguments[1].column->getName(),
|
2023-07-07 13:15:26 +00:00
|
|
|
getName());
|
|
|
|
|
|
|
|
return col_res;
|
|
|
|
}
|
2023-07-06 13:16:31 +00:00
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
private:
|
2023-07-06 13:16:31 +00:00
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
template <typename SourceHaystack, typename SourceNeedle>
|
|
|
|
void execute(
|
|
|
|
SourceHaystack && haystacks,
|
|
|
|
SourceNeedle && needles,
|
|
|
|
PaddedPODArray<UInt8> & res_data) const
|
|
|
|
{
|
|
|
|
while (!haystacks.isEnd())
|
2023-07-06 13:16:31 +00:00
|
|
|
{
|
2023-07-10 09:18:09 +00:00
|
|
|
auto haystack_slice = haystacks.getWhole();
|
|
|
|
auto needle_slice = needles.getWhole();
|
|
|
|
size_t row_num = haystacks.rowNum();
|
2023-07-07 13:15:26 +00:00
|
|
|
|
2023-07-10 07:29:31 +00:00
|
|
|
if constexpr (!Impl::is_utf8)
|
2023-07-10 09:18:09 +00:00
|
|
|
res_data[row_num] = hasSubsequence(haystack_slice.data, haystack_slice.size, needle_slice.data, needle_slice.size);
|
2023-07-10 07:29:31 +00:00
|
|
|
else
|
2023-07-10 09:18:09 +00:00
|
|
|
res_data[row_num] = hasSubsequenceUTF8(haystack_slice.data, haystack_slice.size, needle_slice.data, needle_slice.size);
|
|
|
|
|
2023-07-07 13:15:26 +00:00
|
|
|
haystacks.next();
|
|
|
|
needles.next();
|
2023-07-06 13:16:31 +00:00
|
|
|
}
|
|
|
|
}
|
2023-07-06 19:43:37 +00:00
|
|
|
|
2023-07-10 09:18:09 +00:00
|
|
|
static UInt8 hasSubsequence(const UInt8 * haystack, size_t haystack_size, const UInt8 * needle, size_t needle_size)
|
2023-07-06 19:43:37 +00:00
|
|
|
{
|
|
|
|
size_t j = 0;
|
|
|
|
for (size_t i = 0; (i < haystack_size) && (j < needle_size); i++)
|
2023-07-10 09:18:09 +00:00
|
|
|
if (Impl::toLowerIfNeed(needle[j]) == Impl::toLowerIfNeed(haystack[i]))
|
2023-07-06 19:43:37 +00:00
|
|
|
++j;
|
|
|
|
return j == needle_size;
|
|
|
|
}
|
2023-07-10 07:29:31 +00:00
|
|
|
|
2023-07-10 09:18:09 +00:00
|
|
|
static UInt8 hasSubsequenceUTF8(const UInt8 * haystack, size_t haystack_size, const UInt8 * needle, size_t needle_size)
|
2023-07-10 07:29:31 +00:00
|
|
|
{
|
|
|
|
const auto * haystack_pos = haystack;
|
|
|
|
const auto * needle_pos = needle;
|
|
|
|
const auto * haystack_end = haystack + haystack_size;
|
|
|
|
const auto * needle_end = needle + needle_size;
|
|
|
|
|
|
|
|
if (!needle_size)
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
auto haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
|
|
|
|
auto needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
|
|
|
|
if (!haystack_code_point || !needle_code_point)
|
|
|
|
return 0;
|
2023-07-10 09:18:09 +00:00
|
|
|
|
|
|
|
while (haystack_code_point && needle_code_point)
|
|
|
|
{
|
|
|
|
if (Impl::toLowerIfNeed(*needle_code_point) == Impl::toLowerIfNeed(*haystack_code_point))
|
2023-07-10 07:29:31 +00:00
|
|
|
{
|
|
|
|
needle_pos += UTF8::seqLength(*needle_pos);
|
2023-07-10 09:18:09 +00:00
|
|
|
if (needle_pos >= needle_end)
|
2023-07-10 07:29:31 +00:00
|
|
|
break;
|
|
|
|
needle_code_point = UTF8::convertUTF8ToCodePoint(needle_pos, needle_end - needle_pos);
|
|
|
|
}
|
|
|
|
haystack_pos += UTF8::seqLength(*haystack_pos);
|
2023-07-10 09:18:09 +00:00
|
|
|
if (haystack_pos >= haystack_end)
|
2023-07-10 07:29:31 +00:00
|
|
|
break;
|
|
|
|
haystack_code_point = UTF8::convertUTF8ToCodePoint(haystack_pos, haystack_end - haystack_pos);
|
|
|
|
}
|
|
|
|
return needle_pos == needle_end;
|
|
|
|
}
|
2023-07-06 13:16:31 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|