2020-03-29 17:04:16 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2020-03-30 13:21:54 +00:00
|
|
|
/// Error codes referenced by the code in this header.
/// They are only declared here (`extern`); the definitions live elsewhere in the project.
namespace ErrorCodes
{
    extern const int ILLEGAL_COLUMN;
    /// Used by HasTokenImpl::vectorConstant when a start_pos argument is passed.
    /// It is named from inside a template as a non-dependent name, so it has to be
    /// declared before the template definition, not just before its instantiation.
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
|
|
|
|
|
2020-03-29 17:04:16 +00:00
|
|
|
/** Token search the string, means that needle must be surrounded by some separator chars, like whitespace or puctuation.
|
|
|
|
*/
|
|
|
|
template <typename TokenSearcher, bool negate_result = false>
|
|
|
|
struct HasTokenImpl
|
|
|
|
{
|
|
|
|
using ResultType = UInt8;
|
|
|
|
|
|
|
|
static constexpr bool use_default_implementation_for_constants = true;
|
2020-08-01 21:14:23 +00:00
|
|
|
static constexpr bool supports_start_pos = false;
|
2020-03-29 17:04:16 +00:00
|
|
|
|
|
|
|
static void vectorConstant(
|
2020-08-01 21:14:23 +00:00
|
|
|
const ColumnString::Chars & data,
|
|
|
|
const ColumnString::Offsets & offsets,
|
|
|
|
const std::string & pattern,
|
|
|
|
const ColumnPtr & start_pos,
|
|
|
|
PaddedPODArray<UInt8> & res)
|
2020-03-29 17:04:16 +00:00
|
|
|
{
|
2020-08-01 21:14:23 +00:00
|
|
|
if (start_pos != nullptr) {
|
|
|
|
throw Exception("Function 'hasToken' does not support start_pos argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
|
|
|
}
|
|
|
|
|
2020-03-29 17:04:16 +00:00
|
|
|
if (offsets.empty())
|
|
|
|
return;
|
|
|
|
|
|
|
|
const UInt8 * begin = data.data();
|
|
|
|
const UInt8 * pos = begin;
|
|
|
|
const UInt8 * end = pos + data.size();
|
|
|
|
|
|
|
|
/// The current index in the array of strings.
|
|
|
|
size_t i = 0;
|
|
|
|
|
|
|
|
TokenSearcher searcher(pattern.data(), pattern.size(), end - pos);
|
|
|
|
|
|
|
|
/// We will search for the next occurrence in all rows at once.
|
|
|
|
while (pos < end && end != (pos = searcher.search(pos, end - pos)))
|
|
|
|
{
|
|
|
|
/// Let's determine which index it refers to.
|
|
|
|
while (begin + offsets[i] <= pos)
|
|
|
|
{
|
|
|
|
res[i] = negate_result;
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// We check that the entry does not pass through the boundaries of strings.
|
|
|
|
if (pos + pattern.size() < begin + offsets[i])
|
|
|
|
res[i] = !negate_result;
|
|
|
|
else
|
|
|
|
res[i] = negate_result;
|
|
|
|
|
|
|
|
pos = begin + offsets[i];
|
|
|
|
++i;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Tail, in which there can be no substring.
|
|
|
|
if (i < res.size())
|
|
|
|
memset(&res[i], negate_result, (res.size() - i) * sizeof(res[0]));
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename... Args>
|
|
|
|
static void vectorVector(Args &&...)
|
|
|
|
{
|
|
|
|
throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Search different needles in single haystack.
|
|
|
|
template <typename... Args>
|
|
|
|
static void constantVector(Args &&...)
|
|
|
|
{
|
|
|
|
throw Exception("Function 'hasToken' does not support non-constant needle argument", ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename... Args>
|
|
|
|
static void vectorFixedConstant(Args &&...)
|
|
|
|
{
|
|
|
|
throw Exception("Functions 'hasToken' don't support FixedString haystack argument", ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|