2020-05-06 23:21:13 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-10-02 07:13:14 +00:00
|
|
|
#include <base/types.h>
|
2022-06-26 16:12:17 +00:00
|
|
|
#include <Columns/ColumnArray.h>
|
2020-05-06 23:21:13 +00:00
|
|
|
#include <Columns/ColumnString.h>
|
|
|
|
#include <DataTypes/DataTypesNumber.h>
|
2022-06-24 14:12:38 +00:00
|
|
|
#include <Functions/checkHyperscanRegexp.h>
|
2020-05-06 23:21:13 +00:00
|
|
|
#include "Regexps.h"
|
|
|
|
|
2021-10-27 23:10:39 +00:00
|
|
|
#include "config_functions.h"
|
|
|
|
#include <Common/config.h>
|
2020-05-06 23:21:13 +00:00
|
|
|
|
2022-06-17 10:15:19 +00:00
|
|
|
#if USE_VECTORSCAN
|
2020-05-06 23:21:13 +00:00
|
|
|
# include <hs.h>
|
|
|
|
#else
|
|
|
|
# include "MatchImpl.h"
|
2022-07-06 21:36:14 +00:00
|
|
|
#include <Common/Volnitsky.h>
|
2020-05-06 23:21:13 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes thrown by the multi-match implementation below.
/// They are only declared here (extern); the values are defined elsewhere.
namespace ErrorCodes
{
    extern const int CANNOT_ALLOCATE_MEMORY;
    extern const int FUNCTION_NOT_ALLOWED;
    extern const int HYPERSCAN_CANNOT_SCAN_TEXT;
    extern const int NOT_IMPLEMENTED;
    extern const int TOO_MANY_BYTES;
}
|
|
|
|
|
2022-06-28 07:51:36 +00:00
|
|
|
/// Named options for MultiMatchAnyImpl<>, so that its instantiations read
/// better than they would with bare boolean template arguments.
struct MultiMatchTraits
{
    /// What the implementation stores in the result for a matching row.
    enum class Find
    {
        Any,      /// store 1 when some pattern matched
        AnyIndex  /// store the id / position of a pattern that matched
    };
};
|
2020-05-06 23:21:13 +00:00
|
|
|
|
2022-06-25 15:28:15 +00:00
|
|
|
template <typename Name, typename ResultType_, MultiMatchTraits::Find Find, bool WithEditDistance>
|
2020-05-06 23:21:13 +00:00
|
|
|
struct MultiMatchAnyImpl
|
|
|
|
{
|
2022-06-24 13:34:40 +00:00
|
|
|
using ResultType = ResultType_;
|
|
|
|
|
2022-06-25 15:28:15 +00:00
|
|
|
static constexpr bool FindAny = (Find == MultiMatchTraits::Find::Any);
|
|
|
|
static constexpr bool FindAnyIndex = (Find == MultiMatchTraits::Find::AnyIndex);
|
|
|
|
|
2020-05-06 23:21:13 +00:00
|
|
|
/// Variable for understanding, if we used offsets for the output, most
|
|
|
|
/// likely to determine whether the function returns ColumnVector of ColumnArray.
|
|
|
|
static constexpr bool is_column_array = false;
|
2021-09-21 16:43:46 +00:00
|
|
|
static constexpr auto name = Name::name;
|
|
|
|
|
2020-05-06 23:21:13 +00:00
|
|
|
static auto getReturnType()
|
|
|
|
{
|
|
|
|
return std::make_shared<DataTypeNumber<ResultType>>();
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vectorConstant(
|
|
|
|
const ColumnString::Chars & haystack_data,
|
|
|
|
const ColumnString::Offsets & haystack_offsets,
|
2022-06-26 16:12:17 +00:00
|
|
|
const Array & needles_arr,
|
2022-06-24 13:34:40 +00:00
|
|
|
PaddedPODArray<ResultType> & res,
|
2022-06-24 14:12:38 +00:00
|
|
|
PaddedPODArray<UInt64> & offsets,
|
2022-06-25 15:53:11 +00:00
|
|
|
bool allow_hyperscan,
|
2022-06-24 14:12:38 +00:00
|
|
|
size_t max_hyperscan_regexp_length,
|
|
|
|
size_t max_hyperscan_regexp_total_length)
|
2020-05-06 23:21:13 +00:00
|
|
|
{
|
2022-06-26 16:12:17 +00:00
|
|
|
vectorConstant(haystack_data, haystack_offsets, needles_arr, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
2020-05-06 23:21:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void vectorConstant(
|
|
|
|
const ColumnString::Chars & haystack_data,
|
|
|
|
const ColumnString::Offsets & haystack_offsets,
|
2022-06-26 16:12:17 +00:00
|
|
|
const Array & needles_arr,
|
2022-06-24 13:34:40 +00:00
|
|
|
PaddedPODArray<ResultType> & res,
|
2022-06-26 16:45:16 +00:00
|
|
|
PaddedPODArray<UInt64> & /*offsets*/,
|
2022-06-24 14:12:38 +00:00
|
|
|
[[maybe_unused]] std::optional<UInt32> edit_distance,
|
2022-06-25 15:53:11 +00:00
|
|
|
bool allow_hyperscan,
|
2022-06-24 14:12:38 +00:00
|
|
|
size_t max_hyperscan_regexp_length,
|
|
|
|
size_t max_hyperscan_regexp_total_length)
|
2020-05-06 23:21:13 +00:00
|
|
|
{
|
2022-06-25 15:53:11 +00:00
|
|
|
if (!allow_hyperscan)
|
|
|
|
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");
|
|
|
|
|
2022-06-26 16:12:17 +00:00
|
|
|
std::vector<std::string_view> needles;
|
|
|
|
needles.reserve(needles_arr.size());
|
|
|
|
for (const auto & needle : needles_arr)
|
|
|
|
needles.emplace_back(needle.get<String>());
|
|
|
|
|
2022-06-24 14:12:38 +00:00
|
|
|
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
|
|
|
|
2020-05-06 23:21:13 +00:00
|
|
|
res.resize(haystack_offsets.size());
|
2022-07-08 11:18:53 +00:00
|
|
|
|
|
|
|
if (needles_arr.empty())
|
|
|
|
{
|
|
|
|
std::fill(res.begin(), res.end(), 0);
|
|
|
|
return;
|
|
|
|
}
|
2022-06-17 10:15:19 +00:00
|
|
|
#if USE_VECTORSCAN
|
2022-08-16 09:56:53 +00:00
|
|
|
MultiRegexps::DeferredConstructedRegexpsPtr deferred_constructed_regexps = MultiRegexps::getOrSet</*SaveIndices*/ FindAnyIndex, WithEditDistance>(needles, edit_distance);
|
|
|
|
MultiRegexps::Regexps * regexps = deferred_constructed_regexps->get();
|
|
|
|
|
2020-05-06 23:21:13 +00:00
|
|
|
hs_scratch_t * scratch = nullptr;
|
2022-08-16 09:56:53 +00:00
|
|
|
hs_error_t err = hs_clone_scratch(regexps->getScratch(), &scratch);
|
2020-05-06 23:21:13 +00:00
|
|
|
|
|
|
|
if (err != HS_SUCCESS)
|
2022-06-17 10:15:19 +00:00
|
|
|
throw Exception("Could not clone scratch space for vectorscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
2020-05-06 23:21:13 +00:00
|
|
|
|
|
|
|
MultiRegexps::ScratchPtr smart_scratch(scratch);
|
|
|
|
|
|
|
|
auto on_match = []([[maybe_unused]] unsigned int id,
|
|
|
|
unsigned long long /* from */, // NOLINT
|
|
|
|
unsigned long long /* to */, // NOLINT
|
|
|
|
unsigned int /* flags */,
|
|
|
|
void * context) -> int
|
|
|
|
{
|
|
|
|
if constexpr (FindAnyIndex)
|
2022-06-24 13:34:40 +00:00
|
|
|
*reinterpret_cast<ResultType *>(context) = id;
|
2020-05-06 23:21:13 +00:00
|
|
|
else if constexpr (FindAny)
|
2022-06-24 13:34:40 +00:00
|
|
|
*reinterpret_cast<ResultType *>(context) = 1;
|
2020-05-06 23:21:13 +00:00
|
|
|
/// Once we hit the callback, there is no need to search for others.
|
|
|
|
return 1;
|
|
|
|
};
|
|
|
|
const size_t haystack_offsets_size = haystack_offsets.size();
|
|
|
|
UInt64 offset = 0;
|
|
|
|
for (size_t i = 0; i < haystack_offsets_size; ++i)
|
|
|
|
{
|
|
|
|
UInt64 length = haystack_offsets[i] - offset - 1;
|
2022-06-28 07:51:36 +00:00
|
|
|
/// vectorscan restriction.
|
2020-05-06 23:21:13 +00:00
|
|
|
if (length > std::numeric_limits<UInt32>::max())
|
|
|
|
throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);
|
2022-06-28 07:51:36 +00:00
|
|
|
/// zero the result, scan, check, update the offset.
|
2020-05-06 23:21:13 +00:00
|
|
|
res[i] = 0;
|
|
|
|
err = hs_scan(
|
2022-08-16 09:56:53 +00:00
|
|
|
regexps->getDB(),
|
2020-05-06 23:21:13 +00:00
|
|
|
reinterpret_cast<const char *>(haystack_data.data()) + offset,
|
|
|
|
length,
|
|
|
|
0,
|
|
|
|
smart_scratch.get(),
|
|
|
|
on_match,
|
|
|
|
&res[i]);
|
|
|
|
if (err != HS_SUCCESS && err != HS_SCAN_TERMINATED)
|
2022-06-17 10:15:19 +00:00
|
|
|
throw Exception("Failed to scan with vectorscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);
|
2020-05-06 23:21:13 +00:00
|
|
|
offset = haystack_offsets[i];
|
|
|
|
}
|
|
|
|
#else
|
2022-06-28 07:51:36 +00:00
|
|
|
/// fallback if vectorscan is not compiled
|
2022-06-25 15:28:15 +00:00
|
|
|
if constexpr (WithEditDistance)
|
2020-05-06 23:21:13 +00:00
|
|
|
throw Exception(
|
2022-06-17 10:15:19 +00:00
|
|
|
"Edit distance multi-search is not implemented when vectorscan is off",
|
2020-05-06 23:21:13 +00:00
|
|
|
ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
PaddedPODArray<UInt8> accum(res.size());
|
|
|
|
memset(res.data(), 0, res.size() * sizeof(res.front()));
|
|
|
|
memset(accum.data(), 0, accum.size());
|
|
|
|
for (size_t j = 0; j < needles.size(); ++j)
|
|
|
|
{
|
2022-08-15 18:58:46 +00:00
|
|
|
MatchImpl<Name, MatchTraits::Syntax::Re2, MatchTraits::Case::Sensitive, MatchTraits::Result::DontNegate>::vectorConstant(haystack_data, haystack_offsets, String(needles[j].data(), needles[j].size()), nullptr, accum);
|
2020-05-06 23:21:13 +00:00
|
|
|
for (size_t i = 0; i < res.size(); ++i)
|
|
|
|
{
|
|
|
|
if constexpr (FindAny)
|
|
|
|
res[i] |= accum[i];
|
|
|
|
else if (FindAnyIndex && accum[i])
|
|
|
|
res[i] = j + 1;
|
|
|
|
}
|
|
|
|
}
|
2022-06-26 16:45:16 +00:00
|
|
|
#endif // USE_VECTORSCAN
|
|
|
|
}
|
|
|
|
|
|
|
|
static void vectorVector(
|
|
|
|
const ColumnString::Chars & haystack_data,
|
|
|
|
const ColumnString::Offsets & haystack_offsets,
|
2022-07-06 21:36:14 +00:00
|
|
|
const IColumn & needles_data,
|
|
|
|
const ColumnArray::Offsets & needles_offsets,
|
2022-06-26 16:45:16 +00:00
|
|
|
PaddedPODArray<ResultType> & res,
|
|
|
|
PaddedPODArray<UInt64> & offsets,
|
|
|
|
bool allow_hyperscan,
|
|
|
|
size_t max_hyperscan_regexp_length,
|
|
|
|
size_t max_hyperscan_regexp_total_length)
|
|
|
|
{
|
2022-07-06 21:36:14 +00:00
|
|
|
vectorVector(haystack_data, haystack_offsets, needles_data, needles_offsets, res, offsets, std::nullopt, allow_hyperscan, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
2022-06-26 16:45:16 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void vectorVector(
|
2022-07-06 21:36:14 +00:00
|
|
|
const ColumnString::Chars & haystack_data,
|
|
|
|
const ColumnString::Offsets & haystack_offsets,
|
|
|
|
const IColumn & needles_data,
|
|
|
|
const ColumnArray::Offsets & needles_offsets,
|
|
|
|
PaddedPODArray<ResultType> & res,
|
2022-06-26 16:45:16 +00:00
|
|
|
PaddedPODArray<UInt64> & /*offsets*/,
|
2022-07-06 21:36:14 +00:00
|
|
|
std::optional<UInt32> edit_distance,
|
2022-06-26 16:45:16 +00:00
|
|
|
bool allow_hyperscan,
|
2022-07-06 21:36:14 +00:00
|
|
|
size_t max_hyperscan_regexp_length,
|
|
|
|
size_t max_hyperscan_regexp_total_length)
|
2022-06-26 16:45:16 +00:00
|
|
|
{
|
|
|
|
if (!allow_hyperscan)
|
|
|
|
throw Exception(ErrorCodes::FUNCTION_NOT_ALLOWED, "Hyperscan functions are disabled, because setting 'allow_hyperscan' is set to 0");
|
|
|
|
|
|
|
|
res.resize(haystack_offsets.size());
|
|
|
|
#if USE_VECTORSCAN
|
|
|
|
size_t prev_haystack_offset = 0;
|
2022-07-07 20:25:26 +00:00
|
|
|
size_t prev_needles_offset = 0;
|
2022-07-06 21:36:14 +00:00
|
|
|
|
|
|
|
const ColumnString * needles_data_string = checkAndGetColumn<ColumnString>(&needles_data);
|
|
|
|
|
|
|
|
std::vector<std::string_view> needles;
|
|
|
|
|
2022-06-26 16:45:16 +00:00
|
|
|
for (size_t i = 0; i < haystack_offsets.size(); ++i)
|
|
|
|
{
|
2022-07-07 20:25:26 +00:00
|
|
|
needles.reserve(needles_offsets[i] - prev_needles_offset);
|
2022-06-26 16:45:16 +00:00
|
|
|
|
2022-07-07 20:25:26 +00:00
|
|
|
for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j)
|
2022-07-06 21:36:14 +00:00
|
|
|
{
|
2022-07-07 20:25:26 +00:00
|
|
|
needles.emplace_back(needles_data_string->getDataAt(j).toView());
|
2022-07-06 21:36:14 +00:00
|
|
|
}
|
2022-06-26 16:45:16 +00:00
|
|
|
|
2022-07-08 11:18:53 +00:00
|
|
|
if (needles.empty())
|
|
|
|
{
|
|
|
|
res[i] = 0;
|
|
|
|
prev_haystack_offset = haystack_offsets[i];
|
|
|
|
prev_needles_offset = needles_offsets[i];
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2022-06-26 16:45:16 +00:00
|
|
|
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
|
|
|
|
2022-08-16 09:56:53 +00:00
|
|
|
MultiRegexps::DeferredConstructedRegexpsPtr deferred_constructed_regexps = MultiRegexps::getOrSet</*SaveIndices*/ FindAnyIndex, WithEditDistance>(needles, edit_distance);
|
|
|
|
MultiRegexps::Regexps * regexps = deferred_constructed_regexps->get();
|
2022-06-26 16:45:16 +00:00
|
|
|
hs_scratch_t * scratch = nullptr;
|
2022-08-16 09:56:53 +00:00
|
|
|
hs_error_t err = hs_clone_scratch(regexps->getScratch(), &scratch);
|
2022-06-26 16:45:16 +00:00
|
|
|
|
|
|
|
if (err != HS_SUCCESS)
|
|
|
|
throw Exception("Could not clone scratch space for vectorscan", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
|
|
|
|
|
|
|
MultiRegexps::ScratchPtr smart_scratch(scratch);
|
|
|
|
|
|
|
|
auto on_match = []([[maybe_unused]] unsigned int id,
|
|
|
|
unsigned long long /* from */, // NOLINT
|
|
|
|
unsigned long long /* to */, // NOLINT
|
|
|
|
unsigned int /* flags */,
|
|
|
|
void * context) -> int
|
|
|
|
{
|
|
|
|
if constexpr (FindAnyIndex)
|
|
|
|
*reinterpret_cast<ResultType *>(context) = id;
|
|
|
|
else if constexpr (FindAny)
|
|
|
|
*reinterpret_cast<ResultType *>(context) = 1;
|
|
|
|
/// Once we hit the callback, there is no need to search for others.
|
|
|
|
return 1;
|
|
|
|
};
|
|
|
|
|
|
|
|
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
|
|
|
|
2022-06-28 07:51:36 +00:00
|
|
|
/// vectorscan restriction.
|
2022-06-26 16:45:16 +00:00
|
|
|
if (cur_haystack_length > std::numeric_limits<UInt32>::max())
|
|
|
|
throw Exception("Too long string to search", ErrorCodes::TOO_MANY_BYTES);
|
|
|
|
|
2022-06-28 07:51:36 +00:00
|
|
|
/// zero the result, scan, check, update the offset.
|
2022-06-26 16:45:16 +00:00
|
|
|
res[i] = 0;
|
|
|
|
err = hs_scan(
|
2022-08-16 09:56:53 +00:00
|
|
|
regexps->getDB(),
|
2022-06-26 16:45:16 +00:00
|
|
|
reinterpret_cast<const char *>(haystack_data.data()) + prev_haystack_offset,
|
|
|
|
cur_haystack_length,
|
|
|
|
0,
|
|
|
|
smart_scratch.get(),
|
|
|
|
on_match,
|
|
|
|
&res[i]);
|
|
|
|
if (err != HS_SUCCESS && err != HS_SCAN_TERMINATED)
|
|
|
|
throw Exception("Failed to scan with vectorscan", ErrorCodes::HYPERSCAN_CANNOT_SCAN_TEXT);
|
|
|
|
|
|
|
|
prev_haystack_offset = haystack_offsets[i];
|
2022-07-07 20:25:26 +00:00
|
|
|
prev_needles_offset = needles_offsets[i];
|
2022-07-06 21:36:14 +00:00
|
|
|
needles.clear();
|
2022-06-26 16:45:16 +00:00
|
|
|
}
|
|
|
|
#else
|
2022-06-28 07:51:36 +00:00
|
|
|
/// fallback if vectorscan is not compiled
|
2022-07-06 21:36:14 +00:00
|
|
|
/// -- the code is copypasted from vectorVector() in MatchImpl.h and quite complex code ... all of it can be removed once vectorscan is
|
|
|
|
/// enabled on all platforms (#38906)
|
2022-06-26 16:45:16 +00:00
|
|
|
if constexpr (WithEditDistance)
|
|
|
|
throw Exception(
|
|
|
|
"Edit distance multi-search is not implemented when vectorscan is off",
|
|
|
|
ErrorCodes::NOT_IMPLEMENTED);
|
|
|
|
|
2022-07-06 21:36:14 +00:00
|
|
|
(void)edit_distance;
|
|
|
|
|
2022-06-26 16:45:16 +00:00
|
|
|
memset(res.data(), 0, res.size() * sizeof(res.front()));
|
|
|
|
|
|
|
|
size_t prev_haystack_offset = 0;
|
2022-07-07 20:25:26 +00:00
|
|
|
size_t prev_needles_offset = 0;
|
2022-06-26 16:45:16 +00:00
|
|
|
|
2022-07-06 21:36:14 +00:00
|
|
|
const ColumnString * needles_data_string = checkAndGetColumn<ColumnString>(&needles_data);
|
|
|
|
|
|
|
|
std::vector<std::string_view> needles;
|
|
|
|
|
2022-06-26 16:45:16 +00:00
|
|
|
for (size_t i = 0; i < haystack_offsets.size(); ++i)
|
|
|
|
{
|
|
|
|
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
|
|
|
|
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
|
|
|
|
2022-07-07 20:25:26 +00:00
|
|
|
needles.reserve(needles_offsets[i] - prev_needles_offset);
|
2022-06-26 16:45:16 +00:00
|
|
|
|
2022-07-07 20:25:26 +00:00
|
|
|
for (size_t j = prev_needles_offset; j < needles_offsets[i]; ++j)
|
2022-07-06 21:36:14 +00:00
|
|
|
{
|
2022-07-07 20:25:26 +00:00
|
|
|
needles.emplace_back(needles_data_string->getDataAt(j).toView());
|
2022-07-06 21:36:14 +00:00
|
|
|
}
|
2022-06-26 16:45:16 +00:00
|
|
|
|
2022-07-08 11:18:53 +00:00
|
|
|
if (needles.empty())
|
|
|
|
{
|
|
|
|
prev_haystack_offset = haystack_offsets[i];
|
|
|
|
prev_needles_offset = needles_offsets[i];
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2022-06-26 16:45:16 +00:00
|
|
|
checkHyperscanRegexp(needles, max_hyperscan_regexp_length, max_hyperscan_regexp_total_length);
|
|
|
|
|
2022-07-06 21:36:14 +00:00
|
|
|
for (size_t j = 0; j < needles.size(); ++j)
|
2022-06-26 16:45:16 +00:00
|
|
|
{
|
2022-07-06 21:36:14 +00:00
|
|
|
String needle(needles[j]);
|
|
|
|
|
2022-06-26 16:45:16 +00:00
|
|
|
const auto & regexp = Regexps::Regexp(Regexps::createRegexp</*like*/ false, /*no_capture*/ true, /*case_insensitive*/ false>(needle));
|
2022-07-06 21:36:14 +00:00
|
|
|
|
|
|
|
String required_substr;
|
|
|
|
bool is_trivial;
|
|
|
|
bool required_substring_is_prefix; /// for `anchored` execution of the regexp.
|
|
|
|
|
|
|
|
regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix);
|
|
|
|
|
|
|
|
if (required_substr.empty())
|
|
|
|
{
|
|
|
|
if (!regexp.getRE2()) /// An empty regexp. Always matches.
|
|
|
|
{
|
|
|
|
if constexpr (FindAny)
|
|
|
|
res[i] |= 1;
|
|
|
|
else if (FindAnyIndex)
|
|
|
|
res[i] = j + 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const bool match = regexp.getRE2()->Match(
|
|
|
|
{reinterpret_cast<const char *>(cur_haystack_data), cur_haystack_length},
|
|
|
|
0,
|
|
|
|
cur_haystack_length,
|
|
|
|
re2_st::RE2::UNANCHORED,
|
|
|
|
nullptr,
|
|
|
|
0);
|
|
|
|
if constexpr (FindAny)
|
|
|
|
res[i] |= match;
|
|
|
|
else if (FindAnyIndex && match)
|
|
|
|
res[i] = j + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
Volnitsky searcher(required_substr.data(), required_substr.size(), cur_haystack_length);
|
|
|
|
const auto * match = searcher.search(cur_haystack_data, cur_haystack_length);
|
|
|
|
|
|
|
|
if (match == cur_haystack_data + cur_haystack_length)
|
|
|
|
{
|
|
|
|
/// no match
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (is_trivial)
|
|
|
|
{
|
|
|
|
/// no wildcards in pattern
|
|
|
|
if constexpr (FindAny)
|
|
|
|
res[i] |= 1;
|
|
|
|
else if (FindAnyIndex)
|
|
|
|
res[i] = j + 1;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
const size_t start_pos = (required_substring_is_prefix) ? (match - cur_haystack_data) : 0;
|
|
|
|
const size_t end_pos = cur_haystack_length;
|
|
|
|
|
|
|
|
const bool match2 = regexp.getRE2()->Match(
|
|
|
|
{reinterpret_cast<const char *>(cur_haystack_data), cur_haystack_length},
|
|
|
|
start_pos,
|
|
|
|
end_pos,
|
|
|
|
re2_st::RE2::UNANCHORED,
|
|
|
|
nullptr,
|
|
|
|
0);
|
|
|
|
if constexpr (FindAny)
|
|
|
|
res[i] |= match2;
|
|
|
|
else if (FindAnyIndex && match2)
|
|
|
|
res[i] = j + 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-06-26 16:45:16 +00:00
|
|
|
}
|
2022-07-06 21:36:14 +00:00
|
|
|
|
2022-06-26 16:45:16 +00:00
|
|
|
prev_haystack_offset = haystack_offsets[i];
|
2022-07-07 20:25:26 +00:00
|
|
|
prev_needles_offset = needles_offsets[i];
|
2022-07-06 21:36:14 +00:00
|
|
|
needles.clear();
|
2022-06-26 16:45:16 +00:00
|
|
|
}
|
2022-06-17 10:15:19 +00:00
|
|
|
#endif // USE_VECTORSCAN
|
2020-05-06 23:21:13 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|