mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-04 05:22:17 +00:00
3478db9fb6
- This is not needed for non-const regexp array arguments (the cardinality of arrays is fixed per column) but it cleans up the code and runs the check only in functions which have restrictions on the number of patterns. - For functions using hyperscan, it was checked that the number of regexes is < 2^32. Removed the check because I don't think anyone will ever specify 4 billion patterns.
66 lines
2.4 KiB
C++
66 lines
2.4 KiB
C++
#pragma once
|
|
|
|
#include <vector>
|
|
#include <Columns/ColumnString.h>
|
|
|
|
|
|
namespace DB
|
|
{
|
|
|
|
namespace ErrorCodes
|
|
{
|
|
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
|
}
|
|
|
|
template <typename Name, typename Impl>
|
|
struct MultiSearchFirstIndexImpl
|
|
{
|
|
using ResultType = UInt64;
|
|
static constexpr bool is_using_hyperscan = false;
|
|
/// Variable for understanding, if we used offsets for the output, most
|
|
/// likely to determine whether the function returns ColumnVector of ColumnArray.
|
|
static constexpr bool is_column_array = false;
|
|
static constexpr auto name = Name::name;
|
|
|
|
static auto getReturnType() { return std::make_shared<DataTypeNumber<ResultType>>(); }
|
|
|
|
static void vectorConstant(
|
|
const ColumnString::Chars & haystack_data,
|
|
const ColumnString::Offsets & haystack_offsets,
|
|
const std::vector<std::string_view> & needles,
|
|
PaddedPODArray<UInt64> & res,
|
|
[[maybe_unused]] PaddedPODArray<UInt64> & offsets,
|
|
size_t /*max_hyperscan_regexp_length*/,
|
|
size_t /*max_hyperscan_regexp_total_length*/)
|
|
{
|
|
// For performance of Volnitsky search, it is crucial to save only one byte for pattern number.
|
|
if (needles.size() > std::numeric_limits<UInt8>::max())
|
|
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
|
"Number of arguments for function {} doesn't match: passed {}, should be at most {}",
|
|
name, std::to_string(needles.size()), std::to_string(std::numeric_limits<UInt8>::max()));
|
|
|
|
auto searcher = Impl::createMultiSearcherInBigHaystack(needles);
|
|
const size_t haystack_string_size = haystack_offsets.size();
|
|
res.resize(haystack_string_size);
|
|
size_t iteration = 0;
|
|
while (searcher.hasMoreToSearch())
|
|
{
|
|
size_t prev_offset = 0;
|
|
for (size_t j = 0; j < haystack_string_size; ++j)
|
|
{
|
|
const auto * haystack = &haystack_data[prev_offset];
|
|
const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
|
|
/// hasMoreToSearch traverse needles in increasing order
|
|
if (iteration == 0 || res[j] == 0)
|
|
res[j] = searcher.searchOneFirstIndex(haystack, haystack_end);
|
|
prev_offset = haystack_offsets[j];
|
|
}
|
|
++iteration;
|
|
}
|
|
if (iteration == 0)
|
|
std::fill(res.begin(), res.end(), 0);
|
|
}
|
|
};
|
|
|
|
}
|