mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Hyperscan multi regular expressions search
This commit is contained in:
parent
7814ccbb29
commit
f8001b41ba
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -76,3 +76,6 @@
|
||||
[submodule "contrib/brotli"]
|
||||
path = contrib/brotli
|
||||
url = https://github.com/google/brotli.git
|
||||
[submodule "contrib/hyperscan"]
|
||||
path = contrib/hyperscan
|
||||
url = https://github.com/ClickHouse-Extras/hyperscan.git
|
||||
|
@ -318,6 +318,7 @@ include (cmake/find_pdqsort.cmake)
|
||||
include (cmake/find_hdfs3.cmake) # uses protobuf
|
||||
include (cmake/find_consistent-hashing.cmake)
|
||||
include (cmake/find_base64.cmake)
|
||||
include (cmake/find_hyperscan.cmake)
|
||||
find_contrib_lib(cityhash)
|
||||
find_contrib_lib(farmhash)
|
||||
find_contrib_lib(metrohash)
|
||||
|
7
cmake/find_hyperscan.cmake
Normal file
7
cmake/find_hyperscan.cmake
Normal file
@ -0,0 +1,7 @@
|
||||
if (HAVE_SSSE3)
|
||||
set (HYPERSCAN_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan/src)
|
||||
set (HYPERSCAN_LIBRARY hs)
|
||||
set (USE_HYPERSCAN 1)
|
||||
set (USE_INTERNAL_HYPERSCAN_LIBRARY 1)
|
||||
message (STATUS "Using hyperscan: ${HYPERSCAN_INCLUDE_DIR} " : ${HYPERSCAN_LIBRARY})
|
||||
endif()
|
4
contrib/CMakeLists.txt
vendored
4
contrib/CMakeLists.txt
vendored
@ -304,3 +304,7 @@ endif ()
|
||||
if (USE_BASE64)
|
||||
add_subdirectory (base64-cmake)
|
||||
endif()
|
||||
|
||||
if (USE_HYPERSCAN)
|
||||
add_subdirectory (hyperscan)
|
||||
endif()
|
||||
|
2
contrib/boost
vendored
2
contrib/boost
vendored
@ -1 +1 @@
|
||||
Subproject commit 6a96e8b59f76148eb8ad54a9d15259f8ce84c606
|
||||
Subproject commit 32abf16beb7bb8b243a4d100ccdd6acb271738c4
|
1
contrib/hyperscan
vendored
Submodule
1
contrib/hyperscan
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 1c8d85e309afe6932fe37acfc712766c1f80b793
|
@ -64,3 +64,8 @@ if (USE_XXHASH)
|
||||
target_link_libraries(clickhouse_functions PRIVATE ${XXHASH_LIBRARY})
|
||||
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${XXHASH_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
if (USE_HYPERSCAN)
|
||||
target_link_libraries (clickhouse_functions PRIVATE ${HYPERSCAN_LIBRARY})
|
||||
target_include_directories (clickhouse_functions SYSTEM PRIVATE ${HYPERSCAN_INCLUDE_DIR})
|
||||
endif ()
|
||||
|
@ -15,6 +15,10 @@
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#ifdef __SSSE3__
|
||||
# include <hs.h>
|
||||
#endif
|
||||
|
||||
#if USE_RE2_ST
|
||||
# include <re2_st/re2.h> // Y_IGNORE
|
||||
#else
|
||||
@ -334,13 +338,13 @@ struct MultiPositionImpl
|
||||
template <typename Impl>
|
||||
struct MultiSearchImpl
|
||||
{
|
||||
using ResultType = UInt64;
|
||||
using ResultType = UInt8;
|
||||
|
||||
static void vector_constant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const std::vector<StringRef> & needles,
|
||||
PaddedPODArray<UInt64> & res)
|
||||
PaddedPODArray<UInt8> & res)
|
||||
{
|
||||
Impl::createMultiSearcherInBigHaystack(needles).search(haystack_data, haystack_offsets, res);
|
||||
}
|
||||
@ -524,8 +528,8 @@ struct MatchImpl
|
||||
res[i] = !revert;
|
||||
else
|
||||
{
|
||||
const char * str_data = reinterpret_cast<const char *>(&data[i != 0 ? offsets[i - 1] : 0]);
|
||||
size_t str_size = (i != 0 ? offsets[i] - offsets[i - 1] : offsets[0]) - 1;
|
||||
const char * str_data = reinterpret_cast<const char *>(&data[offsets[i - 1]]);
|
||||
size_t str_size = offsets[i] - offsets[i - 1] - 1;
|
||||
|
||||
/** Even in the case of `required_substring_is_prefix` use UNANCHORED check for regexp,
|
||||
* so that it can match when `required_substring` occurs into the string several times,
|
||||
@ -581,6 +585,65 @@ struct MatchImpl
|
||||
};
|
||||
|
||||
|
||||
struct MultiMatchImpl
|
||||
{
|
||||
using ResultType = UInt8;
|
||||
|
||||
static void vector_constant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const std::vector<StringRef> & needles,
|
||||
PaddedPODArray<UInt8> & res)
|
||||
{
|
||||
#ifdef __SSSE3__
|
||||
using ScratchPtr = std::unique_ptr<hs_scratch_t, DB::MultiRegexps::HyperscanDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
|
||||
|
||||
const auto & hyperscan_regex = MultiRegexps::get(needles);
|
||||
hs_scratch_t * scratch = nullptr;
|
||||
hs_error_t err = hs_alloc_scratch(hyperscan_regex->get(), &scratch);
|
||||
if (err != HS_SUCCESS)
|
||||
throw Exception("Could not allocate scratch space for hyperscan.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
||||
ScratchPtr smart_scratch(scratch);
|
||||
|
||||
auto on_match = [](unsigned int /* id */,
|
||||
unsigned long long /* from */,
|
||||
unsigned long long /* to */,
|
||||
unsigned int /* flags */,
|
||||
void * context) -> int
|
||||
{
|
||||
*reinterpret_cast<UInt8 *>(context) = 1;
|
||||
return 0;
|
||||
};
|
||||
const size_t haystack_offsets_size = haystack_offsets.size();
|
||||
size_t offset = 0;
|
||||
for (size_t i = 0; i < haystack_offsets_size; ++i)
|
||||
{
|
||||
res[i] = 0;
|
||||
hs_scan(
|
||||
hyperscan_regex->get(),
|
||||
reinterpret_cast<const char *>(haystack_data.data()) + offset,
|
||||
haystack_offsets[i] - offset - 1,
|
||||
0,
|
||||
smart_scratch.get(),
|
||||
on_match,
|
||||
&res[i]);
|
||||
offset = haystack_offsets[i];
|
||||
}
|
||||
#else
|
||||
PaddedPODArray<UInt8> accum(res.size());
|
||||
memset(res.data(), 0, res.size());
|
||||
memset(accum.data(), 0, accum.size());
|
||||
for (const StringRef ref : needles)
|
||||
{
|
||||
MatchImpl<false, false>::vector_constant(haystack_data, haystack_offsets, ref.toString(), accum);
|
||||
for (size_t i = 0; i < res.size(); ++i)
|
||||
res[i] |= accum[i];
|
||||
}
|
||||
#endif // __SSSE3__
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
struct ExtractImpl
|
||||
{
|
||||
static void vector(
|
||||
@ -1150,6 +1213,10 @@ struct NameNotLike
|
||||
{
|
||||
static constexpr auto name = "notLike";
|
||||
};
|
||||
struct NameMultiMatch
|
||||
{
|
||||
static constexpr auto name = "multiMatch";
|
||||
};
|
||||
struct NameExtract
|
||||
{
|
||||
static constexpr auto name = "extract";
|
||||
@ -1201,6 +1268,7 @@ using FunctionFirstMatchCaseInsensitiveUTF8
|
||||
using FunctionMatch = FunctionsStringSearch<MatchImpl<false>, NameMatch>;
|
||||
using FunctionLike = FunctionsStringSearch<MatchImpl<true>, NameLike>;
|
||||
using FunctionNotLike = FunctionsStringSearch<MatchImpl<true, true>, NameNotLike>;
|
||||
using FunctionMultiMatch = FunctionsMultiStringSearch<MultiMatchImpl, NameMultiMatch, std::numeric_limits<UInt64>::max()>;
|
||||
using FunctionExtract = FunctionsStringSearchToString<ExtractImpl, NameExtract>;
|
||||
using FunctionReplaceOne = FunctionStringReplace<ReplaceStringImpl<true>, NameReplaceOne>;
|
||||
using FunctionReplaceAll = FunctionStringReplace<ReplaceStringImpl<false>, NameReplaceAll>;
|
||||
@ -1238,6 +1306,7 @@ void registerFunctionsStringSearch(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionMatch>();
|
||||
factory.registerFunction<FunctionLike>();
|
||||
factory.registerFunction<FunctionNotLike>();
|
||||
factory.registerFunction<FunctionMultiMatch>();
|
||||
factory.registerFunction<FunctionExtract>();
|
||||
|
||||
factory.registerAlias("locate", NamePosition::name, FunctionFactory::CaseInsensitive);
|
||||
|
@ -26,6 +26,7 @@ namespace DB
|
||||
* notLike(haystack, pattern)
|
||||
*
|
||||
* match(haystack, pattern) - search by regular expression re2; Returns 0 or 1.
|
||||
* multiMatch(haystack, [pattern_1, pattern_2, ..., pattern_n]) -- search by re2 regular expressions pattern_i; Returns 0 or 1 if any pattern_i matches.
|
||||
*
|
||||
* Applies regexp re2 and pulls:
|
||||
* - the first subpattern, if the regexp has a subpattern;
|
||||
@ -269,9 +270,13 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Impl, typename Name>
|
||||
/// The argument limiting raises from Volnitsky searcher -- it is performance crucial to save only one byte for pattern number.
|
||||
/// But some other searchers use this function, for example, multiMatch -- hyperscan does not have such restrictions
|
||||
template <typename Impl, typename Name, size_t LimitArgs = std::numeric_limits<UInt8>::max()>
|
||||
class FunctionsMultiStringSearch : public IFunction
|
||||
{
|
||||
static_assert(LimitArgs > 0);
|
||||
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
static FunctionPtr create(const Context &) { return std::make_shared<FunctionsMultiStringSearch>(); }
|
||||
@ -282,10 +287,10 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (arguments.size() + 1 >= std::numeric_limits<UInt8>::max())
|
||||
if (arguments.size() + 1 >= LimitArgs)
|
||||
throw Exception(
|
||||
"Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size())
|
||||
+ ", should be at most 255.",
|
||||
+ ", should be at most " + std::to_string(LimitArgs) + ".",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (!isString(arguments[0]))
|
||||
@ -333,6 +338,7 @@ public:
|
||||
|
||||
vec_res.resize(column_haystack_size);
|
||||
|
||||
/// TODO support constant_constant version
|
||||
if (col_haystack_vector)
|
||||
Impl::vector_constant(col_haystack_vector->getChars(), col_haystack_vector->getOffsets(), refs, vec_res);
|
||||
else
|
||||
|
@ -1,19 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/OptimizedRegularExpression.h>
|
||||
#include <Common/ObjectPool.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <Functions/likePatternToRegexp.h>
|
||||
#include <Common/ObjectPool.h>
|
||||
#include <Common/OptimizedRegularExpression.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <common/StringRef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __SSSE3__
|
||||
# include <hs.h>
|
||||
#endif
|
||||
|
||||
namespace ProfileEvents
|
||||
{
|
||||
extern const Event RegexpCreated;
|
||||
extern const Event RegexpCreated;
|
||||
}
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
namespace Regexps
|
||||
{
|
||||
@ -21,16 +34,22 @@ namespace Regexps
|
||||
using Pool = ObjectPoolMap<Regexp, String>;
|
||||
|
||||
template <bool like>
|
||||
inline Regexp createRegexp(const std::string & pattern, int flags) { return {pattern, flags}; }
|
||||
inline Regexp createRegexp(const std::string & pattern, int flags)
|
||||
{
|
||||
return {pattern, flags};
|
||||
}
|
||||
|
||||
template <>
|
||||
inline Regexp createRegexp<true>(const std::string & pattern, int flags) { return {likePatternToRegexp(pattern), flags}; }
|
||||
inline Regexp createRegexp<true>(const std::string & pattern, int flags)
|
||||
{
|
||||
return {likePatternToRegexp(pattern), flags};
|
||||
}
|
||||
|
||||
template <bool like, bool no_capture>
|
||||
inline Pool::Pointer get(const std::string & pattern)
|
||||
{
|
||||
/// C++11 has thread-safe function-local statics on most modern compilers.
|
||||
static Pool known_regexps; /// Different variables for different pattern parameters.
|
||||
static Pool known_regexps; /// Different variables for different pattern parameters.
|
||||
|
||||
return known_regexps.get(pattern, [&pattern]
|
||||
{
|
||||
@ -44,4 +63,74 @@ namespace Regexps
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __SSSE3__
|
||||
|
||||
namespace MultiRegexps
|
||||
{
|
||||
template <typename Deleter, Deleter deleter>
|
||||
struct HyperscanDeleter
|
||||
{
|
||||
template <typename T>
|
||||
void operator()(T * ptr) const
|
||||
{
|
||||
deleter(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
using Regexps = std::unique_ptr<hs_database_t, HyperscanDeleter<decltype(&hs_free_database), &hs_free_database>>;
|
||||
|
||||
using Pool = ObjectPoolMap<Regexps, std::vector<String>>;
|
||||
|
||||
inline Pool::Pointer get(const std::vector<StringRef> & patterns)
|
||||
{
|
||||
/// C++11 has thread-safe function-local statics on most modern compilers.
|
||||
static Pool known_regexps; /// Different variables for different pattern parameters.
|
||||
|
||||
std::vector<String> str_patterns;
|
||||
str_patterns.reserve(patterns.size());
|
||||
for (const StringRef ref : patterns)
|
||||
str_patterns.push_back(ref.toString());
|
||||
|
||||
return known_regexps.get(str_patterns, [&str_patterns]
|
||||
{
|
||||
std::vector<const char *> ptrns;
|
||||
std::vector<unsigned int> flags;
|
||||
ptrns.reserve(str_patterns.size());
|
||||
flags.reserve(str_patterns.size());
|
||||
for (const StringRef ref : str_patterns)
|
||||
{
|
||||
ptrns.push_back(ref.data);
|
||||
flags.push_back(HS_FLAG_DOTALL | HS_FLAG_SINGLEMATCH | HS_FLAG_ALLOWEMPTY);
|
||||
#ifdef __AVX2__
|
||||
flags.back() |= HS_CPU_FEATURES_AVX2;
|
||||
#endif // __AVX2__
|
||||
}
|
||||
hs_database_t * db = nullptr;
|
||||
hs_compile_error_t * compile_error;
|
||||
|
||||
hs_error_t err
|
||||
= hs_compile_multi(ptrns.data(), flags.data(), nullptr, ptrns.size(), HS_MODE_BLOCK, nullptr, &db, &compile_error);
|
||||
if (err != HS_SUCCESS)
|
||||
{
|
||||
std::unique_ptr<
|
||||
hs_compile_error_t,
|
||||
HyperscanDeleter<decltype(&hs_free_compile_error), &hs_free_compile_error>> error(compile_error);
|
||||
|
||||
if (error->expression < 0)
|
||||
throw Exception(String(error->message), ErrorCodes::LOGICAL_ERROR);
|
||||
else
|
||||
throw Exception(
|
||||
"Pattern '" + str_patterns[error->expression] + "' failed with error '" + String(error->message),
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
ProfileEvents::increment(ProfileEvents::RegexpCreated);
|
||||
|
||||
return new Regexps{db};
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
#endif // __SSSE3__
|
||||
|
||||
}
|
||||
|
@ -25,21 +25,29 @@
|
||||
<query><![CDATA[select count(position(URL, 'yandex')), count(position(URL, 'google')) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select count(multiPosition(URL, ['yandex', 'google'])) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select count(match(URL, 'yandex|google')) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select count(multiMatch(URL, ['yandex', 'google'])) FROM hits_100m_single]]></query>
|
||||
|
||||
<query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'yahoo')), sum(match(URL, 'pikabu')) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'yahoo', 'pikabu'])) from hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(multiMatch(URL, ['yandex', 'google', 'yahoo', 'pikabu'])) from hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(match(URL, 'yandex|google|yahoo|pikabu')) FROM hits_100m_single]]></query>
|
||||
|
||||
<query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'http')) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'http'])) from hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(multiMatch(URL, ['yandex', 'google', 'http'])) from hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(match(URL, 'yandex|google|http')) FROM hits_100m_single]]></query>
|
||||
|
||||
<query><![CDATA[select sum(match(URL, 'yandex')), sum(match(URL, 'google')), sum(match(URL, 'facebook')), sum(match(URL, 'wikipedia')), sum(match(URL, 'reddit')) FROM hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(multiSearch(URL, ['yandex', 'google', 'facebook', 'wikipedia', 'reddit'])) from hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(multiMatch(URL, ['yandex', 'google', 'facebook', 'wikipedia', 'reddit'])) from hits_100m_single]]></query>
|
||||
<query><![CDATA[select sum(match(URL, 'yandex|google|facebook|wikipedia|reddit')) FROM hits_100m_single]]></query>
|
||||
|
||||
<query><![CDATA[select sum(firstMatch(URL, ['yandex', 'google', 'http', 'facebook', 'google'])) from hits_100m_single]]></query>
|
||||
|
||||
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE multiMatch(URL, ['about/address', 'for_woman', '^https?://lm-company.ruy/$', 'ultimateguitar.com'])]]></query>
|
||||
<query><![CDATA[SELECT count() FROM hits_100m_single WHERE match(URL, 'about/address|for_woman|^https?://lm-company.ruy/$|ultimateguitar.com')]]></query>
|
||||
|
||||
|
||||
<main_metric>
|
||||
<min_time/>
|
||||
</main_metric>
|
||||
|
560
dbms/tests/queries/0_stateless/00926_multimatch.reference
Normal file
560
dbms/tests/queries/0_stateless/00926_multimatch.reference
Normal file
@ -0,0 +1,560 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
74
dbms/tests/queries/0_stateless/00926_multimatch.sql
Normal file
74
dbms/tests/queries/0_stateless/00926_multimatch.sql
Normal file
@ -0,0 +1,74 @@
|
||||
select 0 = multiMatch(materialize('mpnsguhwsitzvuleiwebwjfitmsg'), ['wbirxqoabpblrnvvmjizj', 'cfcxhuvrexyzyjsh', 'oldhtubemyuqlqbwvwwkwin', 'bumoozxdkjglzu', 'intxlfohlxmajjomw', 'dxkeghohv', 'arsvmwwkjeopnlwnan', 'ouugllgowpqtaxslcopkytbfhifaxbgt', 'hkedmjlbcrzvryaopjqdjjc', 'tbqkljywstuahzh', 'o', 'wowoclosyfcuwotmvjygzuzhrery', 'vpefjiffkhlggntcu', 'ytdixvasrorhripzfhjdmlhqksmctyycwp']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('qjjzqexjpgkglgxpzrbqbnskq'), ['vaiatcjacmlffdzsejpdareqzy', 'xspcfzdufkmecud', 'bcvtbuqtctq', 'nkcopwbfytgemkqcfnnno', 'dylxnzuyhq', 'tno', 'scukuhufly', 'cdyquzuqlptv', 'ohluyfeksyxepezdhqmtfmgkvzsyph', 'ualzwtahvqvtijwp', 'jg', 'gwbawqlngzcknzgtmlj', 'qimvjcgbkkp', 'eaedbcgyrdvv', 'qcwrncjoewwedyyewcdkh', 'uqcvhngoqngmitjfxpznqomertqnqcveoqk', 'ydrgjiankgygpm', 'axepgap']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('fdkmtqmxnegwvnjhghjq'), ['vynkybvdmhgeezybbdqfrukibisj', 'knazzamgjjpavwhvdkwigykh', 'peumnifrmdhhmrqqnemw', 'lmsnyvqoisinlaqobxojlwfbi', 'oqwfzs', 'dymudxxeodwjpgbibnkvr', 'vomtfsnizkplgzktqyoiw', 'yoyfuhlpgrzds', 'cefao', 'gi', 'srpgxfjwl', 'etsjusdeiwbfe', 'ikvtzdopxo', 'ljfkavrau', 'soqdhxtenfrkmeic', 'ktprjwfcelzbup', 'pcvuoddqwsaurcqdtjfnczekwni', 'agkqkqxkfbkfgyqliahsljim']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('khljxzxlpcrxpkrfybbfk'), ['', 'lpc', 'rxpkrfybb', 'crxp', '', 'pkr', 'jxzxlpcrxpkrf', '', 'xzxlpcr', 'xpk', 'fyb', 'xzxlpcrxpkrfybbfk', 'k', 'lpcrxp', 'ljxzxlpcr', 'r', 'pkr', 'fk']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('rbrizgjbigvzfnpgmpkqxoqxvdj'), ['ee', 'cohqnb', 'msol', 'yhlujcvhklnhuomy', 'ietn', 'vgmnlkcsybtokrepzrm', 'wspiryefojxysgrzsxyrluykxfnnbzdstcel', 'mxisnsivndbefqxwznimwgazuulupbaihavg', 'vpzdjvqqeizascxmzdhuq', 'pgvncohlxcqjhfkm', 'mbaypcnfapltsegquurahlsruqvipfhrhq', 'ioxjbcyyqujfveujfhnfdfokfcrlsincjbdt', 'cnvlujyowompdrqjwjx', 'wobwed', 'kdfhaoxiuifotmptcmdbk', 'leoamsnorcvtlmokdomkzuo', 'jjw', 'ogugysetxuqmvggneosbsfbonszepsatq']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('uymwxzyjbfegbhgswiqhinf'), ['lizxzbzlwljkr', 'ukxygktlpzuyijcqeqktxenlaqi', 'onperabgbdiafsxwbvpjtyt', 'xfqgoqvhqph', 'aflmcwabtwgmajmmqelxwkaolyyhmdlc', 'yfz', 'meffuiaicvwed', 'hhzvgmifzamgftkifaeowayjrnnzw', 'nwewybtajv', 'ectiye', 'epjeiljegmqqjncubj', 'zsjgftqjrn', 'pssng', 'raqoarfhdoeujulvqmdo']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('omgghgnzjmecpzqmtcvw'), ['fjhlzbszodmzavzg', 'gfofrnwrxprkfiokv', 'jmjiiqpgznlmyrxwewzqzbe', 'pkyrsqkltlmxr', 'crqgkgqkkyujcyoc', 'endagbcxwqhueczuasykmajfsvtcmh', 'xytmxtrnkdysuwltqomehddp', 'etmdxyyfotfyifwvbykghijvwv', 'mwqtgrncyhkfhjdg', 'iuvymofrqpp', 'pgllsdanlhzqhkstwsmzzftp', 'disjylcceufxtjdvhy']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('mznihnmshftvnmmhnrulizzpslq'), ['nrul', 'mshftvnmmhnr', 'z', 'mhnrulizzps', 'hftvnmmhnrul', 'ihnmshftvnmmhnrulizzp', 'izz', '', 'uli', 'nihnmshftvnmmhnru', 'hnrulizzp', 'nrulizz']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('ruqmqrsxrbftvruvahonradau'), ['uqmqrsxrbft', 'ftv', 'tvruvahonrad', 'mqrsxrbftvruvahon', 'rbftvruvah', 'qrsxrbftvru', 'o', 'ahonradau', 'a', 'ft', '', 'u', 'rsxrbftvruvahonradau', 'ruvahon', 'bftvruvahonradau', 'qrsxrbftvru', 't', 'vahonrada', 'vruvahonradau', 'onra']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('gpsevxtcoeexrltyzduyidmtzxf'), ['exrltyzduyid', 'vxtcoeexrltyz', 'xr', 'ltyzduyidmt', 'yzduy', 'exr', 'coeexrltyzduy', 'coeexrltyzduy', 'rlty', 'rltyzduyidm', 'exrltyz', 'xtcoeexrlty', 'vxtcoeexrltyzduyidm', '', 'coeexrl', 'sevxtcoeexrltyzdu', 'dmt', '']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('dyhycfhzyewaikgursyxfkuv'), ['sktnofpugrmyxmbizzrivmhn', 'fhlgadpoqcvktbfzncxbllvwutdawmw', 'eewzjpcgzrqmltbgmhafwlwqb', 'tpogbkyj', 'rtllntxjgkzs', 'mirbvsqexscnzglogigbujgdwjvcv', 'iktwpgjsakemewmahgqza', 'xgfvzkvqgiuoihjjnxwwpznxhz', 'nxaumpaknreklbwynvxdsmatjekdlxvklh', 'zadzwqhgfxqllihuudozxeixyokhny', 'tdqpgfpzexlkslodps', 'slztannufxaabqfcjyfquafgfhfb', 'xvjldhfuwurvkb', 'aecv', 'uycfsughpikqsbcmwvqygdyexkcykhbnau', 'jr']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('vbcsettndwuntnruiyclvvwoo'), ['dwuntnru', '', 'ttndwuntnruiyclvv', 'ntnr', 'nruiyclvvw', 'wo', '', 'bcsettndwuntnruiycl', 'yc', 'untnruiyclvvw', 'csettndwuntnr', 'ntnruiyclvvwo']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('pqqnugshlczcuxhpjxjbcnro'), ['dpeedqy', 'rtsc', 'jdgla', 'qkgudqjiyzvlvsj', 'xmfxawhijgxxtydbd', 'ebgzazqthb', 'wyrjhvhwzhmpybnylirrn', 'iviqbyuclayqketooztwegtkgwnsezfl', 'bhvidy', 'hijctxxweboq', 't', 'osnzfbziidteiaifgaanm']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('loqchlxspwuvvccucskuytr'), ['', 'k', 'qchlxspwu', 'u', 'hlxspwuvv', 'wuvvccucsku', 'vcc', 'uyt', 'uvv', 'spwu', 'ytr', 'wuvvccucs', 'xspwuv', 'lxspwuvvccuc', 'spwuvvccu', 'oqchlxspwuvvccucskuy']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('pjjyzupzwllshlnatiujmwvaofr'), ['lnatiujmwvao', '', 'zupzwllsh', 'nati', 'wllshl', 'hlnatiujmwv', 'mwvao', 'shlnat', 'ati', 'wllshlnatiujmwvao', 'wllshlnatiujmwvaofr', 'nat']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('iketunkleyaqaxdlocci'), ['nkleyaqaxd', 'etunkleyaq', 'yaqaxdlocci', 'tunkleyaq', 'eyaqaxdlocc', 'leyaq', 'nkleyaqaxdl', 'tunkleya', 'kleyaqa', 'etunkleya', 'leyaqa', 'dlo', 'yaqa', 'leyaqaxd', 'etunkleyaq', '']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('drqianqtangmgbdwruvblkqd'), ['wusajejyucamkyl', 'wsgibljugzrpkniliy', 'lhwqqiuafwffyersqjgjvvvfurx', 'jfokpzzxfdonelorqu', 'ccwkpcgac', 'jmyulqpndkmzbfztobwtm', 'rwrgfkccgxht', 'ggldjecrgbngkonphtcxrkcviujihidjx', 'spwweavbiokizv', 'lv', 'krb', 'vstnhvkbwlqbconaxgbfobqky', 'pvxwdc', 'thrl', 'ahsblffdveamceonqwrbeyxzccmux', 'yozji', 'oejtaxwmeovtqtz', 'zsnzznvqpxdvdxhznxrjn', 'hse', 'kcmkrccxmljzizracxwmpoaggywhdfpxkq']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('yasnpckniistxcejowfijjsvkdajz'), ['slkpxhtsmrtvtm', 'crsbq', 'rdeshtxbfrlfwpsqojassxmvlfbzefldavmgme', 'ipetilcbpsfroefkjirquciwtxhrimbmwnlyv', 'knjpwkmdwbvdbapuyqbtsw', 'horueidziztxovqhsicnklmharuxhtgrsr', 'ofohrgpz', 'oneqnwyevbaqsonrcpmxcynflojmsnix', 'shg', 'nglqzczevgevwawdfperpeytuodjlf']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('ueptpscfgxhplwsueckkxs'), ['ohhygchclbpcdwmftperprn', 'dvpjdqmqckekndvcerqrpkxen', 'lohhvarnmyi', 'zppd', 'qmqxgfewitsunbuhffozcpjtc', 'hsjbioisycsrawktqssjovkmltxodjgv', 'dbzuunwbkrtosyvctdujqtvaawfnvuq', 'gupbvpqthqxae', 'abjdmijaaiasnccgxttmqdsz', 'uccyumqoyqe', 'kxxliepyzlc', 'wbqcqtbyyjbqcgdbpkmzugksmcxhvr', 'piedxm', 'uncpphzoif', 'exkdankwck', 'qeitzozdrqopsergzr', 'hesgrhaftgesnzflrrtjdobxhbepjoas', 'wfpexx']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('ldrzgttlqaphekkkdukgngl'), ['gttlqaphekkkdukgn', 'ekkkd', 'gttlqaphe', 'qaphek', 'h', 'kdu', 'he', 'phek', '', 'drzgttlqaphekkkd']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('ololo'), ['ololo', 'ololo', 'ololo']);
|
||||
SELECT 1 = multiMatch(materialize('khljxzxlpcrxpkrfybbfk'), ['k']);
|
||||
|
||||
select 1 = multiMatch(materialize(''), ['']);
|
||||
select 0 = multiMatch(materialize(''), ['some string']);
|
||||
select 1 = multiMatch(materialize('abc'), ['']);
|
||||
select 1 = multiMatch(materialize('abc'), ['']) from system.numbers limit 10;
|
||||
|
||||
select 0 = multiMatch(materialize('abc'), ['defgh']);
|
||||
select 0 = multiMatch(materialize('abc'), ['defg']);
|
||||
select 0 = multiMatch(materialize('abc'), ['def']);
|
||||
select 0 = multiMatch(materialize('abc'), ['de']);
|
||||
select 0 = multiMatch(materialize('abc'), ['d']);
|
||||
|
||||
|
||||
select 1 = multiMatch(materialize('abc'), ['b']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('abc'), ['bc']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['bcde']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['bcdef']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['bcdefg']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['bcdefgh']) from system.numbers limit 10;
|
||||
|
||||
|
||||
select 0 = multiMatch(materialize('abc'), ['abcdefg']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['abcdef']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['abcde']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['abcd']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('abc'), ['abc']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('abc'), ['ab']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('abc'), ['a']) from system.numbers limit 10;
|
||||
|
||||
select 1 = multiMatch(materialize('abcd'), ['c']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('abcd'), ['cd']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abcd'), ['cde']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abcd'), ['cdef']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abcd'), ['cdefg']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abcd'), ['cdefgh']) from system.numbers limit 10;
|
||||
|
||||
select 0 = multiMatch(materialize('abc'), ['defgh']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['defg']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['def']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['de']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abc'), ['d']) from system.numbers limit 10;
|
||||
|
||||
select 1 = multiMatch(materialize('abc'), ['...']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('a\nbc'), ['a?bc']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('a\nbc'), ['a.bc']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('a\0bc'), ['a?bc']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('a\0bc'), ['a.bc']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('abcdef'), ['a.....']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('abcdef'), ['a......']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('abcdef'), ['a......', 'a.....']) from system.numbers limit 10;
|
||||
select 0 = multiMatch(materialize('aaaa'), ['.*aa.*aaa.*', 'aaaaaa{2}', '\(aa\){3}']) from system.numbers limit 10;
|
||||
select 1 = multiMatch(materialize('abc'), ['a\0d']) from system.numbers limit 10;
|
@ -10,6 +10,7 @@
|
||||
| double-conversion | [BSD 3-Clause License](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) |
|
||||
| FastMemcpy | [MIT](https://github.com/yandex/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) |
|
||||
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause License](https://github.com/yandex/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/yandex/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib License](https://github.com/yandex/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
|
@ -44,6 +44,10 @@ Note that the backslash symbol (`\`) is used for escaping in the regular express
|
||||
The regular expression works with the string as if it is a set of bytes. The regular expression can't contain null bytes.
|
||||
For patterns to search for substrings in a string, it is better to use LIKE or 'position', since they work much faster.
|
||||
|
||||
## multiMatch(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
|
||||
The same as `match`, but returns 0 if none of the regular expressions are matched and 1 if any of the patterns matches. It uses [hyperscan](https://github.com/intel/hyperscan) algorithm. For patterns to search substrings in a string, it is better to use `multiSearch` since it works much faster.
|
||||
|
||||
## extract(haystack, pattern)
|
||||
|
||||
Extracts a fragment of a string using a regular expression. If 'haystack' doesn't match the 'pattern' regex, an empty string is returned. If the regex doesn't contain subpatterns, it takes the fragment that matches the entire regex. Otherwise, it takes the fragment that matches the first subpattern.
|
||||
|
@ -1,4 +1,4 @@
|
||||
# Используемые сторонние библиотеки
|
||||
# Используемые сторонние библиотеки
|
||||
|
||||
| Библиотека | Лицензия |
|
||||
| ------- | ------- |
|
||||
@ -10,6 +10,7 @@
|
||||
| double-conversion | [BSD 3-Clause License](https://github.com/google/double-conversion/blob/cf2f0f3d547dc73b4612028a155b80536902ba02/LICENSE) |
|
||||
| FastMemcpy | [MIT](https://github.com/yandex/ClickHouse/blob/master/libs/libmemcpy/impl/LICENSE) |
|
||||
| googletest | [BSD 3-Clause License](https://github.com/google/googletest/blob/master/LICENSE) |
|
||||
| hyperscan | [BSD 3-Clause License](https://github.com/intel/hyperscan/blob/master/LICENSE) |
|
||||
| libbtrie | [BSD 2-Clause License](https://github.com/yandex/ClickHouse/blob/master/contrib/libbtrie/LICENSE) |
|
||||
| libcxxabi | [BSD + MIT](https://github.com/yandex/ClickHouse/blob/master/libs/libglibc-compatibility/libcxxabi/LICENSE.TXT) |
|
||||
| libdivide | [Zlib License](https://github.com/yandex/ClickHouse/blob/master/contrib/libdivide/LICENSE.txt) |
|
||||
|
@ -37,6 +37,10 @@
|
||||
Регулярное выражение работает со строкой как с набором байт. Регулярное выражение не может содержать нулевые байты.
|
||||
Для шаблонов на поиск подстроки в строке, лучше используйте LIKE или position, так как они работают существенно быстрее.
|
||||
|
||||
## multiMatch(haystack, [pattern_1, pattern_2, ..., pattern_n])
|
||||
|
||||
То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. Используется алгоритм [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск многих подстрок в строке, лучше используйте `multiSearch`, так как она работает существенно быстрее.
|
||||
|
||||
## extract(haystack, pattern)
|
||||
Извлечение фрагмента строки по регулярному выражению. Если haystack не соответствует регулярному выражению pattern, то возвращается пустая строка. Если регулярное выражение не содержит subpattern-ов, то вынимается фрагмент, который подпадает под всё регулярное выражение. Иначе вынимается фрагмент, который подпадает под первый subpattern.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user