# Summary of this patch (free-form annotation placed before the first `diff --git` header).
#
# Files touched: src/Functions/MatchImpl.h, src/Functions/MultiMatchAnyImpl.h, src/Functions/ilike.cpp,
# src/Functions/like.h, src/Functions/match.cpp, src/Functions/notILike.cpp, src/Functions/notLike.cpp.
#
# MatchImpl.h:
#   * Adds `struct MatchTraits`, a holder for three scoped enums: `Syntax { Like, Re2 }`,
#     `Case { Sensitive, Insensitive }` and `Result { DontNegate, Negate }`. Its own comment states the
#     purpose: more readable instantiations of MatchImpl<>.
#   * Removes the old doc comment that described the 'like', 'negate' and 'case_insensitive' flags; the
#     NOTE about running the regexp search over whole columns in one call is kept.
#   * Adds three `static constexpr bool` members to MatchImpl -- `is_like`, `case_insensitive`, `negate` --
#     each computed by comparing `syntax_`, `case_` or `result_` against the matching MatchTraits enumerator.
#   * Renames `like` to `is_like` in the four `if (... && impl::likePatternIsSubstring(...))` conditions.
#   * Also touches the two `Regexps::get(needle)` and the two `Regexps::createRegexp(needle, flags)` call sites.
#
# Other files: the MatchImpl instantiations / aliases (ILikeImpl, LikeImpl, FunctionMatch, NotILikeImpl,
# FunctionNotLike, and the vectorConstant call in MultiMatchAnyImpl) are updated.
#
# NOTE(review): in this copy the template parameter/argument lists appear to be missing (e.g.
# `template struct MatchImpl`, `std::conditional_t;`, `using ILikeImpl = MatchImpl;`), which makes several
# removed/added line pairs look identical. Presumably the angle-bracket contents were stripped during
# extraction -- confirm against the original commit before applying or reviewing the instantiations.
# NOTE(review): the patch's line breaks are also collapsed, so it is unlikely to apply as-is -- TODO confirm.
#
diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 54aaa3116fd..e1c6b95d357 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -63,14 +63,33 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res) } -/** 'like' - if true, treat pattern as SQL LIKE, otherwise as re2 regexp. - * 'negate' - if true, negate result - * 'case_insensitive' - if true, match case insensitively - * - * NOTE: We want to run regexp search for whole columns by one call (as implemented in function 'position') - * but for that, regexp engine must support \0 bytes and their interpretation as string boundaries. - */ -template +// For more readable instantiations of MatchImpl<> +struct MatchTraits +{ +enum class Syntax +{ + Like, + Re2 +}; + +enum class Case +{ + Sensitive, + Insensitive +}; + +enum class Result +{ + DontNegate, + Negate +}; +}; + +/** + * NOTE: We want to run regexp search for whole columns by one call (as implemented in function 'position') + * but for that, regexp engine must support \0 bytes and their interpretation as string boundaries. 
+ */ +template struct MatchImpl { static constexpr bool use_default_implementation_for_constants = true; @@ -81,6 +100,10 @@ struct MatchImpl using ResultType = UInt8; + static constexpr bool is_like = (syntax_ == MatchTraits::Syntax::Like); + static constexpr bool case_insensitive = (case_ == MatchTraits::Case::Insensitive); + static constexpr bool negate = (result_ == MatchTraits::Result::Negate); + using Searcher = std::conditional_t; @@ -101,7 +124,7 @@ struct MatchImpl /// A simple case where the [I]LIKE expression reduces to finding a substring in a string String strstr_pattern; - if (like && impl::likePatternIsSubstring(needle, strstr_pattern)) + if (is_like && impl::likePatternIsSubstring(needle, strstr_pattern)) { const UInt8 * const begin = haystack_data.data(); const UInt8 * const end = haystack_data.data() + haystack_data.size(); @@ -139,7 +162,7 @@ struct MatchImpl } else { - auto regexp = Regexps::get(needle); + auto regexp = Regexps::get(needle); String required_substring; bool is_trivial; @@ -252,7 +275,7 @@ struct MatchImpl /// A simple case where the LIKE expression reduces to finding a substring in a string String strstr_pattern; - if (like && impl::likePatternIsSubstring(needle, strstr_pattern)) + if (is_like && impl::likePatternIsSubstring(needle, strstr_pattern)) { const UInt8 * const begin = haystack.data(); const UInt8 * const end = haystack.data() + haystack.size(); @@ -295,7 +318,7 @@ struct MatchImpl } else { - auto regexp = Regexps::get(needle); + auto regexp = Regexps::get(needle); String required_substring; bool is_trivial; @@ -440,7 +463,7 @@ struct MatchImpl reinterpret_cast(cur_needle_data), cur_needle_length); - if (like && impl::likePatternIsSubstring(needle, required_substr)) + if (is_like && impl::likePatternIsSubstring(needle, required_substr)) { if (required_substr.size() > cur_haystack_length) res[i] = negate; @@ -457,7 +480,7 @@ struct MatchImpl // each row is expected to contain a different like/re2 pattern // --> bypass 
the regexp cache, instead construct the pattern on-the-fly const int flags = Regexps::buildRe2Flags(); - const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); + const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); @@ -557,7 +580,7 @@ struct MatchImpl reinterpret_cast(cur_needle_data), cur_needle_length); - if (like && impl::likePatternIsSubstring(needle, required_substr)) + if (is_like && impl::likePatternIsSubstring(needle, required_substr)) { if (required_substr.size() > cur_haystack_length) res[i] = negate; @@ -574,7 +597,7 @@ struct MatchImpl // each row is expected to contain a different like/re2 pattern // --> bypass the regexp cache, instead construct the pattern on-the-fly const int flags = Regexps::buildRe2Flags(); - const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); + const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); diff --git a/src/Functions/MultiMatchAnyImpl.h b/src/Functions/MultiMatchAnyImpl.h index 595a3c8de5b..8a65c8cb2b4 100644 --- a/src/Functions/MultiMatchAnyImpl.h +++ b/src/Functions/MultiMatchAnyImpl.h @@ -120,7 +120,7 @@ struct MultiMatchAnyImpl memset(accum.data(), 0, accum.size()); for (size_t j = 0; j < needles.size(); ++j) { - MatchImpl::vectorConstant(haystack_data, haystack_offsets, needles[j].toString(), nullptr, accum); + MatchImpl::vectorConstant(haystack_data, haystack_offsets, needles[j].toString(), nullptr, accum); for (size_t i = 0; i < res.size(); ++i) { if constexpr (FindAny) diff --git a/src/Functions/ilike.cpp b/src/Functions/ilike.cpp index 1222cc48d07..b88d01986d5 100644 --- a/src/Functions/ilike.cpp +++ b/src/Functions/ilike.cpp @@ -12,7 +12,7 @@ struct NameILike static constexpr auto name = "ilike"; }; -using ILikeImpl = MatchImpl; +using ILikeImpl = 
MatchImpl; using FunctionILike = FunctionsStringSearch; } diff --git a/src/Functions/like.h b/src/Functions/like.h index edb738d393b..9e25fc6f4c0 100644 --- a/src/Functions/like.h +++ b/src/Functions/like.h @@ -11,7 +11,7 @@ struct NameLike static constexpr auto name = "like"; }; -using LikeImpl = MatchImpl; +using LikeImpl = MatchImpl; using FunctionLike = FunctionsStringSearch; } diff --git a/src/Functions/match.cpp b/src/Functions/match.cpp index 4c329701464..a0789f229fd 100644 --- a/src/Functions/match.cpp +++ b/src/Functions/match.cpp @@ -13,7 +13,7 @@ struct NameMatch static constexpr auto name = "match"; }; -using FunctionMatch = FunctionsStringSearch>; +using FunctionMatch = FunctionsStringSearch>; } diff --git a/src/Functions/notILike.cpp b/src/Functions/notILike.cpp index b5e06ac55f4..5e78db1c518 100644 --- a/src/Functions/notILike.cpp +++ b/src/Functions/notILike.cpp @@ -12,7 +12,7 @@ struct NameNotILike static constexpr auto name = "notILike"; }; -using NotILikeImpl = MatchImpl; +using NotILikeImpl = MatchImpl; using FunctionNotILike = FunctionsStringSearch; } diff --git a/src/Functions/notLike.cpp b/src/Functions/notLike.cpp index 7fa1b6f9122..33a36748bb1 100644 --- a/src/Functions/notLike.cpp +++ b/src/Functions/notLike.cpp @@ -12,7 +12,7 @@ struct NameNotLike static constexpr auto name = "notLike"; }; -using FunctionNotLike = FunctionsStringSearch>; +using FunctionNotLike = FunctionsStringSearch>; }