Refactoring: Make template instantiation easier to read

- introduced struct MatchTraits with scoped enums that replace the bool
  template parameters

- (minor: made negation the last template parameter because negation
  is applied last during evaluation)
This commit is contained in:
Robert Schulze 2022-05-24 14:03:14 +02:00
parent 7348a0eb28
commit b044d44fef
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
7 changed files with 45 additions and 22 deletions

View File

@ -63,14 +63,33 @@ inline bool likePatternIsSubstring(std::string_view pattern, String & res)
} }
/** 'like' - if true, treat pattern as SQL LIKE, otherwise as re2 regexp. // For more readable instantiations of MatchImpl<>
* 'negate' - if true, negate result struct MatchTraits
* 'case_insensitive' - if true, match case insensitively {
* enum class Syntax
* NOTE: We want to run regexp search for whole columns by one call (as implemented in function 'position') {
* but for that, regexp engine must support \0 bytes and their interpretation as string boundaries. Like,
*/ Re2
template <typename Name, bool like, bool negate, bool case_insensitive> };
enum class Case
{
Sensitive,
Insensitive
};
enum class Result
{
DontNegate,
Negate
};
};
/**
* NOTE: We want to run regexp search for whole columns by one call (as implemented in function 'position')
* but for that, regexp engine must support \0 bytes and their interpretation as string boundaries.
*/
template <typename Name, MatchTraits::Syntax syntax_, MatchTraits::Case case_, MatchTraits::Result result_>
struct MatchImpl struct MatchImpl
{ {
static constexpr bool use_default_implementation_for_constants = true; static constexpr bool use_default_implementation_for_constants = true;
@ -81,6 +100,10 @@ struct MatchImpl
using ResultType = UInt8; using ResultType = UInt8;
static constexpr bool is_like = (syntax_ == MatchTraits::Syntax::Like);
static constexpr bool case_insensitive = (case_ == MatchTraits::Case::Insensitive);
static constexpr bool negate = (result_ == MatchTraits::Result::Negate);
using Searcher = std::conditional_t<case_insensitive, using Searcher = std::conditional_t<case_insensitive,
VolnitskyCaseInsensitiveUTF8, VolnitskyCaseInsensitiveUTF8,
VolnitskyUTF8>; VolnitskyUTF8>;
@ -101,7 +124,7 @@ struct MatchImpl
/// A simple case where the [I]LIKE expression reduces to finding a substring in a string /// A simple case where the [I]LIKE expression reduces to finding a substring in a string
String strstr_pattern; String strstr_pattern;
if (like && impl::likePatternIsSubstring(needle, strstr_pattern)) if (is_like && impl::likePatternIsSubstring(needle, strstr_pattern))
{ {
const UInt8 * const begin = haystack_data.data(); const UInt8 * const begin = haystack_data.data();
const UInt8 * const end = haystack_data.data() + haystack_data.size(); const UInt8 * const end = haystack_data.data() + haystack_data.size();
@ -139,7 +162,7 @@ struct MatchImpl
} }
else else
{ {
auto regexp = Regexps::get<like, true, case_insensitive>(needle); auto regexp = Regexps::get<is_like, true, case_insensitive>(needle);
String required_substring; String required_substring;
bool is_trivial; bool is_trivial;
@ -252,7 +275,7 @@ struct MatchImpl
/// A simple case where the LIKE expression reduces to finding a substring in a string /// A simple case where the LIKE expression reduces to finding a substring in a string
String strstr_pattern; String strstr_pattern;
if (like && impl::likePatternIsSubstring(needle, strstr_pattern)) if (is_like && impl::likePatternIsSubstring(needle, strstr_pattern))
{ {
const UInt8 * const begin = haystack.data(); const UInt8 * const begin = haystack.data();
const UInt8 * const end = haystack.data() + haystack.size(); const UInt8 * const end = haystack.data() + haystack.size();
@ -295,7 +318,7 @@ struct MatchImpl
} }
else else
{ {
auto regexp = Regexps::get<like, true, case_insensitive>(needle); auto regexp = Regexps::get<is_like, true, case_insensitive>(needle);
String required_substring; String required_substring;
bool is_trivial; bool is_trivial;
@ -440,7 +463,7 @@ struct MatchImpl
reinterpret_cast<const char *>(cur_needle_data), reinterpret_cast<const char *>(cur_needle_data),
cur_needle_length); cur_needle_length);
if (like && impl::likePatternIsSubstring(needle, required_substr)) if (is_like && impl::likePatternIsSubstring(needle, required_substr))
{ {
if (required_substr.size() > cur_haystack_length) if (required_substr.size() > cur_haystack_length)
res[i] = negate; res[i] = negate;
@ -457,7 +480,7 @@ struct MatchImpl
// each row is expected to contain a different like/re2 pattern // each row is expected to contain a different like/re2 pattern
// --> bypass the regexp cache, instead construct the pattern on-the-fly // --> bypass the regexp cache, instead construct the pattern on-the-fly
const int flags = Regexps::buildRe2Flags<true, case_insensitive>(); const int flags = Regexps::buildRe2Flags<true, case_insensitive>();
const auto & regexp = Regexps::Regexp(Regexps::createRegexp<like>(needle, flags)); const auto & regexp = Regexps::Regexp(Regexps::createRegexp<is_like>(needle, flags));
regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix);
@ -557,7 +580,7 @@ struct MatchImpl
reinterpret_cast<const char *>(cur_needle_data), reinterpret_cast<const char *>(cur_needle_data),
cur_needle_length); cur_needle_length);
if (like && impl::likePatternIsSubstring(needle, required_substr)) if (is_like && impl::likePatternIsSubstring(needle, required_substr))
{ {
if (required_substr.size() > cur_haystack_length) if (required_substr.size() > cur_haystack_length)
res[i] = negate; res[i] = negate;
@ -574,7 +597,7 @@ struct MatchImpl
// each row is expected to contain a different like/re2 pattern // each row is expected to contain a different like/re2 pattern
// --> bypass the regexp cache, instead construct the pattern on-the-fly // --> bypass the regexp cache, instead construct the pattern on-the-fly
const int flags = Regexps::buildRe2Flags<true, case_insensitive>(); const int flags = Regexps::buildRe2Flags<true, case_insensitive>();
const auto & regexp = Regexps::Regexp(Regexps::createRegexp<like>(needle, flags)); const auto & regexp = Regexps::Regexp(Regexps::createRegexp<is_like>(needle, flags));
regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix);

View File

@ -120,7 +120,7 @@ struct MultiMatchAnyImpl
memset(accum.data(), 0, accum.size()); memset(accum.data(), 0, accum.size());
for (size_t j = 0; j < needles.size(); ++j) for (size_t j = 0; j < needles.size(); ++j)
{ {
MatchImpl<Name, false, false, false>::vectorConstant(haystack_data, haystack_offsets, needles[j].toString(), nullptr, accum); MatchImpl<Name, MatchTraits::Syntax::Re2, MatchTraits::Case::Sensitive, MatchTraits::Result::DontNegate>::vectorConstant(haystack_data, haystack_offsets, needles[j].toString(), nullptr, accum);
for (size_t i = 0; i < res.size(); ++i) for (size_t i = 0; i < res.size(); ++i)
{ {
if constexpr (FindAny) if constexpr (FindAny)

View File

@ -12,7 +12,7 @@ struct NameILike
static constexpr auto name = "ilike"; static constexpr auto name = "ilike";
}; };
using ILikeImpl = MatchImpl<NameILike, true, false, true>; using ILikeImpl = MatchImpl<NameILike, MatchTraits::Syntax::Like, MatchTraits::Case::Insensitive, MatchTraits::Result::DontNegate>;
using FunctionILike = FunctionsStringSearch<ILikeImpl>; using FunctionILike = FunctionsStringSearch<ILikeImpl>;
} }

View File

@ -11,7 +11,7 @@ struct NameLike
static constexpr auto name = "like"; static constexpr auto name = "like";
}; };
using LikeImpl = MatchImpl<NameLike, true, false, false>; using LikeImpl = MatchImpl<NameLike, MatchTraits::Syntax::Like, MatchTraits::Case::Sensitive, MatchTraits::Result::DontNegate>;
using FunctionLike = FunctionsStringSearch<LikeImpl>; using FunctionLike = FunctionsStringSearch<LikeImpl>;
} }

View File

@ -13,7 +13,7 @@ struct NameMatch
static constexpr auto name = "match"; static constexpr auto name = "match";
}; };
using FunctionMatch = FunctionsStringSearch<MatchImpl<NameMatch, false, false, false>>; using FunctionMatch = FunctionsStringSearch<MatchImpl<NameMatch, MatchTraits::Syntax::Re2, MatchTraits::Case::Sensitive, MatchTraits::Result::DontNegate>>;
} }

View File

@ -12,7 +12,7 @@ struct NameNotILike
static constexpr auto name = "notILike"; static constexpr auto name = "notILike";
}; };
using NotILikeImpl = MatchImpl<NameNotILike, true, true, true>; using NotILikeImpl = MatchImpl<NameNotILike, MatchTraits::Syntax::Like, MatchTraits::Case::Insensitive, MatchTraits::Result::Negate>;
using FunctionNotILike = FunctionsStringSearch<NotILikeImpl>; using FunctionNotILike = FunctionsStringSearch<NotILikeImpl>;
} }

View File

@ -12,7 +12,7 @@ struct NameNotLike
static constexpr auto name = "notLike"; static constexpr auto name = "notLike";
}; };
using FunctionNotLike = FunctionsStringSearch<MatchImpl<NameNotLike, true, true, false>>; using FunctionNotLike = FunctionsStringSearch<MatchImpl<NameNotLike, MatchTraits::Syntax::Like, MatchTraits::Case::Sensitive, MatchTraits::Result::Negate>>;
} }