Pass strings in some places as string_view

The original goal was to change

  const auto & needle = String(
        reinterpret_cast<const char *>(cur_needle_data),
        cur_needle_length);

in Functions/MatchImpl.h into a std::string_view to save an allocation +
copy. The needle is eventually passed as search pattern into the re2
library. Re2 has an alternative constructor taking a const char *, i.e. a
NULL-terminated string. Here, the needle is NULL-terminated, but
1. this is only because it is passed inside a ColumnString, yet this is
   not always the case (e.g. fixed string columns have a dense layout w/o
   a NULL terminator).
2. assuming NULL termination for users of the regex code other than
   MatchImpl is too dangerous.

So, for now we'll stay with copying to be on the safe side. One fine day
when re2 has a ptr/size ctor, we can use std::string_view.

This commit just changes a few other places from std::string to
std::string_view, but this will not help with performance.
This commit is contained in:
Robert Schulze 2022-05-24 21:29:43 +02:00
parent e8c96777f6
commit 01ab7b9bad
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
4 changed files with 5 additions and 5 deletions

View File

@ -17,7 +17,7 @@ namespace DB
template <bool thread_safe>
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
const std::string & regexp,
std::string_view regexp,
std::string & required_substring,
bool & is_trivial,
bool & required_substring_is_prefix)

View File

@ -103,7 +103,7 @@ private:
std::unique_ptr<RegexType> re2;
unsigned number_of_subpatterns;
static void analyze(const std::string & regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix);
static void analyze(std::string_view regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix);
};
using OptimizedRegularExpression = OptimizedRegularExpressionImpl<true>;

View File

@ -481,7 +481,7 @@ struct MatchImpl
{
// each row is expected to contain a different like/re2 pattern
// --> bypass the regexp cache, instead construct the pattern on-the-fly
const int flags = Regexps::buildRe2Flags<true, case_insensitive>();
const int flags = Regexps::buildRe2Flags</*no_capture*/ true, case_insensitive>();
const auto & regexp = Regexps::Regexp(Regexps::createRegexp<is_like>(needle, flags));
regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix);
@ -595,7 +595,7 @@ struct MatchImpl
{
// each row is expected to contain a different like/re2 pattern
// --> bypass the regexp cache, instead construct the pattern on-the-fly
const int flags = Regexps::buildRe2Flags<true, case_insensitive>();
const int flags = Regexps::buildRe2Flags</*no_capture*/ true, case_insensitive>();
const auto & regexp = Regexps::Regexp(Regexps::createRegexp<is_like>(needle, flags));
regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix);

View File

@ -6,7 +6,7 @@ namespace DB
{
/// Transforms the [I]LIKE expression into regexp re2. For example, abc%def -> ^abc.*def$
inline String likePatternToRegexp(const String & pattern)
inline String likePatternToRegexp(std::string_view pattern)
{
String res;
res.reserve(pattern.size() * 2);