From 01ab7b9bad008c8c0175327b0ac58aa719077018 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 24 May 2022 21:29:43 +0200 Subject: [PATCH] Pass strings in some places as string_view The original goal was to change const auto & needle = String( reinterpret_cast<const char *>(cur_needle_data), cur_needle_length); in Functions/MatchImpl.h into a std::string_view to save an allocation + copy. The needle is eventually passed as a search pattern into the re2 library. Re2 has an alternative constructor taking a const char *, i.e. a NULL-terminated string. Here, the needle is NULL-terminated but 1. this is only because it is passed inside a ColumnString, yet this is not always the case (e.g. fixed string columns have a dense layout w/o NULL terminator). 2. assuming NULL termination for users != MatchImpl of the regex code is too dangerous. So, for now we'll stay with copying to be on the safe side. One fine day when re2 has a ptr/size ctor, we can use std::string_view. This commit just changes a few other places from std::string to std::string_view, but this will not help with performance. 
--- src/Common/OptimizedRegularExpression.cpp | 2 +- src/Common/OptimizedRegularExpression.h | 2 +- src/Functions/MatchImpl.h | 4 ++-- src/Functions/likePatternToRegexp.h | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index da348adbe31..cfc364929a3 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -17,7 +17,7 @@ namespace DB template void OptimizedRegularExpressionImpl::analyze( - const std::string & regexp, + std::string_view regexp, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix) diff --git a/src/Common/OptimizedRegularExpression.h b/src/Common/OptimizedRegularExpression.h index bbb1b0d5eda..eaa7b06e309 100644 --- a/src/Common/OptimizedRegularExpression.h +++ b/src/Common/OptimizedRegularExpression.h @@ -103,7 +103,7 @@ private: std::unique_ptr re2; unsigned number_of_subpatterns; - static void analyze(const std::string & regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix); + static void analyze(std::string_view regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix); }; using OptimizedRegularExpression = OptimizedRegularExpressionImpl; diff --git a/src/Functions/MatchImpl.h b/src/Functions/MatchImpl.h index 003abe10a89..15cf032aedc 100644 --- a/src/Functions/MatchImpl.h +++ b/src/Functions/MatchImpl.h @@ -481,7 +481,7 @@ struct MatchImpl { // each row is expected to contain a different like/re2 pattern // --> bypass the regexp cache, instead construct the pattern on-the-fly - const int flags = Regexps::buildRe2Flags(); + const int flags = Regexps::buildRe2Flags(); const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); @@ -595,7 +595,7 @@ struct MatchImpl { // each row 
is expected to contain a different like/re2 pattern // --> bypass the regexp cache, instead construct the pattern on-the-fly - const int flags = Regexps::buildRe2Flags(); + const int flags = Regexps::buildRe2Flags(); const auto & regexp = Regexps::Regexp(Regexps::createRegexp(needle, flags)); regexp.getAnalyzeResult(required_substr, is_trivial, required_substring_is_prefix); diff --git a/src/Functions/likePatternToRegexp.h b/src/Functions/likePatternToRegexp.h index 15e38e61ab4..319a3729e16 100644 --- a/src/Functions/likePatternToRegexp.h +++ b/src/Functions/likePatternToRegexp.h @@ -6,7 +6,7 @@ namespace DB { /// Transforms the [I]LIKE expression into regexp re2. For example, abc%def -> ^abc.*def$ -inline String likePatternToRegexp(const String & pattern) +inline String likePatternToRegexp(std::string_view pattern) { String res; res.reserve(pattern.size() * 2);