Cosmetics

This commit is contained in:
Robert Schulze 2024-07-07 09:07:18 +00:00
parent d8d2007a62
commit 82e1d82cb3
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
2 changed files with 43 additions and 38 deletions

View File

@ -34,7 +34,7 @@ Alias: `replace`.
Replaces the first occurrence of the substring matching the regular expression `pattern` (in [re2 syntax](https://github.com/google/re2/wiki/Syntax)) in `haystack` by the `replacement` string.
`replacement` can contain substitutions `\0-\9`.
Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
To use a verbatim `\` character in the `pattern` or `replacement` strings, escape it using `\`.

View File

@ -48,42 +48,57 @@ struct ReplaceRegexpImpl
static constexpr int max_captures = 10;
static Instructions createInstructions(std::string_view replacement, int num_captures)
/// Verifies that the replacement string does not reference non-existing capturing groups.
///
/// The string is scanned with the same rules createInstructions() uses to parse it:
/// a backslash followed by an ASCII digit is a substitution, a backslash followed by any other
/// character escapes that character, everything else is a plain character.
///
/// `replacement`  - the replacement argument to validate
/// `num_captures` - exclusive upper bound for valid substitution numbers; the callers visible in this
///                  file pass min(number of capturing groups + 1, max_captures), which is why the
///                  error message reports `num_captures - 1` capturing groups
///
/// Throws Exception with ErrorCodes::BAD_ARGUMENTS if a substitution number is >= `num_captures`.
static void checkSubstitutions(std::string_view replacement, int num_captures)
{
    for (size_t i = 0; i < replacement.size(); ++i)
    {
        /// Only a backslash which is followed by another character starts an escape sequence.
        if (replacement[i] != '\\' || i + 1 >= replacement.size())
            continue;

        if (isNumericASCII(replacement[i + 1])) /// substitution
        {
            int substitution_num = replacement[i + 1] - '0';
            if (substitution_num >= num_captures)
                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Substitution '\\{}' in replacement argument is invalid, regexp has only {} capturing groups", substitution_num, num_captures - 1);
        }

        /// The character after the backslash is consumed by the escape sequence. Without skipping it,
        /// an escaped backslash followed by a digit (characters `\`, `\`, `1`) would be misread as
        /// the substitution `\1` although createInstructions() treats it as two literal characters.
        ++i;
    }
}
/// Parses the replacement string into a sequence of instructions.
///
/// Each instruction is either a literal string or the number of a substitution:
/// - a backslash followed by an ASCII digit becomes a substitution instruction with that digit,
/// - a backslash followed by any other character appends that character verbatim (escaping),
/// - every other character, including a backslash at the very end, is appended verbatim.
/// Consecutive literal characters are accumulated and emitted as a single literal instruction.
///
/// `replacement`  - the replacement argument
/// `num_captures` - exclusive upper bound for valid substitution numbers, forwarded to checkSubstitutions()
///
/// Throws (via checkSubstitutions()) if the replacement references a non-existing capturing group.
static Instructions createInstructions(std::string_view replacement, int num_captures)
{
    /// Validate up-front. Every backslash-digit pair that is turned into a substitution below has
    /// been inspected by this call, so no further validation of the instructions is needed.
    checkSubstitutions(replacement, num_captures);

    Instructions instructions;

    String literals;
    literals.reserve(replacement.size());

    for (size_t i = 0; i < replacement.size(); ++i)
    {
        if (replacement[i] == '\\' && i + 1 < replacement.size())
        {
            if (isNumericASCII(replacement[i + 1])) /// substitution
            {
                /// Flush the literal characters collected so far to keep the original order.
                if (!literals.empty())
                {
                    instructions.emplace_back(literals);
                    literals = "";
                }
                int substitution_num = replacement[i + 1] - '0';
                instructions.emplace_back(substitution_num);
            }
            else
                literals += replacement[i + 1]; /// escaping

            ++i; /// the character after the backslash has been consumed
        }
        else
            literals += replacement[i]; /// plain character
    }

    if (!literals.empty())
        instructions.emplace_back(literals);

    return instructions;
}
@ -124,7 +139,7 @@ struct ReplaceRegexpImpl
{
std::string_view replacement;
if (instr.substitution_num >= 0)
replacement = std::string_view(matches[instr.substitution_num].data(), matches[instr.substitution_num].size());
replacement = {matches[instr.substitution_num].data(), matches[instr.substitution_num].size()};
else
replacement = instr.literal;
res_data.resize(res_data.size() + replacement.size());
@ -179,19 +194,15 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -221,10 +232,8 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -242,6 +251,7 @@ struct ReplaceRegexpImpl
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
@ -270,17 +280,14 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -290,8 +297,9 @@ struct ReplaceRegexpImpl
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
std::string_view replacement(repl_data, repl_length);
Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures);
Instructions instructions = createInstructions(replacement, num_captures);
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
res_offsets[i] = res_offset;
@ -317,10 +325,8 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
/// Cannot perform search for whole columns. Will process each string separately.
for (size_t i = 0; i < haystack_size; ++i)
{
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
@ -338,12 +344,14 @@ struct ReplaceRegexpImpl
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
std::string_view replacement(repl_data, repl_length);
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures);
Instructions instructions = createInstructions(replacement, num_captures);
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
res_offsets[i] = res_offset;
@ -367,16 +375,13 @@ struct ReplaceRegexpImpl
res_offsets.resize(haystack_size);
re2::RE2::Options regexp_options;
/// Don't write error messages to stderr.
regexp_options.set_log_errors(false);
regexp_options.set_log_errors(false); /// don't write error messages to stderr
re2::RE2 searcher(needle, regexp_options);
if (!searcher.ok())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
Instructions instructions = createInstructions(replacement, num_captures);
for (size_t i = 0; i < haystack_size; ++i)