From 0525625ceff3d4d07b1baeb5ced148a41e68e8b7 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Fri, 27 Sep 2024 16:40:34 +0800 Subject: [PATCH 1/8] allow empty needle in replaceRegexp* --- src/Functions/ReplaceRegexpImpl.h | 56 ++++++++++++++----- ..._nonconst_needle_and_replacement.reference | 12 ++++ ...e_with_nonconst_needle_and_replacement.sql | 12 ++-- 3 files changed, 61 insertions(+), 19 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 14f5a2d7932..c03df7221e4 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -205,7 +205,11 @@ struct ReplaceRegexpImpl size_t input_rows_count) { if (needle.empty()) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); + { + res_data.assign(haystack_data.begin(), haystack_data.end()); + res_offsets.assign(haystack_offsets.begin(), haystack_offsets.end()); + return; + } ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); @@ -240,7 +244,7 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t from = i > 0 ? haystack_offsets[i - 1] : 0; + size_t from = haystack_offsets[i - 1]; const char * hs_data = reinterpret_cast(haystack_data.data() + from); const size_t hs_length = static_cast(haystack_offsets[i] - from - 1); @@ -271,17 +275,21 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; + size_t hs_from = haystack_offsets[i - 1]; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); const size_t hs_length = static_cast(haystack_offsets[i] - hs_from - 1); - size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0; + size_t ndl_from = needle_offsets[i - 1]; const char * ndl_data = reinterpret_cast(needle_data.data() + ndl_from); const size_t ndl_length = static_cast(needle_offsets[i] - ndl_from - 1); std::string_view needle(ndl_data, ndl_length); if (needle.empty()) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offsets[i - 1]], hs_data, hs_length + 1); + res_offsets[i] = res_offsets[i - 1] + hs_length + 1; + continue; + } re2::RE2 searcher(needle, regexp_options); if (!searcher.ok()) @@ -308,7 +316,11 @@ struct ReplaceRegexpImpl assert(haystack_offsets.size() == replacement_offsets.size()); if (needle.empty()) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); + { + res_data.assign(haystack_data.begin(), haystack_data.end()); + res_offsets.assign(haystack_offsets.begin(), haystack_offsets.end()); + return; + } ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); @@ -325,11 +337,11 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; + size_t hs_from = haystack_offsets[i - 1]; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); const size_t hs_length = static_cast(haystack_offsets[i] - hs_from - 1); - size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; + size_t repl_from = replacement_offsets[i - 1]; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); std::string_view replacement(repl_data, repl_length); @@ -364,19 +376,23 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; + size_t hs_from = haystack_offsets[i - 1]; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); const size_t hs_length = static_cast(haystack_offsets[i] - hs_from - 1); - size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0; + size_t ndl_from = needle_offsets[i - 1]; const char * ndl_data = reinterpret_cast(needle_data.data() + ndl_from); const size_t ndl_length = static_cast(needle_offsets[i] - ndl_from - 1); std::string_view needle(ndl_data, ndl_length); if (needle.empty()) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); + { + memcpySmallAllowReadWriteOverflow15(&res_data[res_offsets[i - 1]], hs_data, hs_length + 1); + res_offsets[i] = res_offsets[i - 1] + hs_length + 1; + continue; + } - size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; + size_t repl_from = replacement_offsets[i - 1]; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); std::string_view replacement(repl_data, repl_length); @@ -403,7 +419,21 @@ struct ReplaceRegexpImpl size_t input_rows_count) { if (needle.empty()) - throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name); + { + chassert(input_rows_count == haystack_data.size() / n); + /// Since ColumnFixedString does not have a zero byte at the end, while ColumnString does, + /// we need to split haystack_data into strings of length n, add 1 zero byte to the end of each string + /// and then copy to res_data, ref: ColumnString.h and ColumnFixedString.h + res_data.reserve(haystack_data.size() + input_rows_count); + res_offsets.resize(input_rows_count); + for (size_t i = 0; i < input_rows_count; ++i) + { + res_data.insert(res_data.end(), haystack_data.begin() + i * n, haystack_data.begin() + (i + 1) * n); + res_data.push_back(0); + res_offsets[i] = (i + 1) * n + 1; + } + return; + } ColumnString::Offset res_offset = 0; res_data.reserve(haystack_data.size()); diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference index 5e50b9e6cbf..0bf3c3ff333 100644 --- a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference @@ -135,3 +135,15 @@ 4 Hello World [eo] x Hxllo World 5 Hello World . x xello World Check that an exception is thrown if the needle is empty +Hexxo Worxd +Hello World +Hexlo World +Hello World +Hello World +Hello World +Hello World +Hello World +Hexxo Worxd +Hello World +Hexlo World +Hello World diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql index b88224a89c1..46ec1e543d1 100644 --- a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql @@ -82,19 +82,19 @@ INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'x') (2, 'Hello World', '', -- needle: non-const, replacement: const SELECT replaceAll(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT replaceOne(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT replaceRegexpAll(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT replaceRegexpOne(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceRegexpAll(haystack, needle, 'x') FROM test_tab; +SELECT replaceRegexpOne(haystack, needle, 'x') FROM test_tab; -- needle: const, replacement: non-const SELECT replaceAll(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT replaceOne(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT replaceRegexpAll(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT replaceRegexpOne(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceRegexpAll(haystack, '', replacement) FROM test_tab; +SELECT replaceRegexpOne(haystack, '', replacement) FROM test_tab; -- needle: non-const, replacement: non-const SELECT replaceAll(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } SELECT replaceOne(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT replaceRegexpAll(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } -SELECT replaceRegexpOne(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND } +SELECT replaceRegexpAll(haystack, needle, replacement) FROM test_tab; +SELECT replaceRegexpOne(haystack, needle, replacement) FROM test_tab; DROP TABLE IF EXISTS test_tab; From 592c96b227f4151d65b39d7ab63223e5e886563f Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Sun, 29 Sep 2024 18:24:53 +0800 Subject: [PATCH 2/8] simplify code and fix style --- src/Functions/ReplaceRegexpImpl.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index c03df7221e4..34e055ea3b7 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -13,7 +13,6 @@ namespace DB namespace ErrorCodes { - extern const int ARGUMENT_OUT_OF_BOUND; extern const int BAD_ARGUMENTS; } @@ -206,8 +205,8 @@ struct ReplaceRegexpImpl { if (needle.empty()) { - res_data.assign(haystack_data.begin(), haystack_data.end()); - res_offsets.assign(haystack_offsets.begin(), haystack_offsets.end()); + res_data.assign(haystack_data); + res_offsets.assign(haystack_offsets); return; } From 46dffdb58d5f004177538cde493aca2e7be39233 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Tue, 8 Oct 2024 10:25:54 +0800 Subject: [PATCH 3/8] fix tests --- ...ce_with_nonconst_needle_and_replacement.reference | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference index de65b4fa268..e8f87571a7b 100644 --- a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference @@ -139,6 +139,14 @@ Hexxo Worxd Hello World Hexlo World Hello World +Hexxo Worxd +Hello World +Hexlo World +Hello World +Hello World +Hello World +Hello World +Hello World Hello World Hello World Hello World @@ -147,3 +155,7 @@ Hexxo Worxd Hello World Hexlo World Hello World +Hexxo Worxd +Hello World +Hexlo World +Hello World From b62bf1899b8aa426c76d38e9201edc27b6ada663 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Wed, 9 Oct 2024 09:22:47 +0800 Subject: [PATCH 4/8] fix fuzzer fail --- src/Functions/ReplaceRegexpImpl.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 34e055ea3b7..2d86aab4cb1 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -243,7 +243,7 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t from = haystack_offsets[i - 1]; + size_t from = i > 0 ? haystack_offsets[i - 1] : 0; const char * hs_data = reinterpret_cast(haystack_data.data() + from); const size_t hs_length = static_cast(haystack_offsets[i] - from - 1); @@ -274,11 +274,11 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t hs_from = haystack_offsets[i - 1]; + size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); const size_t hs_length = static_cast(haystack_offsets[i] - hs_from - 1); - size_t ndl_from = needle_offsets[i - 1]; + size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0; const char * ndl_data = reinterpret_cast(needle_data.data() + ndl_from); const size_t ndl_length = static_cast(needle_offsets[i] - ndl_from - 1); std::string_view needle(ndl_data, ndl_length); @@ -336,11 +336,11 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t hs_from = haystack_offsets[i - 1]; + size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); const size_t hs_length = static_cast(haystack_offsets[i] - hs_from - 1); - size_t repl_from = replacement_offsets[i - 1]; + size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); std::string_view replacement(repl_data, repl_length); @@ -375,11 +375,11 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t hs_from = haystack_offsets[i - 1]; + size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); const size_t hs_length = static_cast(haystack_offsets[i] - hs_from - 1); - size_t ndl_from = needle_offsets[i - 1]; + size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0; const char * ndl_data = reinterpret_cast(needle_data.data() + ndl_from); const size_t ndl_length = static_cast(needle_offsets[i] - ndl_from - 1); std::string_view needle(ndl_data, ndl_length); @@ -391,7 +391,7 @@ struct ReplaceRegexpImpl continue; } - size_t repl_from = replacement_offsets[i - 1]; + size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); std::string_view replacement(repl_data, repl_length); From 787bf0eb8e0fce4dc67026ea4012b4acddc4faef Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Thu, 10 Oct 2024 09:25:07 +0800 Subject: [PATCH 5/8] fix index fail in debug. --- src/Functions/ReplaceRegexpImpl.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 2d86aab4cb1..bd8d90dcfe4 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -285,8 +285,8 @@ struct ReplaceRegexpImpl if (needle.empty()) { - memcpySmallAllowReadWriteOverflow15(&res_data[res_offsets[i - 1]], hs_data, hs_length + 1); - res_offsets[i] = res_offsets[i - 1] + hs_length + 1; + memcpySmallAllowReadWriteOverflow15(&res_data[i > 0 ? res_offsets[i - 1] : 0], hs_data, hs_length + 1); + res_offsets[i] = (i > 0 ? res_offsets[i - 1] : 0) + hs_length + 1; continue; } @@ -316,8 +316,8 @@ struct ReplaceRegexpImpl if (needle.empty()) { - res_data.assign(haystack_data.begin(), haystack_data.end()); - res_offsets.assign(haystack_offsets.begin(), haystack_offsets.end()); + res_data.assign(haystack_data); + res_offsets.assign(haystack_offsets); return; } @@ -386,8 +386,8 @@ struct ReplaceRegexpImpl if (needle.empty()) { - memcpySmallAllowReadWriteOverflow15(&res_data[res_offsets[i - 1]], hs_data, hs_length + 1); - res_offsets[i] = res_offsets[i - 1] + hs_length + 1; + memcpySmallAllowReadWriteOverflow15(&res_data[i > 0 ? res_offsets[i - 1] : 0], hs_data, hs_length + 1); + res_offsets[i] = (i > 0 ? res_offsets[i - 1] : 0) + hs_length + 1; continue; } From 65831dcbc92b198c95b5d0f1e9e4390bc19db42b Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Fri, 11 Oct 2024 09:49:11 +0800 Subject: [PATCH 6/8] fix crash and clean code --- src/Functions/ReplaceRegexpImpl.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index bd8d90dcfe4..9f9a6f3bbd0 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -243,7 +243,7 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t from = i > 0 ? haystack_offsets[i - 1] : 0; + size_t from = haystack_offsets[i - 1]; const char * hs_data = reinterpret_cast(haystack_data.data() + from); const size_t hs_length = static_cast(haystack_offsets[i] - from - 1); @@ -274,19 +274,19 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; + size_t hs_from = haystack_offsets[i - 1]; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); const size_t hs_length = static_cast(haystack_offsets[i] - hs_from - 1); - size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0; + size_t ndl_from = needle_offsets[i - 1]; const char * ndl_data = reinterpret_cast(needle_data.data() + ndl_from); const size_t ndl_length = static_cast(needle_offsets[i] - ndl_from - 1); std::string_view needle(ndl_data, ndl_length); if (needle.empty()) { - memcpySmallAllowReadWriteOverflow15(&res_data[i > 0 ? res_offsets[i - 1] : 0], hs_data, hs_length + 1); - res_offsets[i] = (i > 0 ? res_offsets[i - 1] : 0) + hs_length + 1; + res_data.insert(res_data.end(), hs_data, hs_data + hs_length); + res_offsets[i] = res_offsets[i - 1] + hs_length + 1; continue; } @@ -336,11 +336,11 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; + size_t hs_from = haystack_offsets[i - 1]; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); const size_t hs_length = static_cast(haystack_offsets[i] - hs_from - 1); - size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; + size_t repl_from = replacement_offsets[i - 1]; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); std::string_view replacement(repl_data, repl_length); @@ -375,23 +375,23 @@ struct ReplaceRegexpImpl for (size_t i = 0; i < input_rows_count; ++i) { - size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0; + size_t hs_from = haystack_offsets[i - 1]; const char * hs_data = reinterpret_cast(haystack_data.data() + hs_from); const size_t hs_length = static_cast(haystack_offsets[i] - hs_from - 1); - size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0; + size_t ndl_from = needle_offsets[i - 1]; const char * ndl_data = reinterpret_cast(needle_data.data() + ndl_from); const size_t ndl_length = static_cast(needle_offsets[i] - ndl_from - 1); std::string_view needle(ndl_data, ndl_length); if (needle.empty()) { - memcpySmallAllowReadWriteOverflow15(&res_data[i > 0 ? res_offsets[i - 1] : 0], hs_data, hs_length + 1); - res_offsets[i] = (i > 0 ? res_offsets[i - 1] : 0) + hs_length + 1; + res_data.insert(res_data.end(), hs_data, hs_data + hs_length); + res_offsets[i] = res_offsets[i - 1] + hs_length + 1; continue; } - size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0; + size_t repl_from = replacement_offsets[i - 1]; const char * repl_data = reinterpret_cast(replacement_data.data() + repl_from); const size_t repl_length = static_cast(replacement_offsets[i] - repl_from - 1); std::string_view replacement(repl_data, repl_length); From f6068fa05951ddcaa4f59b57995af9f0726eeefa Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Fri, 11 Oct 2024 16:24:06 +0800 Subject: [PATCH 7/8] fix index bug --- src/Functions/ReplaceRegexpImpl.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 9f9a6f3bbd0..9cb1f103f94 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -286,7 +286,10 @@ struct ReplaceRegexpImpl if (needle.empty()) { res_data.insert(res_data.end(), hs_data, hs_data + hs_length); + res_data.push_back(0); + res_offsets[i] = res_offsets[i - 1] + hs_length + 1; + res_offset = res_offsets[i]; continue; } @@ -387,7 +390,9 @@ struct ReplaceRegexpImpl if (needle.empty()) { res_data.insert(res_data.end(), hs_data, hs_data + hs_length); + res_data.push_back(0); res_offsets[i] = res_offsets[i - 1] + hs_length + 1; + res_offset = res_offsets[i]; continue; } @@ -419,7 +424,7 @@ struct ReplaceRegexpImpl { if (needle.empty()) { - chassert(input_rows_count == haystack_data.size() / n); + chassert(input_rows_count == haystack_data.size() / n); /// Since ColumnFixedString does not have a zero byte at the end, while ColumnString does, /// we need to split haystack_data into strings of length n, add 1 zero byte to the end of each string /// and then copy to res_data, ref: ColumnString.h and ColumnFixedString.h @@ -429,7 +434,7 @@ struct ReplaceRegexpImpl { res_data.insert(res_data.end(), haystack_data.begin() + i * n, haystack_data.begin() + (i + 1) * n); res_data.push_back(0); - res_offsets[i] = (i + 1) * n + 1; + res_offsets[i] = res_offsets[i - 1] + n + 1; } return; } From 57db54239f1e668b9bdb2050ae5ea34df4a94fc0 Mon Sep 17 00:00:00 2001 From: zhanglistar Date: Mon, 21 Oct 2024 12:23:42 +0800 Subject: [PATCH 8/8] modify comment of test and clean code --- src/Functions/ReplaceRegexpImpl.h | 4 ++-- ...eplace_with_nonconst_needle_and_replacement.reference | 5 ++++- ...2536_replace_with_nonconst_needle_and_replacement.sql | 9 ++++----- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 9cb1f103f94..b5572d2eaa4 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -288,8 +288,8 @@ struct ReplaceRegexpImpl res_data.insert(res_data.end(), hs_data, hs_data + hs_length); res_data.push_back(0); - res_offsets[i] = res_offsets[i - 1] + hs_length + 1; - res_offset = res_offsets[i]; + res_offset += hs_length + 1; + res_offsets[i] = res_offset; continue; } diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference index e8f87571a7b..219149f209c 100644 --- a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.reference @@ -134,7 +134,8 @@ 3 Hello World not_found x Hello World 4 Hello World [eo] x Hxllo World 5 Hello World . x xello World -Check that whether an exception is thrown if the needle is empty +- should not throw an exception if the needle is empty +- non-const needle, const replacement Hexxo Worxd Hello World Hexlo World @@ -143,6 +144,7 @@ Hexxo Worxd Hello World Hexlo World Hello World +- const needle, non-const replacement Hello World Hello World Hello World @@ -151,6 +153,7 @@ Hello World Hello World Hello World Hello World +- non-const needle, non-const replacement Hexxo Worxd Hello World Hexlo World diff --git a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql index a95eb44bb13..7fd79e3b7ff 100644 --- a/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql +++ b/tests/queries/0_stateless/02536_replace_with_nonconst_needle_and_replacement.sql @@ -69,8 +69,7 @@ SELECT id, haystack, needle, replacement, replaceRegexpOne('Hello World', needle DROP TABLE IF EXISTS test_tab; - -SELECT 'Check that whether an exception is thrown if the needle is empty'; +SELECT '- should not throw an exception if the needle is empty'; CREATE TABLE test_tab (id UInt32, haystack String, needle String, replacement String) @@ -79,19 +78,19 @@ CREATE TABLE test_tab INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'x') (2, 'Hello World', '', 'y'); --- needle: non-const, replacement: const +SELECT '- non-const needle, const replacement'; SELECT replaceAll(haystack, needle, 'x') FROM test_tab; SELECT replaceOne(haystack, needle, 'x') FROM test_tab; SELECT replaceRegexpAll(haystack, needle, 'x') FROM test_tab; SELECT replaceRegexpOne(haystack, needle, 'x') FROM test_tab; --- needle: const, replacement: non-const +SELECT '- const needle, non-const replacement'; SELECT replaceAll(haystack, '', replacement) FROM test_tab; SELECT replaceOne(haystack, '', replacement) FROM test_tab; SELECT replaceRegexpAll(haystack, '', replacement) FROM test_tab; SELECT replaceRegexpOne(haystack, '', replacement) FROM test_tab; --- needle: non-const, replacement: non-const +SELECT '- non-const needle, non-const replacement'; SELECT replaceAll(haystack, needle, replacement) FROM test_tab; SELECT replaceOne(haystack, needle, replacement) FROM test_tab; SELECT replaceRegexpAll(haystack, needle, replacement) FROM test_tab;