mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-15 12:14:18 +00:00
Merge pull request #70053 from bigo-sg/regReplace-empty-needle
Allow empty needle in replaceRegexp*
This commit is contained in:
commit
f41d604f28
@ -13,7 +13,6 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
@ -205,7 +204,11 @@ struct ReplaceRegexpImpl
|
||||
size_t input_rows_count)
|
||||
{
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
{
|
||||
res_data.assign(haystack_data);
|
||||
res_offsets.assign(haystack_offsets);
|
||||
return;
|
||||
}
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(haystack_data.size());
|
||||
@ -240,7 +243,7 @@ struct ReplaceRegexpImpl
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
size_t from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
size_t from = haystack_offsets[i - 1];
|
||||
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - from - 1);
|
||||
@ -271,17 +274,24 @@ struct ReplaceRegexpImpl
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
size_t hs_from = haystack_offsets[i - 1];
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - hs_from - 1);
|
||||
|
||||
size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0;
|
||||
size_t ndl_from = needle_offsets[i - 1];
|
||||
const char * ndl_data = reinterpret_cast<const char *>(needle_data.data() + ndl_from);
|
||||
const size_t ndl_length = static_cast<unsigned>(needle_offsets[i] - ndl_from - 1);
|
||||
std::string_view needle(ndl_data, ndl_length);
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
{
|
||||
res_data.insert(res_data.end(), hs_data, hs_data + hs_length);
|
||||
res_data.push_back(0);
|
||||
|
||||
res_offset += hs_length + 1;
|
||||
res_offsets[i] = res_offset;
|
||||
continue;
|
||||
}
|
||||
|
||||
re2::RE2 searcher(needle, regexp_options);
|
||||
if (!searcher.ok())
|
||||
@ -308,7 +318,11 @@ struct ReplaceRegexpImpl
|
||||
assert(haystack_offsets.size() == replacement_offsets.size());
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
{
|
||||
res_data.assign(haystack_data);
|
||||
res_offsets.assign(haystack_offsets);
|
||||
return;
|
||||
}
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(haystack_data.size());
|
||||
@ -325,11 +339,11 @@ struct ReplaceRegexpImpl
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
size_t hs_from = haystack_offsets[i - 1];
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - hs_from - 1);
|
||||
|
||||
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
|
||||
size_t repl_from = replacement_offsets[i - 1];
|
||||
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
|
||||
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
|
||||
std::string_view replacement(repl_data, repl_length);
|
||||
@ -364,19 +378,25 @@ struct ReplaceRegexpImpl
|
||||
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
size_t hs_from = haystack_offsets[i - 1];
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - hs_from - 1);
|
||||
|
||||
size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0;
|
||||
size_t ndl_from = needle_offsets[i - 1];
|
||||
const char * ndl_data = reinterpret_cast<const char *>(needle_data.data() + ndl_from);
|
||||
const size_t ndl_length = static_cast<unsigned>(needle_offsets[i] - ndl_from - 1);
|
||||
std::string_view needle(ndl_data, ndl_length);
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
{
|
||||
res_data.insert(res_data.end(), hs_data, hs_data + hs_length);
|
||||
res_data.push_back(0);
|
||||
res_offsets[i] = res_offsets[i - 1] + hs_length + 1;
|
||||
res_offset = res_offsets[i];
|
||||
continue;
|
||||
}
|
||||
|
||||
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
|
||||
size_t repl_from = replacement_offsets[i - 1];
|
||||
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
|
||||
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
|
||||
std::string_view replacement(repl_data, repl_length);
|
||||
@ -403,7 +423,21 @@ struct ReplaceRegexpImpl
|
||||
size_t input_rows_count)
|
||||
{
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
{
|
||||
chassert(input_rows_count == haystack_data.size() / n);
|
||||
/// Since ColumnFixedString does not have a zero byte at the end, while ColumnString does,
|
||||
/// we need to split haystack_data into strings of length n, add 1 zero byte to the end of each string
|
||||
/// and then copy to res_data, ref: ColumnString.h and ColumnFixedString.h
|
||||
res_data.reserve(haystack_data.size() + input_rows_count);
|
||||
res_offsets.resize(input_rows_count);
|
||||
for (size_t i = 0; i < input_rows_count; ++i)
|
||||
{
|
||||
res_data.insert(res_data.end(), haystack_data.begin() + i * n, haystack_data.begin() + (i + 1) * n);
|
||||
res_data.push_back(0);
|
||||
res_offsets[i] = res_offsets[i - 1] + n + 1;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(haystack_data.size());
|
||||
|
@ -134,15 +134,30 @@
|
||||
3 Hello World not_found x Hello World
|
||||
4 Hello World [eo] x Hxllo World
|
||||
5 Hello World . x xello World
|
||||
Check that whether an exception is thrown if the needle is empty
|
||||
- should not throw an exception if the needle is empty
|
||||
- non-const needle, const replacement
|
||||
Hexxo Worxd
|
||||
Hello World
|
||||
Hexlo World
|
||||
Hello World
|
||||
Hexxo Worxd
|
||||
Hello World
|
||||
Hexlo World
|
||||
Hello World
|
||||
- const needle, non-const replacement
|
||||
Hello World
|
||||
Hello World
|
||||
Hello World
|
||||
Hello World
|
||||
Hello World
|
||||
Hello World
|
||||
Hello World
|
||||
Hello World
|
||||
- non-const needle, non-const replacement
|
||||
Hexxo Worxd
|
||||
Hello World
|
||||
Hexlo World
|
||||
Hello World
|
||||
Hello World
|
||||
Hello World
|
||||
Hello World
|
||||
Hello World
|
||||
Hexxo Worxd
|
||||
Hello World
|
||||
Hexlo World
|
||||
|
@ -69,8 +69,7 @@ SELECT id, haystack, needle, replacement, replaceRegexpOne('Hello World', needle
|
||||
|
||||
DROP TABLE IF EXISTS test_tab;
|
||||
|
||||
|
||||
SELECT 'Check that whether an exception is thrown if the needle is empty';
|
||||
SELECT '- should not throw an exception if the needle is empty';
|
||||
|
||||
CREATE TABLE test_tab
|
||||
(id UInt32, haystack String, needle String, replacement String)
|
||||
@ -79,22 +78,22 @@ CREATE TABLE test_tab
|
||||
|
||||
INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'x') (2, 'Hello World', '', 'y');
|
||||
|
||||
-- needle: non-const, replacement: const
|
||||
SELECT '- non-const needle, const replacement';
|
||||
SELECT replaceAll(haystack, needle, 'x') FROM test_tab;
|
||||
SELECT replaceOne(haystack, needle, 'x') FROM test_tab;
|
||||
SELECT replaceRegexpAll(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpOne(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpAll(haystack, needle, 'x') FROM test_tab;
|
||||
SELECT replaceRegexpOne(haystack, needle, 'x') FROM test_tab;
|
||||
|
||||
-- needle: const, replacement: non-const
|
||||
SELECT '- const needle, non-const replacement';
|
||||
SELECT replaceAll(haystack, '', replacement) FROM test_tab;
|
||||
SELECT replaceOne(haystack, '', replacement) FROM test_tab;
|
||||
SELECT replaceRegexpAll(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpOne(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpAll(haystack, '', replacement) FROM test_tab;
|
||||
SELECT replaceRegexpOne(haystack, '', replacement) FROM test_tab;
|
||||
|
||||
-- needle: non-const, replacement: non-const
|
||||
SELECT '- non-const needle, non-const replacement';
|
||||
SELECT replaceAll(haystack, needle, replacement) FROM test_tab;
|
||||
SELECT replaceOne(haystack, needle, replacement) FROM test_tab;
|
||||
SELECT replaceRegexpAll(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpOne(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpAll(haystack, needle, replacement) FROM test_tab;
|
||||
SELECT replaceRegexpOne(haystack, needle, replacement) FROM test_tab;
|
||||
|
||||
DROP TABLE IF EXISTS test_tab;
|
||||
|
Loading…
Reference in New Issue
Block a user