mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
Merge pull request #46589 from ClickHouse/rs/nonconst-replace
replace[Regexp]{One,All}(): allow non-const pattern and replacement arguments
This commit is contained in:
commit
166a63c36c
@ -13,17 +13,18 @@ Functions for [searching](../../sql-reference/functions/string-search-functions.
|
||||
## replaceOne(haystack, pattern, replacement)
|
||||
|
||||
Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string.
|
||||
‘pattern’ and ‘replacement’ must be constants.
|
||||
|
||||
## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement)
|
||||
|
||||
Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string.
|
||||
|
||||
Alias: `replace`.
|
||||
|
||||
## replaceRegexpOne(haystack, pattern, replacement)
|
||||
|
||||
Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack‘ by the ‘replacement‘ string.
|
||||
‘pattern‘ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
|
||||
‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`.
|
||||
‘pattern‘ must be a [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
|
||||
‘replacement’ must be a plain string or a string containing substitutions `\0-\9`.
|
||||
Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
|
||||
To use a verbatim `\` character in the ‘pattern‘ or ‘replacement‘ string, escape it using `\`.
|
||||
Also keep in mind that string literals require an extra escaping.
|
||||
@ -88,6 +89,8 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
Alias: `REGEXP_REPLACE`.
|
||||
|
||||
## regexpQuoteMeta(s)
|
||||
|
||||
The function adds a backslash before some predefined characters in the string.
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -13,16 +14,14 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
|
||||
template <typename Impl, typename Name>
|
||||
class FunctionStringReplace : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = Name::name;
|
||||
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionStringReplace>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
@ -32,65 +31,80 @@ public:
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
if (!isStringOrFixedString(arguments[0]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of first argument of function {}",
|
||||
arguments[0]->getName(), getName());
|
||||
FunctionArgumentDescriptors args{
|
||||
{"haystack", &isStringOrFixedString<IDataType>, nullptr, "String or FixedString"},
|
||||
{"pattern", &isString<IDataType>, nullptr, "String"},
|
||||
{"replacement", &isString<IDataType>, nullptr, "String"}
|
||||
};
|
||||
|
||||
if (!isStringOrFixedString(arguments[1]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of second argument of function {}",
|
||||
arguments[1]->getName(), getName());
|
||||
|
||||
if (!isStringOrFixedString(arguments[2]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"Illegal type {} of third argument of function {}",
|
||||
arguments[2]->getName(), getName());
|
||||
validateFunctionArgumentTypes(*this, arguments, args);
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
const ColumnPtr column_src = arguments[0].column;
|
||||
const ColumnPtr column_haystack = arguments[0].column;
|
||||
const ColumnPtr column_needle = arguments[1].column;
|
||||
const ColumnPtr column_replacement = arguments[2].column;
|
||||
|
||||
if (!isColumnConst(*column_needle) || !isColumnConst(*column_replacement))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"2nd and 3rd arguments of function {} must be constants.",
|
||||
getName());
|
||||
const ColumnString * col_haystack = checkAndGetColumn<ColumnString>(column_haystack.get());
|
||||
const ColumnFixedString * col_haystack_fixed = checkAndGetColumn<ColumnFixedString>(column_haystack.get());
|
||||
|
||||
const IColumn * c1 = arguments[1].column.get();
|
||||
const IColumn * c2 = arguments[2].column.get();
|
||||
const ColumnConst * c1_const = typeid_cast<const ColumnConst *>(c1);
|
||||
const ColumnConst * c2_const = typeid_cast<const ColumnConst *>(c2);
|
||||
String needle = c1_const->getValue<String>();
|
||||
String replacement = c2_const->getValue<String>();
|
||||
const ColumnString * col_needle_vector = checkAndGetColumn<ColumnString>(column_needle.get());
|
||||
const ColumnConst * col_needle_const = checkAndGetColumn<ColumnConst>(column_needle.get());
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(
|
||||
ErrorCodes::ARGUMENT_OUT_OF_BOUND,
|
||||
"Length of the second argument of function replace must be greater than 0.");
|
||||
const ColumnString * col_replacement_vector = checkAndGetColumn<ColumnString>(column_replacement.get());
|
||||
const ColumnConst * col_replacement_const = checkAndGetColumn<ColumnConst>(column_replacement.get());
|
||||
|
||||
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_src.get()))
|
||||
auto col_res = ColumnString::create();
|
||||
|
||||
if (col_haystack && col_needle_const && col_replacement_const)
|
||||
{
|
||||
auto col_res = ColumnString::create();
|
||||
Impl::vector(col->getChars(), col->getOffsets(), needle, replacement, col_res->getChars(), col_res->getOffsets());
|
||||
Impl::vectorConstantConstant(
|
||||
col_haystack->getChars(), col_haystack->getOffsets(),
|
||||
col_needle_const->getValue<String>(),
|
||||
col_replacement_const->getValue<String>(),
|
||||
col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_src.get()))
|
||||
else if (col_haystack && col_needle_vector && col_replacement_const)
|
||||
{
|
||||
auto col_res = ColumnString::create();
|
||||
Impl::vectorFixed(col_fixed->getChars(), col_fixed->getN(), needle, replacement, col_res->getChars(), col_res->getOffsets());
|
||||
Impl::vectorVectorConstant(
|
||||
col_haystack->getChars(), col_haystack->getOffsets(),
|
||||
col_needle_vector->getChars(), col_needle_vector->getOffsets(),
|
||||
col_replacement_const->getValue<String>(),
|
||||
col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else if (col_haystack && col_needle_const && col_replacement_vector)
|
||||
{
|
||||
Impl::vectorConstantVector(
|
||||
col_haystack->getChars(), col_haystack->getOffsets(),
|
||||
col_needle_const->getValue<String>(),
|
||||
col_replacement_vector->getChars(), col_replacement_vector->getOffsets(),
|
||||
col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else if (col_haystack && col_needle_vector && col_replacement_vector)
|
||||
{
|
||||
Impl::vectorVectorVector(
|
||||
col_haystack->getChars(), col_haystack->getOffsets(),
|
||||
col_needle_vector->getChars(), col_needle_vector->getOffsets(),
|
||||
col_replacement_vector->getChars(), col_replacement_vector->getOffsets(),
|
||||
col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else if (col_haystack_fixed && col_needle_const && col_replacement_const)
|
||||
{
|
||||
Impl::vectorFixedConstantConstant(
|
||||
col_haystack_fixed->getChars(), col_haystack_fixed->getN(),
|
||||
col_needle_const->getValue<String>(),
|
||||
col_replacement_const->getValue<String>(),
|
||||
col_res->getChars(), col_res->getOffsets());
|
||||
return col_res;
|
||||
}
|
||||
else
|
||||
|
@ -13,6 +13,7 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
@ -28,9 +29,11 @@ struct ReplaceRegexpTraits
|
||||
/** Replace all matches of regexp 'needle' to string 'replacement'. 'needle' and 'replacement' are constants.
|
||||
* 'replacement' can contain substitutions, for example: '\2-\3-\1'
|
||||
*/
|
||||
template <ReplaceRegexpTraits::Replace replace>
|
||||
template <typename Name, ReplaceRegexpTraits::Replace replace>
|
||||
struct ReplaceRegexpImpl
|
||||
{
|
||||
static constexpr auto name = Name::name;
|
||||
|
||||
struct Instruction
|
||||
{
|
||||
/// If not negative, perform substitution of n-th subpattern from the regexp match.
|
||||
@ -162,18 +165,21 @@ struct ReplaceRegexpImpl
|
||||
++res_offset;
|
||||
}
|
||||
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
static void vectorConstantConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const String & needle,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(data.size());
|
||||
size_t size = offsets.size();
|
||||
res_offsets.resize(size);
|
||||
res_data.reserve(haystack_data.size());
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
@ -182,39 +188,89 @@ struct ReplaceRegexpImpl
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"The pattern argument is not a valid re2 pattern: {}",
|
||||
searcher.error());
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
|
||||
Instructions instructions = createInstructions(replacement, num_captures);
|
||||
|
||||
/// Cannot perform search for whole columns. Will process each string separately.
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
size_t from = i > 0 ? offsets[i - 1] : 0;
|
||||
const char * haystack_data = reinterpret_cast<const char *>(data.data() + from);
|
||||
const size_t haystack_length = static_cast<unsigned>(offsets[i] - from - 1);
|
||||
size_t from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
|
||||
processString(haystack_data, haystack_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - from - 1);
|
||||
|
||||
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorFixed(
|
||||
const ColumnString::Chars & data,
|
||||
size_t n,
|
||||
const String & needle,
|
||||
static void vectorVectorConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
assert(haystack_offsets.size() == needle_offsets.size());
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
size_t size = data.size() / n;
|
||||
res_data.reserve(data.size());
|
||||
res_offsets.resize(size);
|
||||
res_data.reserve(haystack_data.size());
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
/// Cannot perform search for whole columns. Will process each string separately.
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - hs_from - 1);
|
||||
|
||||
size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0;
|
||||
const char * ndl_data = reinterpret_cast<const char *>(needle_data.data() + ndl_from);
|
||||
const size_t ndl_length = static_cast<unsigned>(needle_offsets[i] - ndl_from - 1);
|
||||
std::string_view needle(ndl_data, ndl_length);
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
Instructions instructions = createInstructions(replacement, num_captures);
|
||||
|
||||
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorConstantVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const String & needle,
|
||||
const ColumnString::Chars & replacement_data,
|
||||
const ColumnString::Offsets & replacement_offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
assert(haystack_offsets.size() == replacement_offsets.size());
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(haystack_data.size());
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
@ -223,22 +279,116 @@ struct ReplaceRegexpImpl
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"The pattern argument is not a valid re2 pattern: {}",
|
||||
searcher.error());
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
|
||||
/// Cannot perform search for whole columns. Will process each string separately.
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - hs_from - 1);
|
||||
|
||||
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
|
||||
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
|
||||
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
|
||||
|
||||
Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures);
|
||||
|
||||
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorVectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
const ColumnString::Chars & replacement_data,
|
||||
const ColumnString::Offsets & replacement_offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
assert(haystack_offsets.size() == needle_offsets.size());
|
||||
assert(needle_offsets.size() == replacement_offsets.size());
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(haystack_data.size());
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
/// Cannot perform search for whole columns. Will process each string separately.
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
size_t hs_from = i > 0 ? haystack_offsets[i - 1] : 0;
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + hs_from);
|
||||
const size_t hs_length = static_cast<unsigned>(haystack_offsets[i] - hs_from - 1);
|
||||
|
||||
size_t ndl_from = i > 0 ? needle_offsets[i - 1] : 0;
|
||||
const char * ndl_data = reinterpret_cast<const char *>(needle_data.data() + ndl_from);
|
||||
const size_t ndl_length = static_cast<unsigned>(needle_offsets[i] - ndl_from - 1);
|
||||
std::string_view needle(ndl_data, ndl_length);
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
size_t repl_from = i > 0 ? replacement_offsets[i - 1] : 0;
|
||||
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
|
||||
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
Instructions instructions = createInstructions(std::string_view(repl_data, repl_length), num_captures);
|
||||
|
||||
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorFixedConstantConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
size_t n,
|
||||
const String & needle,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
size_t haystack_size = haystack_data.size() / n;
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
|
||||
Instructions instructions = createInstructions(replacement, num_captures);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
size_t from = i * n;
|
||||
const char * haystack_data = reinterpret_cast<const char *>(data.data() + from);
|
||||
const size_t haystack_length = n;
|
||||
const char * hs_data = reinterpret_cast<const char *>(haystack_data.data() + from);
|
||||
const size_t hs_length = n;
|
||||
|
||||
processString(haystack_data, haystack_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
processString(hs_data, hs_length, res_data, res_offset, searcher, num_captures, instructions);
|
||||
res_offsets[i] = res_offset;
|
||||
}
|
||||
}
|
||||
|
@ -8,6 +8,11 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
}
|
||||
|
||||
struct ReplaceStringTraits
|
||||
{
|
||||
enum class Replace
|
||||
@ -16,27 +21,33 @@ struct ReplaceStringTraits
|
||||
All
|
||||
};
|
||||
};
|
||||
/** Replace one or all occurencies of substring 'needle' to 'replacement'. 'needle' and 'replacement' are constants.
|
||||
|
||||
/** Replace one or all occurencies of substring 'needle' to 'replacement'.
|
||||
*/
|
||||
template <ReplaceStringTraits::Replace replace>
|
||||
template <typename Name, ReplaceStringTraits::Replace replace>
|
||||
struct ReplaceStringImpl
|
||||
{
|
||||
static void vector(
|
||||
const ColumnString::Chars & data,
|
||||
const ColumnString::Offsets & offsets,
|
||||
const std::string & needle,
|
||||
const std::string & replacement,
|
||||
static constexpr auto name = Name::name;
|
||||
|
||||
static void vectorConstantConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const String & needle,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
const UInt8 * begin = data.data();
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
const UInt8 * const begin = haystack_data.data();
|
||||
const UInt8 * const end = haystack_data.data() + haystack_data.size();
|
||||
const UInt8 * pos = begin;
|
||||
const UInt8 * end = pos + data.size();
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
res_data.reserve(data.size());
|
||||
size_t size = offsets.size();
|
||||
res_offsets.resize(size);
|
||||
res_data.reserve(haystack_data.size());
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
/// The current index in the array of strings.
|
||||
size_t i = 0;
|
||||
@ -53,22 +64,22 @@ struct ReplaceStringImpl
|
||||
memcpy(&res_data[res_offset], pos, match - pos);
|
||||
|
||||
/// Determine which index it belongs to.
|
||||
while (i < offsets.size() && begin + offsets[i] <= match)
|
||||
while (i < haystack_offsets.size() && begin + haystack_offsets[i] <= match)
|
||||
{
|
||||
res_offsets[i] = res_offset + ((begin + offsets[i]) - pos);
|
||||
res_offsets[i] = res_offset + ((begin + haystack_offsets[i]) - pos);
|
||||
++i;
|
||||
}
|
||||
res_offset += (match - pos);
|
||||
|
||||
/// If you have reached the end, it's time to stop
|
||||
if (i == offsets.size())
|
||||
if (i == haystack_offsets.size())
|
||||
break;
|
||||
|
||||
/// Is it true that this string no longer needs to perform transformations.
|
||||
bool can_finish_current_string = false;
|
||||
|
||||
/// We check that the entry does not go through the boundaries of strings.
|
||||
if (match + needle.size() < begin + offsets[i])
|
||||
if (match + needle.size() < begin + haystack_offsets[i])
|
||||
{
|
||||
res_data.resize(res_data.size() + replacement.size());
|
||||
memcpy(&res_data[res_offset], replacement.data(), replacement.size());
|
||||
@ -85,34 +96,268 @@ struct ReplaceStringImpl
|
||||
|
||||
if (can_finish_current_string)
|
||||
{
|
||||
res_data.resize(res_data.size() + (begin + offsets[i] - pos));
|
||||
memcpy(&res_data[res_offset], pos, (begin + offsets[i] - pos));
|
||||
res_offset += (begin + offsets[i] - pos);
|
||||
res_data.resize(res_data.size() + (begin + haystack_offsets[i] - pos));
|
||||
memcpy(&res_data[res_offset], pos, (begin + haystack_offsets[i] - pos));
|
||||
res_offset += (begin + haystack_offsets[i] - pos);
|
||||
res_offsets[i] = res_offset;
|
||||
pos = begin + offsets[i];
|
||||
pos = begin + haystack_offsets[i];
|
||||
++i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Note: this function converts fixed-length strings to variable-length strings
|
||||
/// and each variable-length string should ends with zero byte.
|
||||
static void vectorFixed(
|
||||
const ColumnString::Chars & data,
|
||||
size_t n,
|
||||
const std::string & needle,
|
||||
const std::string & replacement,
|
||||
template <typename CharT>
|
||||
requires (sizeof(CharT) == 1)
|
||||
static void copyToOutput(
|
||||
const CharT * what_start, size_t what_size,
|
||||
ColumnString::Chars & output, ColumnString::Offset & output_offset)
|
||||
{
|
||||
output.resize(output.size() + what_size);
|
||||
memcpy(&output[output_offset], what_start, what_size);
|
||||
output_offset += what_size;
|
||||
}
|
||||
|
||||
static void vectorVectorConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
const UInt8 * begin = data.data();
|
||||
const UInt8 * pos = begin;
|
||||
const UInt8 * end = pos + data.size();
|
||||
chassert(haystack_offsets.size() == needle_offsets.size());
|
||||
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
size_t count = data.size() / n;
|
||||
res_data.reserve(data.size());
|
||||
res_offsets.resize(count);
|
||||
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needle_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
|
||||
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
const auto * const cur_needle_data = &needle_data[prev_needle_offset];
|
||||
const size_t cur_needle_length = needle_offsets[i] - prev_needle_offset - 1;
|
||||
|
||||
if (cur_needle_length == 0)
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
/// Using "slow" "stdlib searcher instead of Volnitsky because there is a different pattern in each row
|
||||
StdLibASCIIStringSearcher</*CaseInsensitive*/ false> searcher(cur_needle_data, cur_needle_length);
|
||||
|
||||
const auto * last_match = static_cast<UInt8 *>(nullptr);
|
||||
const auto * start_pos = cur_haystack_data;
|
||||
const auto * const cur_haystack_end = cur_haystack_data + cur_haystack_length;
|
||||
|
||||
while (start_pos < cur_haystack_end)
|
||||
{
|
||||
if (const auto * const match = searcher.search(start_pos, cur_haystack_end); match != cur_haystack_end)
|
||||
{
|
||||
/// Copy prefix before match
|
||||
copyToOutput(start_pos, match - start_pos, res_data, res_offset);
|
||||
|
||||
/// Insert replacement for match
|
||||
copyToOutput(replacement.data(), replacement.size(), res_data, res_offset);
|
||||
|
||||
last_match = match;
|
||||
start_pos = match + cur_needle_length;
|
||||
|
||||
if constexpr (replace == ReplaceStringTraits::Replace::First)
|
||||
break;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/// Copy suffix after last match
|
||||
size_t bytes = (last_match == nullptr) ? (cur_haystack_end - cur_haystack_data + 1)
|
||||
: (cur_haystack_end - last_match - cur_needle_length + 1);
|
||||
copyToOutput(start_pos, bytes, res_data, res_offset);
|
||||
|
||||
res_offsets[i] = res_offset;
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_needle_offset = needle_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorConstantVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const String & needle,
|
||||
const ColumnString::Chars & replacement_data,
|
||||
const ColumnString::Offsets & replacement_offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
chassert(haystack_offsets.size() == replacement_offsets.size());
|
||||
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_replacement_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
|
||||
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
const auto * const cur_replacement_data = &replacement_data[prev_replacement_offset];
|
||||
const size_t cur_replacement_length = replacement_offsets[i] - prev_replacement_offset - 1;
|
||||
|
||||
/// Using "slow" "stdlib searcher instead of Volnitsky just to keep things simple
|
||||
StdLibASCIIStringSearcher</*CaseInsensitive*/ false> searcher(needle.data(), needle.size());
|
||||
|
||||
const auto * last_match = static_cast<UInt8 *>(nullptr);
|
||||
const auto * start_pos = cur_haystack_data;
|
||||
const auto * const cur_haystack_end = cur_haystack_data + cur_haystack_length;
|
||||
|
||||
while (start_pos < cur_haystack_end)
|
||||
{
|
||||
if (const auto * const match = searcher.search(start_pos, cur_haystack_end); match != cur_haystack_end)
|
||||
{
|
||||
/// Copy prefix before match
|
||||
copyToOutput(start_pos, match - start_pos, res_data, res_offset);
|
||||
|
||||
/// Insert replacement for match
|
||||
copyToOutput(cur_replacement_data, cur_replacement_length, res_data, res_offset);
|
||||
|
||||
last_match = match;
|
||||
start_pos = match + needle.size();
|
||||
|
||||
if constexpr (replace == ReplaceStringTraits::Replace::First)
|
||||
break;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/// Copy suffix after last match
|
||||
size_t bytes = (last_match == nullptr) ? (cur_haystack_end - cur_haystack_data + 1)
|
||||
: (cur_haystack_end - last_match - needle.size() + 1);
|
||||
copyToOutput(start_pos, bytes, res_data, res_offset);
|
||||
|
||||
res_offsets[i] = res_offset;
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_replacement_offset = replacement_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void vectorVectorVector(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
const ColumnString::Offsets & haystack_offsets,
|
||||
const ColumnString::Chars & needle_data,
|
||||
const ColumnString::Offsets & needle_offsets,
|
||||
const ColumnString::Chars & replacement_data,
|
||||
const ColumnString::Offsets & replacement_offsets,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
chassert(haystack_offsets.size() == needle_offsets.size());
|
||||
chassert(needle_offsets.size() == replacement_offsets.size());
|
||||
|
||||
const size_t haystack_size = haystack_offsets.size();
|
||||
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
|
||||
size_t prev_haystack_offset = 0;
|
||||
size_t prev_needle_offset = 0;
|
||||
size_t prev_replacement_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < haystack_size; ++i)
|
||||
{
|
||||
const auto * const cur_haystack_data = &haystack_data[prev_haystack_offset];
|
||||
const size_t cur_haystack_length = haystack_offsets[i] - prev_haystack_offset - 1;
|
||||
|
||||
const auto * const cur_needle_data = &needle_data[prev_needle_offset];
|
||||
const size_t cur_needle_length = needle_offsets[i] - prev_needle_offset - 1;
|
||||
|
||||
const auto * const cur_replacement_data = &replacement_data[prev_replacement_offset];
|
||||
const size_t cur_replacement_length = replacement_offsets[i] - prev_replacement_offset - 1;
|
||||
|
||||
if (cur_needle_length == 0)
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
/// Using "slow" "stdlib searcher instead of Volnitsky because there is a different pattern in each row
|
||||
StdLibASCIIStringSearcher</*CaseInsensitive*/ false> searcher(cur_needle_data, cur_needle_length);
|
||||
|
||||
const auto * last_match = static_cast<UInt8 *>(nullptr);
|
||||
const auto * start_pos = cur_haystack_data;
|
||||
const auto * const cur_haystack_end = cur_haystack_data + cur_haystack_length;
|
||||
|
||||
while (start_pos < cur_haystack_end)
|
||||
{
|
||||
if (const auto * const match = searcher.search(start_pos, cur_haystack_end); match != cur_haystack_end)
|
||||
{
|
||||
/// Copy prefix before match
|
||||
copyToOutput(start_pos, match - start_pos, res_data, res_offset);
|
||||
|
||||
/// Insert replacement for match
|
||||
copyToOutput(cur_replacement_data, cur_replacement_length, res_data, res_offset);
|
||||
|
||||
last_match = match;
|
||||
start_pos = match + cur_needle_length;
|
||||
|
||||
if constexpr (replace == ReplaceStringTraits::Replace::First)
|
||||
break;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/// Copy suffix after last match
|
||||
size_t bytes = (last_match == nullptr) ? (cur_haystack_end - cur_haystack_data + 1)
|
||||
: (cur_haystack_end - last_match - cur_needle_length + 1);
|
||||
copyToOutput(start_pos, bytes, res_data, res_offset);
|
||||
|
||||
res_offsets[i] = res_offset;
|
||||
|
||||
prev_haystack_offset = haystack_offsets[i];
|
||||
prev_needle_offset = needle_offsets[i];
|
||||
prev_replacement_offset = replacement_offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
/// Note: this function converts fixed-length strings to variable-length strings
|
||||
/// and each variable-length string should ends with zero byte.
|
||||
static void vectorFixedConstantConstant(
|
||||
const ColumnString::Chars & haystack_data,
|
||||
size_t n,
|
||||
const String & needle,
|
||||
const String & replacement,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offsets & res_offsets)
|
||||
{
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
const UInt8 * const begin = haystack_data.data();
|
||||
const UInt8 * const end = haystack_data.data() + haystack_data.size();
|
||||
const UInt8 * pos = begin;
|
||||
|
||||
ColumnString::Offset res_offset = 0;
|
||||
size_t haystack_size = haystack_data.size() / n;
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
/// The current index in the string array.
|
||||
size_t i = 0;
|
||||
@ -139,13 +384,13 @@ struct ReplaceStringImpl
|
||||
|
||||
/// Copy skipped strings without any changes but
|
||||
/// add zero byte to the end of each string.
|
||||
while (i < count && begin + n * (i + 1) <= match)
|
||||
while (i < haystack_size && begin + n * (i + 1) <= match)
|
||||
{
|
||||
COPY_REST_OF_CURRENT_STRING();
|
||||
}
|
||||
|
||||
/// If you have reached the end, it's time to stop
|
||||
if (i == count)
|
||||
if (i == haystack_size)
|
||||
break;
|
||||
|
||||
/// Copy unchanged part of current string.
|
||||
|
@ -13,7 +13,7 @@ struct NameReplaceAll
|
||||
static constexpr auto name = "replaceAll";
|
||||
};
|
||||
|
||||
using FunctionReplaceAll = FunctionStringReplace<ReplaceStringImpl<ReplaceStringTraits::Replace::All>, NameReplaceAll>;
|
||||
using FunctionReplaceAll = FunctionStringReplace<ReplaceStringImpl<NameReplaceAll, ReplaceStringTraits::Replace::All>, NameReplaceAll>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ struct NameReplaceOne
|
||||
static constexpr auto name = "replaceOne";
|
||||
};
|
||||
|
||||
using FunctionReplaceOne = FunctionStringReplace<ReplaceStringImpl<ReplaceStringTraits::Replace::First>, NameReplaceOne>;
|
||||
using FunctionReplaceOne = FunctionStringReplace<ReplaceStringImpl<NameReplaceOne, ReplaceStringTraits::Replace::First>, NameReplaceOne>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ struct NameReplaceRegexpAll
|
||||
static constexpr auto name = "replaceRegexpAll";
|
||||
};
|
||||
|
||||
using FunctionReplaceRegexpAll = FunctionStringReplace<ReplaceRegexpImpl<ReplaceRegexpTraits::Replace::All>, NameReplaceRegexpAll>;
|
||||
using FunctionReplaceRegexpAll = FunctionStringReplace<ReplaceRegexpImpl<NameReplaceRegexpAll, ReplaceRegexpTraits::Replace::All>, NameReplaceRegexpAll>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,7 @@ struct NameReplaceRegexpOne
|
||||
static constexpr auto name = "replaceRegexpOne";
|
||||
};
|
||||
|
||||
using FunctionReplaceRegexpOne = FunctionStringReplace<ReplaceRegexpImpl<ReplaceRegexpTraits::Replace::First>, NameReplaceRegexpOne>;
|
||||
using FunctionReplaceRegexpOne = FunctionStringReplace<ReplaceRegexpImpl<NameReplaceRegexpOne, ReplaceRegexpTraits::Replace::First>, NameReplaceRegexpOne>;
|
||||
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@ FOO
|
||||
foo
|
||||
FOO
|
||||
baz
|
||||
zzz
|
||||
2
|
||||
fo
|
||||
oo
|
||||
|
@ -5,6 +5,7 @@ select ucase('foo');
|
||||
select LOWER('Foo');
|
||||
select UPPER('Foo');
|
||||
select REPLACE('bar', 'r', 'z');
|
||||
select REGEXP_REPLACE('bar', '.', 'z');
|
||||
select Locate('foo', 'o');
|
||||
select SUBSTRING('foo', 1, 2);
|
||||
select Substr('foo', 2);
|
||||
|
@ -0,0 +1,77 @@
|
||||
** replaceAll() **
|
||||
- non-const needle, const replacement
|
||||
1 Hello World l x Hexxo Worxd
|
||||
2 Hello World ll x Hexo World
|
||||
3 Hello World not_found x Hello World
|
||||
4 Hello World [eo] x Hello World
|
||||
5 Hello World . x Hello World
|
||||
- const needle, non-const replacement
|
||||
1 Hello World l xx Hexxxxo Worxxd
|
||||
2 Hello World l x Hexxo Worxd
|
||||
3 Hello World l x Hexxo Worxd
|
||||
4 Hello World l x Hexxo Worxd
|
||||
5 Hello World l x Hexxo Worxd
|
||||
- non-const needle, non-const replacement
|
||||
1 Hello World l xx Hexxxxo Worxxd
|
||||
2 Hello World ll x Hexo World
|
||||
3 Hello World not_found x Hello World
|
||||
4 Hello World [eo] x Hello World
|
||||
5 Hello World . x Hello World
|
||||
** replaceOne() **
|
||||
- non-const needle, const replacement
|
||||
1 Hello World l x Hexlo World
|
||||
2 Hello World ll x Hexo World
|
||||
3 Hello World not_found x Hello World
|
||||
4 Hello World [eo] x Hello World
|
||||
5 Hello World . x Hello World
|
||||
- const needle, non-const replacement
|
||||
1 Hello World l xx Hexxlo World
|
||||
2 Hello World l x Hexlo World
|
||||
3 Hello World l x Hexlo World
|
||||
4 Hello World l x Hexlo World
|
||||
5 Hello World l x Hexlo World
|
||||
- non-const needle, non-const replacement
|
||||
1 Hello World l xx Hexxlo World
|
||||
2 Hello World ll x Hexo World
|
||||
3 Hello World not_found x Hello World
|
||||
4 Hello World [eo] x Hello World
|
||||
5 Hello World . x Hello World
|
||||
** replaceRegexpAll() **
|
||||
- non-const needle, const replacement
|
||||
1 Hello World l x Hexxo Worxd
|
||||
2 Hello World ll x Hexo World
|
||||
3 Hello World not_found x Hello World
|
||||
4 Hello World [eo] x Hxllx Wxrld
|
||||
5 Hello World . x xxxxxxxxxxx
|
||||
- const needle, non-const replacement
|
||||
1 Hello World l xx Hexxxxo Worxxd
|
||||
2 Hello World l x Hexxo Worxd
|
||||
3 Hello World l x Hexxo Worxd
|
||||
4 Hello World l x Hexxo Worxd
|
||||
5 Hello World l x Hexxo Worxd
|
||||
- non-const needle, non-const replacement
|
||||
1 Hello World l xx Hexxxxo Worxxd
|
||||
2 Hello World ll x Hexo World
|
||||
3 Hello World not_found x Hello World
|
||||
4 Hello World [eo] x Hxllx Wxrld
|
||||
5 Hello World . x xxxxxxxxxxx
|
||||
** replaceRegexpOne() **
|
||||
- non-const needle, const replacement
|
||||
1 Hello World l x Hexlo World
|
||||
2 Hello World ll x Hexo World
|
||||
3 Hello World not_found x Hello World
|
||||
4 Hello World [eo] x Hxllo World
|
||||
5 Hello World . x xello World
|
||||
- const needle, non-const replacement
|
||||
1 Hello World l xx Hexxlo World
|
||||
2 Hello World l x Hexlo World
|
||||
3 Hello World l x Hexlo World
|
||||
4 Hello World l x Hexlo World
|
||||
5 Hello World l x Hexlo World
|
||||
- non-const needle, non-const replacement
|
||||
1 Hello World l xx Hexxlo World
|
||||
2 Hello World ll x Hexo World
|
||||
3 Hello World not_found x Hello World
|
||||
4 Hello World [eo] x Hxllo World
|
||||
5 Hello World . x xello World
|
||||
Check that an exception is thrown if the needle is empty
|
@ -0,0 +1,90 @@
|
||||
-- Tests that functions replaceOne(), replaceAll(), replaceRegexpOne(), replaceRegexpAll() work with with non-const pattern and replacement arguments
|
||||
|
||||
DROP TABLE IF EXISTS test_tab;
|
||||
|
||||
CREATE TABLE test_tab
|
||||
(id UInt32, haystack String, needle String, replacement String)
|
||||
engine = MergeTree()
|
||||
ORDER BY id;
|
||||
|
||||
INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'xx') (2, 'Hello World', 'll', 'x') (3, 'Hello World', 'not_found', 'x') (4, 'Hello World', '[eo]', 'x') (5, 'Hello World', '.', 'x')
|
||||
|
||||
SELECT '** replaceAll() **';
|
||||
|
||||
SELECT '- non-const needle, const replacement';
|
||||
SELECT id, haystack, needle, 'x', replaceAll(haystack, needle, 'x') FROM test_tab ORDER BY id;
|
||||
|
||||
SELECT '- const needle, non-const replacement';
|
||||
SELECT id, haystack, 'l', replacement, replaceAll(haystack, 'l', replacement) FROM test_tab ORDER BY id;
|
||||
|
||||
SELECT '- non-const needle, non-const replacement';
|
||||
SELECT id, haystack, needle, replacement, replaceAll(haystack, needle, replacement) FROM test_tab ORDER BY id;
|
||||
|
||||
SELECT '** replaceOne() **';
|
||||
|
||||
SELECT '- non-const needle, const replacement';
|
||||
SELECT id, haystack, needle, 'x', replaceOne(haystack, needle, 'x') FROM test_tab ORDER BY id;
|
||||
|
||||
|
||||
SELECT '- const needle, non-const replacement';
|
||||
SELECT id, haystack, 'l', replacement, replaceOne(haystack, 'l', replacement) FROM test_tab ORDER BY id;
|
||||
|
||||
|
||||
SELECT '- non-const needle, non-const replacement';
|
||||
SELECT id, haystack, needle, replacement, replaceOne(haystack, needle, replacement) FROM test_tab ORDER BY id;
|
||||
|
||||
SELECT '** replaceRegexpAll() **';
|
||||
|
||||
SELECT '- non-const needle, const replacement';
|
||||
SELECT id, haystack, needle, 'x', replaceRegexpAll(haystack, needle, 'x') FROM test_tab ORDER BY id;
|
||||
|
||||
SELECT '- const needle, non-const replacement';
|
||||
SELECT id, haystack, 'l', replacement, replaceRegexpAll(haystack, 'l', replacement) FROM test_tab ORDER BY id;
|
||||
|
||||
SELECT '- non-const needle, non-const replacement';
|
||||
SELECT id, haystack, needle, replacement, replaceRegexpAll(haystack, needle, replacement) FROM test_tab ORDER BY id;
|
||||
|
||||
SELECT '** replaceRegexpOne() **';
|
||||
|
||||
SELECT '- non-const needle, const replacement';
|
||||
SELECT id, haystack, needle, 'x', replaceRegexpOne(haystack, needle, 'x') FROM test_tab ORDER BY id;
|
||||
|
||||
|
||||
SELECT '- const needle, non-const replacement';
|
||||
SELECT id, haystack, 'l', replacement, replaceRegexpOne(haystack, 'l', replacement) FROM test_tab ORDER BY id;
|
||||
|
||||
|
||||
SELECT '- non-const needle, non-const replacement';
|
||||
SELECT id, haystack, needle, replacement, replaceRegexpOne(haystack, needle, replacement) FROM test_tab ORDER BY id;
|
||||
|
||||
DROP TABLE IF EXISTS test_tab;
|
||||
|
||||
|
||||
SELECT 'Check that an exception is thrown if the needle is empty';
|
||||
|
||||
CREATE TABLE test_tab
|
||||
(id UInt32, haystack String, needle String, replacement String)
|
||||
engine = MergeTree()
|
||||
ORDER BY id;
|
||||
|
||||
INSERT INTO test_tab VALUES (1, 'Hello World', 'l', 'x') (2, 'Hello World', '', 'y')
|
||||
|
||||
-- needle: non-const, replacement: const
|
||||
SELECT replaceAll(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceOne(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpAll(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpOne(haystack, needle, 'x') FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
|
||||
-- needle: const, replacement: non-const
|
||||
SELECT replaceAll(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceOne(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpAll(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpOne(haystack, '', replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
|
||||
-- needle: non-const, replacement: non-const
|
||||
SELECT replaceAll(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceOne(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpAll(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
SELECT replaceRegexpOne(haystack, needle, replacement) FROM test_tab; -- { serverError ARGUMENT_OUT_OF_BOUND }
|
||||
|
||||
DROP TABLE IF EXISTS test_tab;
|
Loading…
Reference in New Issue
Block a user