Merge pull request #69326 from rschu1ze/overlay-followup

Minor follow-up for #66933
This commit is contained in:
Robert Schulze 2024-09-08 09:07:54 +00:00 committed by GitHub
commit 26f21a0d94
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 38 additions and 23 deletions

View File

@ -20,10 +20,10 @@ overlay(s, replace, offset[, length])
**Parameters**
- `input`: A string type [String](../data-types/string.md).
- `s`: A string type [String](../data-types/string.md).
- `replace`: A string type [String](../data-types/string.md).
- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string.
- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within input to be replaced. If `length` is not specified, the number of bytes removed from `input` equals the length of `replace`; otherwise `length` bytes are removed.
- `offset`: An integer type [Int](../data-types/int-uint.md) (1-based). If `offset` is negative, it is counted from the end of the string `s`.
- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within the input string `s` to be replaced. If `length` is not specified, the number of bytes removed from `s` equals the length of `replace`; otherwise `length` bytes are removed.
**Returned value**
@ -32,22 +32,35 @@ overlay(s, replace, offset[, length])
**Example**
```sql
SELECT overlay('ClickHouse SQL', 'CORE', 12) AS res;
SELECT overlay('My father is from Mexico.', 'mother', 4) AS res;
```
Result:
```text
┌─res─────────────┐
│ ClickHouse CORE │
└─────────────────┘
┌─res──────────────────────┐
│ My mother is from Mexico.│
└──────────────────────────┘
```
```sql
SELECT overlay('My father is from Mexico.', 'dad', 4, 6) AS res;
```
Result:
```text
┌─res───────────────────┐
│ My dad is from Mexico.│
└───────────────────────┘
```
## overlayUTF8
Replace part of the string `input` with another string `replace`, starting at the 1-based index `offset`.
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Assumes that the string contains valid UTF-8 encoded text.
If this assumption is violated, no exception is thrown and the result is undefined.
**Syntax**
@ -59,8 +72,8 @@ overlayUTF8(s, replace, offset[, length])
- `s`: A string type [String](../data-types/string.md).
- `replace`: A string type [String](../data-types/string.md).
- `offset`: An integer type [Int](../data-types/int-uint.md). If `offset` is negative, it is counted from the end of the `input` string.
- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within input to be replaced. If `length` is not specified, the number of characters removed from `input` equals the length of `replace`; otherwise `length` characters are removed.
- `offset`: An integer type [Int](../data-types/int-uint.md) (1-based). If `offset` is negative, it is counted from the end of the input string `s`.
- `length`: Optional. An integer type [Int](../data-types/int-uint.md). `length` specifies the length of the snippet within the input string `s` to be replaced. If `length` is not specified, the number of characters removed from `s` equals the length of `replace`; otherwise `length` characters are removed.
**Returned value**
@ -69,15 +82,15 @@ overlayUTF8(s, replace, offset[, length])
**Example**
```sql
SELECT overlayUTF8('ClickHouse是一款OLAP数据库', '开源', 12, 2) AS res;
SELECT overlay('Mein Vater ist aus Österreich.', 'der Türkei', 20) AS res;
```
Result:
```text
┌─res────────────────────────┐
ClickHouse是开源OLAP数据库
└────────────────────────────┘
┌─res───────────────────────────
Mein Vater ist aus der Türkei.
└───────────────────────────────
```
## replaceOne

View File

@ -1,12 +1,12 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <Common/StringUtils.h>
#include <Common/UTF8Helpers.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/GatherUtils/Sources.h>
#include <Functions/IFunction.h>
#include <Common/StringUtils.h>
#include <Common/UTF8Helpers.h>
namespace DB
{
@ -16,8 +16,8 @@ namespace
/// If 'is_utf8' - measure offset and length in code points instead of bytes.
/// Syntax:
/// - overlay(input, replace, offset[, length])
/// - overlayUTF8(input, replace, offset[, length]) - measure offset and length in code points instead of bytes
/// - overlay(s, replace, offset[, length])
/// - overlayUTF8(s, replace, offset[, length]) - measure offset and length in code points instead of bytes
template <bool is_utf8>
class FunctionOverlay : public IFunction
{
@ -34,7 +34,7 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
FunctionArgumentDescriptors mandatory_args{
{"input", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
{"s", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
{"replace", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isString), nullptr, "String"},
{"offset", static_cast<FunctionArgumentDescriptor::TypeValidator>(&isNativeInteger), nullptr, "(U)Int8/16/32/64"},
};
@ -100,7 +100,6 @@ public:
res_data.reserve(col_input_string->getChars().size());
}
#define OVERLAY_EXECUTE_CASE(HAS_FOUR_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST) \
if (input_is_const && replace_is_const) \
constantConstant<HAS_FOUR_ARGS, OFFSET_IS_CONST, LENGTH_IS_CONST>( \
@ -186,7 +185,6 @@ public:
return res_col;
}
private:
/// input offset is 1-based, maybe negative
/// output result is 0-based valid offset, within [0, input_size]
@ -229,6 +227,7 @@ private:
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets) const
{
/// Free us from handling negative length in the code below
if (has_four_args && length_is_const && const_length < 0)
{
constantConstant<true, offset_is_const, false>(
@ -343,6 +342,7 @@ private:
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets) const
{
/// Free us from handling negative length in the code below
if (has_four_args && length_is_const && const_length < 0)
{
vectorConstant<true, offset_is_const, false>(
@ -461,6 +461,7 @@ private:
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets) const
{
/// Free us from handling negative length in the code below
if (has_four_args && length_is_const && const_length < 0)
{
constantVector<true, offset_is_const, false>(
@ -577,6 +578,7 @@ private:
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets) const
{
/// Free us from handling negative length in the code below
if (has_four_args && length_is_const && const_length < 0)
{
vectorVector<true, offset_is_const, false>(

View File

@ -35,10 +35,10 @@ SELECT overlay('Spark SQL', materialize('ANSI '), materialize(7), materialize(0)
SELECT overlay(materialize('Spark SQL'), materialize('ANSI '), materialize(7), materialize(0)), overlayUTF8(materialize('Spark SQL和CH'), materialize('ANSI '), materialize(7), materialize(0));
SELECT 'Test with special offset values';
WITH number - 12 AS offset SELECT offset, overlay('Spark SQL', '__', offset), overlayUTF8('Spark SQL和CH', '', offset) FROM numbers(26);
WITH number - 12 AS offset SELECT offset, overlay('Spark SQL', '__', offset), overlayUTF8('Spark SQL和CH', '', offset) FROM numbers(26) ORDER BY number;
SELECT 'Test with special length values';
WITH number - 1 AS length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) FROM numbers(8);
WITH number - 1 AS length SELECT length, overlay('Spark SQL', 'ANSI ', 7, length), overlayUTF8('Spark SQL和CH', 'ANSI ', 7, length) FROM numbers(8) ORDER BY number;
SELECT 'Test with special input and replace values';
SELECT overlay('', '_', 6), overlayUTF8('', '_', 6);