mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 23:21:59 +00:00
Merge pull request #54518 from rschu1ze/split-better
Provide fallback to Python/Spark-like splitting in splitBy*() functions
This commit is contained in:
commit
be1e92a2ac
@ -4067,6 +4067,17 @@ Result:
|
||||
└─────┴─────┴───────┘
|
||||
```
|
||||
|
||||
## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string}
|
||||
|
||||
Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array.
|
||||
|
||||
Possible values:
|
||||
|
||||
- `0` - The remaining string will not be included in the last element of the result array.
|
||||
- `1` - The remaining string will be included in the last element of the result array. This is the behavior of Spark's [`split()`](https://spark.apache.org/docs/3.1.2/api/python/reference/api/pyspark.sql.functions.split.html) function and Python's [`str.split()`](https://docs.python.org/3/library/stdtypes.html#str.split) method.
|
||||
|
||||
Default value: `0`
|
||||
|
||||
## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}
|
||||
|
||||
Enables or disables returning results of type:
|
||||
|
@ -21,7 +21,7 @@ splitByChar(separator, s[, max_substrings]))
|
||||
|
||||
- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md).
|
||||
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
|
||||
- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible.
|
||||
- `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings, otherwise the function will return as many substrings as possible.
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
@ -38,6 +38,10 @@ The behavior of parameter `max_substrings` changed starting with ClickHouse v22.
|
||||
For example,
|
||||
- in v22.10: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b','c=d']`
|
||||
- in v22.11: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b']`
|
||||
|
||||
A behavior similar to ClickHouse pre-v22.11 can be achieved by setting
|
||||
[splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string):
|
||||
`SELECT splitByChar('=', 'a=b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1 -- ['a', 'b=c=d']`
|
||||
:::
|
||||
|
||||
**Example**
|
||||
@ -80,6 +84,8 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere
|
||||
- There are multiple consecutive non-empty separators;
|
||||
- The original string `s` is empty while the separator is not empty.
|
||||
|
||||
Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls whether the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
@ -133,6 +139,8 @@ Returns an array of selected substrings. Empty substrings may be selected when:
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls whether the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
@ -182,6 +190,8 @@ Returns an array of selected substrings.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls whether the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
@ -219,6 +229,8 @@ Returns an array of selected substrings.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls whether the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
@ -279,6 +291,8 @@ Returns an array of selected substrings.
|
||||
|
||||
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
|
||||
|
||||
Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls whether the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
|
||||
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
|
@ -503,6 +503,7 @@ class IColumn;
|
||||
M(Bool, reject_expensive_hyperscan_regexps, true, "Reject patterns which will likely be expensive to evaluate with hyperscan (due to NFA state explosion)", 0) \
|
||||
M(Bool, allow_simdjson, true, "Allow using simdjson library in 'JSON*' functions if AVX2 instructions are available. If disabled rapidjson will be used.", 0) \
|
||||
M(Bool, allow_introspection_functions, false, "Allow functions for introspection of ELF and DWARF for query profiling. These functions are slow and may impose security considerations.", 0) \
|
||||
M(Bool, splitby_max_substrings_includes_remaining_string, false, "Functions 'splitBy*()' with 'max_substrings' argument > 0 include the remaining string as last element in the result", 0) \
|
||||
\
|
||||
M(Bool, allow_execute_multiif_columnar, true, "Allow execute multiIf function columnar", 0) \
|
||||
M(Bool, formatdatetime_f_prints_single_zero, false, "Formatter '%f' in function 'formatDateTime()' produces a single zero instead of six zeros if the formatted value has no fractional seconds.", 0) \
|
||||
|
@ -104,7 +104,7 @@ void validateArgumentType(const IFunction & func, const DataTypes & arguments,
|
||||
|
||||
const auto & argument = arguments[argument_index];
|
||||
if (!validator_func(*argument))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {} expected {}",
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {}, expected {}",
|
||||
argument->getName(), std::to_string(argument_index), func.getName(), expected_type_description);
|
||||
}
|
||||
|
||||
|
@ -5,23 +5,53 @@ namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const DataTypes & arguments) const
|
||||
template <typename DataType>
|
||||
std::optional<Int64> extractMaxSplitsImpl(const ColumnWithTypeAndName & argument)
|
||||
{
|
||||
if (arguments.size() != 1 && arguments.size() != 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.",
|
||||
getName(), arguments.size());
|
||||
const auto * col = checkAndGetColumnConst<ColumnVector<DataType>>(argument.column.get());
|
||||
if (!col)
|
||||
return std::nullopt;
|
||||
|
||||
const DataTypeArray * array_type = checkAndGetDataType<DataTypeArray>(arguments[0].get());
|
||||
if (!array_type)
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an array.", getName());
|
||||
auto value = col->template getValue<DataType>();
|
||||
return static_cast<Int64>(value);
|
||||
}
|
||||
|
||||
if (arguments.size() == 2 && !isString(arguments[1]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be constant string.", getName());
|
||||
/// Reads the optional `max_substrings` argument found at index `max_substrings_argument_position`.
///
/// Returns std::nullopt when the argument was not passed (index is past the end of `arguments`)
/// or when its value is <= 0; otherwise returns the value.
/// Throws ILLEGAL_COLUMN when none of the integer column types tried below yields a value.
std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position)
|
||||
{
|
||||
/// The argument is optional: nothing to extract if the caller did not pass it.
if (max_substrings_argument_position >= arguments.size())
|
||||
return std::nullopt;
|
||||
|
||||
std::optional<Int64> max_splits;
|
||||
/// Try each integer width in turn; the first extractMaxSplitsImpl<T>() call that returns a value
/// short-circuits the chain and leaves that value in max_splits.
if (!((max_splits = extractMaxSplitsImpl<UInt8>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int8>(arguments[max_substrings_argument_position]))
|
||||
|| (max_splits = extractMaxSplitsImpl<UInt16>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int16>(arguments[max_substrings_argument_position]))
|
||||
|| (max_splits = extractMaxSplitsImpl<UInt32>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int32>(arguments[max_substrings_argument_position]))
|
||||
|| (max_splits = extractMaxSplitsImpl<UInt64>(arguments[max_substrings_argument_position])) || (max_splits = extractMaxSplitsImpl<Int64>(arguments[max_substrings_argument_position]))))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Illegal column {}, which is {}-th argument",
|
||||
arguments[max_substrings_argument_position].column->getName(),
|
||||
/// + 1 because the message reports a 1-based argument number.
max_substrings_argument_position + 1);
|
||||
|
||||
/// Zero and negative values mean "no limit", reported to the caller as std::nullopt.
if (*max_splits <= 0)
|
||||
return std::nullopt;
|
||||
|
||||
/// The value is > 0 here, so converting optional<Int64> to optional<size_t> keeps it unchanged.
return max_splits;
|
||||
}
|
||||
|
||||
/// Checks the argument list against the descriptors below (one mandatory array, one optional
/// constant String separator) and returns String as the result type.
/// NOTE(review): validateFunctionArgumentTypes presumably throws on a mismatch — its body is not visible here.
DataTypePtr FunctionArrayStringConcat::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const
|
||||
{
|
||||
/// First argument: any array type; no constness requirement (nullptr column validator).
FunctionArgumentDescriptors mandatory_args{
|
||||
{"arr", &isArray<IDataType>, nullptr, "Array"},
|
||||
};
|
||||
|
||||
/// Second argument, if given: a String that must also be a constant column.
FunctionArgumentDescriptors optional_args{
|
||||
{"separator", &isString<IDataType>, isColumnConst, "const String"},
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(*this, arguments, mandatory_args, optional_args);
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
@ -23,10 +23,8 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
|
||||
}
|
||||
|
||||
|
||||
@ -56,6 +54,7 @@ namespace ErrorCodes
|
||||
|
||||
using Pos = const char *;
|
||||
|
||||
std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position);
|
||||
|
||||
/// Substring generators. All of them have a common interface.
|
||||
|
||||
@ -64,9 +63,11 @@ class SplitByAlphaImpl
|
||||
private:
|
||||
Pos pos;
|
||||
Pos end;
|
||||
std::optional<size_t> max_splits;
|
||||
size_t splits;
|
||||
bool max_substrings_includes_remaining_string;
|
||||
|
||||
public:
|
||||
/// Get the name of the function.
|
||||
static constexpr auto name = "alphaTokens";
|
||||
static String getName() { return name; }
|
||||
|
||||
@ -74,37 +75,33 @@ public:
|
||||
|
||||
static size_t getNumberOfArguments() { return 0; }
|
||||
|
||||
/// Check the type of the function's arguments.
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
if (arguments.empty() || arguments.size() > 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", getName());
|
||||
FunctionArgumentDescriptors mandatory_args{
|
||||
{"s", &isString<IDataType>, nullptr, "String"},
|
||||
};
|
||||
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
|
||||
"Must be String.", arguments[0]->getName(), getName());
|
||||
FunctionArgumentDescriptors optional_args{
|
||||
{"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
|
||||
}
|
||||
|
||||
/// Initialize by the function arguments.
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/) {}
|
||||
static constexpr auto strings_argument_position = 0uz;
|
||||
|
||||
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
|
||||
{
|
||||
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
|
||||
max_splits = extractMaxSplits(arguments, 1);
|
||||
}
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
pos = pos_;
|
||||
end = end_;
|
||||
}
|
||||
|
||||
/// Returns the position of the argument, that is the column of strings
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return 1;
|
||||
splits = 0;
|
||||
}
|
||||
|
||||
/// Get the next token, if any, or return false.
|
||||
@ -119,10 +116,27 @@ public:
|
||||
|
||||
token_begin = pos;
|
||||
|
||||
if (max_splits)
|
||||
{
|
||||
if (max_substrings_includes_remaining_string)
|
||||
{
|
||||
if (splits == *max_splits - 1)
|
||||
{
|
||||
token_end = end;
|
||||
pos = end;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (splits == *max_splits)
|
||||
return false;
|
||||
}
|
||||
|
||||
while (pos < end && isAlphaASCII(*pos))
|
||||
++pos;
|
||||
|
||||
token_end = pos;
|
||||
++splits;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -133,6 +147,9 @@ class SplitByNonAlphaImpl
|
||||
private:
|
||||
Pos pos;
|
||||
Pos end;
|
||||
std::optional<size_t> max_splits;
|
||||
size_t splits;
|
||||
bool max_substrings_includes_remaining_string;
|
||||
|
||||
public:
|
||||
/// Get the name of the function.
|
||||
@ -142,37 +159,25 @@ public:
|
||||
static bool isVariadic() { return true; }
|
||||
static size_t getNumberOfArguments() { return 0; }
|
||||
|
||||
/// Check the type of the function's arguments.
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
if (arguments.empty() || arguments.size() > 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", getName());
|
||||
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
|
||||
"Must be String.", arguments[0]->getName(), getName());
|
||||
SplitByAlphaImpl::checkArguments(func, arguments);
|
||||
}
|
||||
|
||||
/// Initialize by the function arguments.
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/) {}
|
||||
static constexpr auto strings_argument_position = 0uz;
|
||||
|
||||
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
|
||||
{
|
||||
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
|
||||
max_splits = extractMaxSplits(arguments, 1);
|
||||
}
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
pos = pos_;
|
||||
end = end_;
|
||||
}
|
||||
|
||||
/// Returns the position of the argument, that is the column of strings
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return 1;
|
||||
splits = 0;
|
||||
}
|
||||
|
||||
/// Get the next token, if any, or return false.
|
||||
@ -187,10 +192,27 @@ public:
|
||||
|
||||
token_begin = pos;
|
||||
|
||||
if (max_splits)
|
||||
{
|
||||
if (max_substrings_includes_remaining_string)
|
||||
{
|
||||
if (splits == *max_splits - 1)
|
||||
{
|
||||
token_end = end;
|
||||
pos = end;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (splits == *max_splits)
|
||||
return false;
|
||||
}
|
||||
|
||||
while (pos < end && !(isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
|
||||
++pos;
|
||||
|
||||
token_end = pos;
|
||||
splits++;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -201,46 +223,36 @@ class SplitByWhitespaceImpl
|
||||
private:
|
||||
Pos pos;
|
||||
Pos end;
|
||||
std::optional<size_t> max_splits;
|
||||
size_t splits;
|
||||
bool max_substrings_includes_remaining_string;
|
||||
|
||||
public:
|
||||
/// Get the name of the function.
|
||||
static constexpr auto name = "splitByWhitespace";
|
||||
static String getName() { return name; }
|
||||
|
||||
static bool isVariadic() { return true; }
|
||||
static size_t getNumberOfArguments() { return 0; }
|
||||
|
||||
/// Check the type of the function's arguments.
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
if (arguments.empty() || arguments.size() > 2)
|
||||
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", getName());
|
||||
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
|
||||
"Must be String.", arguments[0]->getName(), getName());
|
||||
return SplitByNonAlphaImpl::checkArguments(func, arguments);
|
||||
}
|
||||
|
||||
/// Initialize by the function arguments.
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/) {}
|
||||
static constexpr auto strings_argument_position = 0uz;
|
||||
|
||||
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
|
||||
{
|
||||
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
|
||||
max_splits = extractMaxSplits(arguments, 1);
|
||||
}
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
pos = pos_;
|
||||
end = end_;
|
||||
}
|
||||
|
||||
/// Returns the position of the argument, that is the column of strings
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return 1;
|
||||
splits = 0;
|
||||
}
|
||||
|
||||
/// Get the next token, if any, or return false.
|
||||
@ -255,10 +267,27 @@ public:
|
||||
|
||||
token_begin = pos;
|
||||
|
||||
if (max_splits)
|
||||
{
|
||||
if (max_substrings_includes_remaining_string)
|
||||
{
|
||||
if (splits == *max_splits - 1)
|
||||
{
|
||||
token_end = end;
|
||||
pos = end;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (splits == *max_splits)
|
||||
return false;
|
||||
}
|
||||
|
||||
while (pos < end && !isWhitespaceASCII(*pos))
|
||||
++pos;
|
||||
|
||||
token_end = pos;
|
||||
splits++;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -269,7 +298,10 @@ class SplitByCharImpl
|
||||
private:
|
||||
Pos pos;
|
||||
Pos end;
|
||||
char sep;
|
||||
char separator;
|
||||
std::optional<size_t> max_splits;
|
||||
size_t splits;
|
||||
bool max_substrings_includes_remaining_string;
|
||||
|
||||
public:
|
||||
static constexpr auto name = "splitByChar";
|
||||
@ -277,24 +309,23 @@ public:
|
||||
static bool isVariadic() { return true; }
|
||||
static size_t getNumberOfArguments() { return 0; }
|
||||
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
if (arguments.size() < 2 || arguments.size() > 3)
|
||||
throw Exception(
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
|
||||
"Function '{}' needs at least 2 arguments, at most 3 arguments; passed {}.",
|
||||
name, arguments.size());
|
||||
FunctionArgumentDescriptors mandatory_args{
|
||||
{"separator", &isString<IDataType>, isColumnConst, "const String"},
|
||||
{"s", &isString<IDataType>, nullptr, "String"}
|
||||
};
|
||||
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
|
||||
"Must be String.", arguments[0]->getName(), getName());
|
||||
FunctionArgumentDescriptors optional_args{
|
||||
{"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
|
||||
};
|
||||
|
||||
if (!isString(arguments[1]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}. "
|
||||
"Must be String.", arguments[1]->getName(), getName());
|
||||
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
|
||||
}
|
||||
|
||||
void init(const ColumnsWithTypeAndName & arguments)
|
||||
static constexpr auto strings_argument_position = 1uz;
|
||||
|
||||
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
|
||||
{
|
||||
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
|
||||
|
||||
@ -307,25 +338,17 @@ public:
|
||||
if (sep_str.size() != 1)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal separator for function {}. Must be exactly one byte.", getName());
|
||||
|
||||
sep = sep_str[0];
|
||||
}
|
||||
separator = sep_str[0];
|
||||
|
||||
/// Returns the position of the argument, that is the column of strings
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return 2;
|
||||
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
|
||||
max_splits = extractMaxSplits(arguments, 2);
|
||||
}
|
||||
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
pos = pos_;
|
||||
end = end_;
|
||||
splits = 0;
|
||||
}
|
||||
|
||||
bool get(Pos & token_begin, Pos & token_end)
|
||||
@ -334,12 +357,29 @@ public:
|
||||
return false;
|
||||
|
||||
token_begin = pos;
|
||||
pos = reinterpret_cast<Pos>(memchr(pos, sep, end - pos));
|
||||
|
||||
if (max_splits)
|
||||
{
|
||||
if (max_substrings_includes_remaining_string)
|
||||
{
|
||||
if (splits == *max_splits - 1)
|
||||
{
|
||||
token_end = end;
|
||||
pos = nullptr;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (splits == *max_splits)
|
||||
return false;
|
||||
}
|
||||
|
||||
pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
|
||||
if (pos)
|
||||
{
|
||||
token_end = pos;
|
||||
++pos;
|
||||
++splits;
|
||||
}
|
||||
else
|
||||
token_end = end;
|
||||
@ -354,8 +394,10 @@ class SplitByStringImpl
|
||||
private:
|
||||
Pos pos;
|
||||
Pos end;
|
||||
|
||||
String sep;
|
||||
String separator;
|
||||
std::optional<size_t> max_splits;
|
||||
size_t splits;
|
||||
bool max_substrings_includes_remaining_string;
|
||||
|
||||
public:
|
||||
static constexpr auto name = "splitByString";
|
||||
@ -363,12 +405,14 @@ public:
|
||||
static bool isVariadic() { return true; }
|
||||
static size_t getNumberOfArguments() { return 0; }
|
||||
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
SplitByCharImpl::checkArguments(arguments);
|
||||
SplitByCharImpl::checkArguments(func, arguments);
|
||||
}
|
||||
|
||||
void init(const ColumnsWithTypeAndName & arguments)
|
||||
static constexpr auto strings_argument_position = 1uz;
|
||||
|
||||
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
|
||||
{
|
||||
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
|
||||
|
||||
@ -376,19 +420,10 @@ public:
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
|
||||
"Must be constant string.", arguments[0].column->getName(), getName());
|
||||
|
||||
sep = col->getValue<String>();
|
||||
}
|
||||
separator = col->getValue<String>();
|
||||
|
||||
/// Returns the position of the argument that is the column of strings
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return 2;
|
||||
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
|
||||
max_splits = extractMaxSplits(arguments, 2);
|
||||
}
|
||||
|
||||
/// Called for each next string.
|
||||
@ -396,19 +431,38 @@ public:
|
||||
{
|
||||
pos = pos_;
|
||||
end = end_;
|
||||
splits = 0;
|
||||
}
|
||||
|
||||
/// Get the next token, if any, or return false.
|
||||
bool get(Pos & token_begin, Pos & token_end)
|
||||
{
|
||||
if (sep.empty())
|
||||
if (separator.empty())
|
||||
{
|
||||
if (pos == end)
|
||||
return false;
|
||||
|
||||
token_begin = pos;
|
||||
|
||||
if (max_splits)
|
||||
{
|
||||
if (max_substrings_includes_remaining_string)
|
||||
{
|
||||
if (splits == *max_splits - 1)
|
||||
{
|
||||
token_end = end;
|
||||
pos = end;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (splits == *max_splits)
|
||||
return false;
|
||||
}
|
||||
|
||||
pos += 1;
|
||||
token_end = pos;
|
||||
++splits;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -417,12 +471,28 @@ public:
|
||||
|
||||
token_begin = pos;
|
||||
|
||||
pos = reinterpret_cast<Pos>(memmem(pos, end - pos, sep.data(), sep.size()));
|
||||
if (max_splits)
|
||||
{
|
||||
if (max_substrings_includes_remaining_string)
|
||||
{
|
||||
if (splits == *max_splits - 1)
|
||||
{
|
||||
token_end = end;
|
||||
pos = nullptr;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (splits == *max_splits)
|
||||
return false;
|
||||
}
|
||||
|
||||
pos = reinterpret_cast<Pos>(memmem(pos, end - pos, separator.data(), separator.size()));
|
||||
if (pos)
|
||||
{
|
||||
token_end = pos;
|
||||
pos += sep.size();
|
||||
pos += separator.size();
|
||||
++splits;
|
||||
}
|
||||
else
|
||||
token_end = end;
|
||||
@ -441,6 +511,10 @@ private:
|
||||
Pos pos;
|
||||
Pos end;
|
||||
|
||||
std::optional<size_t> max_splits;
|
||||
size_t splits;
|
||||
bool max_substrings_includes_remaining_string;
|
||||
|
||||
public:
|
||||
static constexpr auto name = "splitByRegexp";
|
||||
static String getName() { return name; }
|
||||
@ -448,14 +522,14 @@ public:
|
||||
static bool isVariadic() { return true; }
|
||||
static size_t getNumberOfArguments() { return 0; }
|
||||
|
||||
/// Check the type of function arguments.
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
SplitByStringImpl::checkArguments(arguments);
|
||||
SplitByStringImpl::checkArguments(func, arguments);
|
||||
}
|
||||
|
||||
/// Initialize by the function arguments.
|
||||
void init(const ColumnsWithTypeAndName & arguments)
|
||||
static constexpr auto strings_argument_position = 1uz;
|
||||
|
||||
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
|
||||
{
|
||||
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
|
||||
|
||||
@ -465,18 +539,9 @@ public:
|
||||
|
||||
if (!col->getValue<String>().empty())
|
||||
re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
|
||||
}
|
||||
|
||||
/// Returns the position of the argument that is the column of strings
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return 2;
|
||||
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
|
||||
max_splits = extractMaxSplits(arguments, 2);
|
||||
}
|
||||
|
||||
/// Called for each next string.
|
||||
@ -484,6 +549,7 @@ public:
|
||||
{
|
||||
pos = pos_;
|
||||
end = end_;
|
||||
splits = 0;
|
||||
}
|
||||
|
||||
/// Get the next token, if any, or return false.
|
||||
@ -495,8 +561,26 @@ public:
|
||||
return false;
|
||||
|
||||
token_begin = pos;
|
||||
|
||||
if (max_splits)
|
||||
{
|
||||
if (max_substrings_includes_remaining_string)
|
||||
{
|
||||
if (splits == *max_splits - 1)
|
||||
{
|
||||
token_end = end;
|
||||
pos = end;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (splits == *max_splits)
|
||||
return false;
|
||||
}
|
||||
|
||||
pos += 1;
|
||||
token_end = pos;
|
||||
++splits;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -505,6 +589,22 @@ public:
|
||||
|
||||
token_begin = pos;
|
||||
|
||||
if (max_splits)
|
||||
{
|
||||
if (max_substrings_includes_remaining_string)
|
||||
{
|
||||
if (splits == *max_splits - 1)
|
||||
{
|
||||
token_end = end;
|
||||
pos = nullptr;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (splits == *max_splits)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!re->match(pos, end - pos, matches) || !matches[0].length)
|
||||
{
|
||||
token_end = end;
|
||||
@ -514,6 +614,7 @@ public:
|
||||
{
|
||||
token_end = pos + matches[0].offset;
|
||||
pos = token_end + matches[0].length;
|
||||
++splits;
|
||||
}
|
||||
}
|
||||
|
||||
@ -536,14 +637,19 @@ public:
|
||||
static bool isVariadic() { return false; }
|
||||
static size_t getNumberOfArguments() { return 2; }
|
||||
|
||||
/// Check the type of function arguments.
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
SplitByStringImpl::checkArguments(arguments);
|
||||
FunctionArgumentDescriptors mandatory_args{
|
||||
{"haystack", &isString<IDataType>, nullptr, "String"},
|
||||
{"pattern", &isString<IDataType>, isColumnConst, "const String"}
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(func, arguments, mandatory_args);
|
||||
}
|
||||
|
||||
/// Initialize by the function arguments.
|
||||
void init(const ColumnsWithTypeAndName & arguments)
|
||||
static constexpr auto strings_argument_position = 0uz;
|
||||
|
||||
void init(const ColumnsWithTypeAndName & arguments, bool /*max_substrings_includes_remaining_string*/)
|
||||
{
|
||||
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
|
||||
|
||||
@ -557,18 +663,6 @@ public:
|
||||
matches.resize(capture + 1);
|
||||
}
|
||||
|
||||
/// Returns the position of the argument that is the column of strings
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
{
|
||||
@ -607,33 +701,30 @@ public:
|
||||
template <typename Generator>
|
||||
class FunctionTokens : public IFunction
|
||||
{
|
||||
private:
|
||||
bool max_substrings_includes_remaining_string;
|
||||
|
||||
public:
|
||||
static constexpr auto name = Generator::name;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionTokens>(); }
|
||||
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }
|
||||
|
||||
String getName() const override
|
||||
explicit FunctionTokens<Generator>(ContextPtr context)
|
||||
{
|
||||
return name;
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
max_substrings_includes_remaining_string = settings.splitby_max_substrings_includes_remaining_string;
|
||||
}
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
|
||||
bool isVariadic() const override { return Generator::isVariadic(); }
|
||||
|
||||
size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
Generator::checkArguments(arguments);
|
||||
|
||||
const auto max_substrings_pos = Generator::getMaxSubstringsArgumentPosition();
|
||||
if (max_substrings_pos && *max_substrings_pos < arguments.size() && !isNativeInteger(arguments[*max_substrings_pos]))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
|
||||
"{}-th argument for function '{}' must be integer, got '{}' instead",
|
||||
*max_substrings_pos + 1,
|
||||
getName(),
|
||||
arguments[*max_substrings_pos]->getName());
|
||||
Generator::checkArguments(*this, arguments);
|
||||
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
|
||||
}
|
||||
@ -641,23 +732,21 @@ public:
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
Generator generator;
|
||||
generator.init(arguments);
|
||||
const auto & array_argument = arguments[generator.getStringsArgumentPosition()];
|
||||
generator.init(arguments, max_substrings_includes_remaining_string);
|
||||
|
||||
/// Whether we need to limit max tokens returned by Generator::get
|
||||
/// If max_substrings is std::nullopt, no limit is applied.
|
||||
auto max_substrings = getMaxSubstrings(arguments);
|
||||
const auto & array_argument = arguments[generator.strings_argument_position];
|
||||
|
||||
const ColumnString * col_str = checkAndGetColumn<ColumnString>(array_argument.column.get());
|
||||
const ColumnConst * col_const_str =
|
||||
checkAndGetColumnConstStringOrFixedString(array_argument.column.get());
|
||||
const ColumnConst * col_str_const = checkAndGetColumnConstStringOrFixedString(array_argument.column.get());
|
||||
|
||||
auto col_res = ColumnArray::create(ColumnString::create());
|
||||
|
||||
ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
|
||||
ColumnArray::Offsets & res_offsets = col_res->getOffsets();
|
||||
ColumnString::Chars & res_strings_chars = res_strings.getChars();
|
||||
ColumnString::Offsets & res_strings_offsets = res_strings.getOffsets();
|
||||
|
||||
ColumnArray::Offsets & res_offsets = col_res->getOffsets();
|
||||
|
||||
if (col_str)
|
||||
{
|
||||
const ColumnString::Chars & src_chars = col_str->getChars();
|
||||
@ -682,7 +771,7 @@ public:
|
||||
|
||||
generator.set(pos, end);
|
||||
size_t j = 0;
|
||||
while (generator.get(token_begin, token_end) && !(max_substrings && j >= *max_substrings))
|
||||
while (generator.get(token_begin, token_end))
|
||||
{
|
||||
size_t token_size = token_end - token_begin;
|
||||
|
||||
@ -701,65 +790,24 @@ public:
|
||||
|
||||
return col_res;
|
||||
}
|
||||
else if (col_const_str)
|
||||
else if (col_str_const)
|
||||
{
|
||||
String src = col_const_str->getValue<String>();
|
||||
String src = col_str_const->getValue<String>();
|
||||
Array dst;
|
||||
|
||||
generator.set(src.data(), src.data() + src.size());
|
||||
Pos token_begin = nullptr;
|
||||
Pos token_end = nullptr;
|
||||
|
||||
while (generator.get(token_begin, token_end) && !(max_substrings && dst.size() >= *max_substrings))
|
||||
while (generator.get(token_begin, token_end))
|
||||
dst.push_back(String(token_begin, token_end - token_begin));
|
||||
|
||||
return result_type->createColumnConst(col_const_str->size(), dst);
|
||||
return result_type->createColumnConst(col_str_const->size(), dst);
|
||||
}
|
||||
else
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {}, {} of arguments of function {}",
|
||||
array_argument.column->getName(), array_argument.column->getName(), getName());
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename DataType>
|
||||
std::optional<Int64> getMaxSubstringsImpl(const ColumnWithTypeAndName & argument) const
|
||||
{
|
||||
const auto * col = checkAndGetColumnConst<ColumnVector<DataType>>(argument.column.get());
|
||||
if (!col)
|
||||
return {};
|
||||
|
||||
auto value = col->template getValue<DataType>();
|
||||
return static_cast<Int64>(value);
|
||||
}
|
||||
|
||||
std::optional<size_t> getMaxSubstrings(const ColumnsWithTypeAndName & arguments) const
|
||||
{
|
||||
const auto pos = Generator::getMaxSubstringsArgumentPosition();
|
||||
if (!pos)
|
||||
return std::nullopt;
|
||||
|
||||
if (*pos >= arguments.size())
|
||||
return std::nullopt;
|
||||
|
||||
std::optional<Int64> max_substrings;
|
||||
if (!((max_substrings = getMaxSubstringsImpl<UInt8>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int8>(arguments[*pos]))
|
||||
|| (max_substrings = getMaxSubstringsImpl<UInt16>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int16>(arguments[*pos]))
|
||||
|| (max_substrings = getMaxSubstringsImpl<UInt32>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int32>(arguments[*pos]))
|
||||
|| (max_substrings = getMaxSubstringsImpl<UInt64>(arguments[*pos])) || (max_substrings = getMaxSubstringsImpl<Int64>(arguments[*pos]))))
|
||||
throw Exception(
|
||||
ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Illegal column {}, which is {}-th argument of function {}",
|
||||
arguments[*pos].column->getName(),
|
||||
*pos + 1,
|
||||
getName());
|
||||
|
||||
/// If max_substrings is negative or zero, tokenize will be applied as many times as possible, which is equivalent to
|
||||
/// no max_substrings argument in function
|
||||
if (max_substrings && *max_substrings <= 0)
|
||||
return std::nullopt;
|
||||
|
||||
return *max_substrings;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -803,7 +851,7 @@ private:
|
||||
/// Loop through the rows within the array. NOTE: Everything could be done in one copy if the separator has a size of 1.
|
||||
for (auto next_src_array_offset = src_array_offsets[i]; current_src_array_offset < next_src_array_offset; ++current_src_array_offset)
|
||||
{
|
||||
if (unlikely(null_map && null_map[current_src_array_offset]))
|
||||
if (null_map && null_map[current_src_array_offset]) [[unlikely]]
|
||||
continue;
|
||||
|
||||
if (!first_non_null)
|
||||
@ -881,7 +929,7 @@ public:
|
||||
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override;
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
|
@ -3,10 +3,6 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
class URLPathHierarchyImpl
|
||||
{
|
||||
@ -23,26 +19,18 @@ public:
|
||||
static bool isVariadic() { return false; }
|
||||
static size_t getNumberOfArguments() { return 1; }
|
||||
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
|
||||
"Must be String.", arguments[0]->getName(), getName());
|
||||
FunctionArgumentDescriptors mandatory_args{
|
||||
{"URL", &isString<IDataType>, nullptr, "String"},
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(func, arguments, mandatory_args);
|
||||
}
|
||||
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/) {}
|
||||
static constexpr auto strings_argument_position = 0uz;
|
||||
|
||||
/// Returns the position of the argument that is the column of rows
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
|
@ -3,10 +3,6 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
class URLHierarchyImpl
|
||||
{
|
||||
@ -22,26 +18,18 @@ public:
|
||||
static bool isVariadic() { return false; }
|
||||
static size_t getNumberOfArguments() { return 1; }
|
||||
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
|
||||
"Must be String.", arguments[0]->getName(), getName());
|
||||
FunctionArgumentDescriptors mandatory_args{
|
||||
{"URL", &isString<IDataType>, nullptr, "String"},
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(func, arguments, mandatory_args);
|
||||
}
|
||||
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/) {}
|
||||
static constexpr auto strings_argument_position = 0uz;
|
||||
|
||||
/// Returns the position of the argument that is the column of rows
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
|
@ -3,10 +3,6 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
class ExtractURLParameterNamesImpl
|
||||
{
|
||||
@ -22,27 +18,18 @@ public:
|
||||
static bool isVariadic() { return false; }
|
||||
static size_t getNumberOfArguments() { return 1; }
|
||||
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
|
||||
"Must be String.", arguments[0]->getName(), getName());
|
||||
FunctionArgumentDescriptors mandatory_args{
|
||||
{"URL", &isString<IDataType>, nullptr, "String"},
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(func, arguments, mandatory_args);
|
||||
}
|
||||
|
||||
/// Returns the position of the argument that is the column of rows
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static constexpr auto strings_argument_position = 0uz;
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/) {}
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
|
@ -3,10 +3,6 @@
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
class ExtractURLParametersImpl
|
||||
{
|
||||
@ -22,26 +18,18 @@ public:
|
||||
static bool isVariadic() { return false; }
|
||||
static size_t getNumberOfArguments() { return 1; }
|
||||
|
||||
static void checkArguments(const DataTypes & arguments)
|
||||
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. "
|
||||
"Must be String.", arguments[0]->getName(), getName());
|
||||
FunctionArgumentDescriptors mandatory_args{
|
||||
{"URL", &isString<IDataType>, nullptr, "String"},
|
||||
};
|
||||
|
||||
validateFunctionArgumentTypes(func, arguments, mandatory_args);
|
||||
}
|
||||
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/) {}
|
||||
void init(const ColumnsWithTypeAndName & /*arguments*/, bool /*max_substrings_includes_remaining_string*/) {}
|
||||
|
||||
/// Returns the position of the argument that is the column of rows
|
||||
static size_t getStringsArgumentPosition()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Returns the position of the possible max_substrings argument. std::nullopt means max_substrings argument is disabled in current function.
|
||||
static std::optional<size_t> getMaxSubstringsArgumentPosition()
|
||||
{
|
||||
return std::nullopt;
|
||||
}
|
||||
static constexpr auto strings_argument_position = 0uz;
|
||||
|
||||
/// Called for each next string.
|
||||
void set(Pos pos_, Pos end_)
|
||||
|
@ -1,44 +1,160 @@
|
||||
['1','2','3']
|
||||
['1','2','3']
|
||||
['1','2','3']
|
||||
['1']
|
||||
['1','2']
|
||||
['1','2','3']
|
||||
['1','2','3']
|
||||
['one','two','three','']
|
||||
['one','two','three','']
|
||||
['one','two','three','']
|
||||
['one']
|
||||
['one','two']
|
||||
['one','two','three']
|
||||
['one','two','three','']
|
||||
['one','two','three','']
|
||||
['abca','abc']
|
||||
['abca','abc']
|
||||
['abca','abc']
|
||||
['abca']
|
||||
['abca','abc']
|
||||
['abca','abc']
|
||||
['abca','abc']
|
||||
['1','a','b']
|
||||
['1','a','b']
|
||||
['1','a','b']
|
||||
['1']
|
||||
['1','a']
|
||||
['1','a','b']
|
||||
['1','a','b']
|
||||
['1!','a,','b.']
|
||||
['1!','a,','b.']
|
||||
['1!','a,','b.']
|
||||
['1!']
|
||||
['1!','a,']
|
||||
['1!','a,','b.']
|
||||
['1!','a,','b.']
|
||||
['1','2 3','4,5','abcde']
|
||||
['1','2 3','4,5','abcde']
|
||||
['1','2 3','4,5','abcde']
|
||||
['1']
|
||||
['1','2 3']
|
||||
['1','2 3','4,5']
|
||||
['1','2 3','4,5','abcde']
|
||||
['1','2 3','4,5','abcde']
|
||||
-- negative tests
|
||||
-- splitByChar
|
||||
-- (default)
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
['a']
|
||||
['a','']
|
||||
['a','','b']
|
||||
['a','','b','c']
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
-- (include remainder)
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
['a==b=c=d']
|
||||
['a','=b=c=d']
|
||||
['a','','b=c=d']
|
||||
['a','','b','c=d']
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
-- splitByString
|
||||
-- (default)
|
||||
['a','=','=','b','=','c','=','d']
|
||||
['a','=','=','b','=','c','=','d']
|
||||
['a','=','=','b','=','c','=','d']
|
||||
['a']
|
||||
['a','=']
|
||||
['a','=','=']
|
||||
['a','=','=','b']
|
||||
['a','=','=','b','=']
|
||||
['a','=','=','b','=','c']
|
||||
['a','=','=','b','=','c','=']
|
||||
['a','=','=','b','=','c','=']
|
||||
['a','=','=','b','=','c','=','d']
|
||||
['a','=','=','b','=','c','=','d']
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
['a']
|
||||
['a','']
|
||||
['a','','b']
|
||||
['a','','b','c']
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
-- (include remainder)
|
||||
['a','=','=','b','=','c','=','d']
|
||||
['a','=','=','b','=','c','=','d']
|
||||
['a','=','=','b','=','c','=','d']
|
||||
['a==b=c=d']
|
||||
['a','==b=c=d']
|
||||
['a','=','=b=c=d']
|
||||
['a','=','=','b=c=d']
|
||||
['a','=','=','b','=c=d']
|
||||
['a','=','=','b','=','c=d']
|
||||
['a','=','=','b','=','c','=d']
|
||||
['a','=','=','b','=','c','=','d']
|
||||
['a','=','=','b','=','c','=','d']
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
['a==b=c=d']
|
||||
['a','=b=c=d']
|
||||
['a','','b=c=d']
|
||||
['a','','b','c=d']
|
||||
['a','','b','c','d']
|
||||
['a','','b','c','d']
|
||||
-- splitByRegexp
|
||||
-- (default)
|
||||
['a','bc','de','f']
|
||||
['a','bc','de','f']
|
||||
['a','bc','de','f']
|
||||
['a']
|
||||
['a','bc']
|
||||
['a','bc','de']
|
||||
['a','bc','de','f']
|
||||
['a','bc','de','f']
|
||||
['a','1','2','b','c','2','3','d','e','3','4','5','f']
|
||||
['a','1','2','b','c','2','3','d','e','3','4','5','f']
|
||||
['a','1','2','b','c','2','3','d','e','3','4','5','f']
|
||||
['a']
|
||||
['a','1']
|
||||
['a','1','2']
|
||||
['a','1','2','b']
|
||||
['a','1','2','b','c']
|
||||
-- (include remainder)
|
||||
['a','1','2','b','c','2','3','d','e','3','4','5','f']
|
||||
['a','1','2','b','c','2','3','d','e','3','4','5','f']
|
||||
['a','1','2','b','c','2','3','d','e','3','4','5','f']
|
||||
['a12bc23de345f']
|
||||
['a','12bc23de345f']
|
||||
['a','1','2bc23de345f']
|
||||
['a','1','2','bc23de345f']
|
||||
['a','1','2','b','c23de345f']
|
||||
['a','bc','de','f']
|
||||
['a','bc','de','f']
|
||||
['a','bc','de','f']
|
||||
['a12bc23de345f']
|
||||
['a','bc23de345f']
|
||||
['a','bc','de345f']
|
||||
['a','bc','de','f']
|
||||
['a','bc','de','f']
|
||||
-- splitByAlpha
|
||||
-- (default)
|
||||
['ab','cd','ef','gh']
|
||||
['ab','cd','ef','gh']
|
||||
['ab','cd','ef','gh']
|
||||
['ab']
|
||||
['ab','cd']
|
||||
['ab','cd','ef']
|
||||
['ab','cd','ef','gh']
|
||||
['ab','cd','ef','gh']
|
||||
-- (include remainder)
|
||||
['ab','cd','ef','gh']
|
||||
['ab','cd','ef','gh']
|
||||
['ab','cd','ef','gh']
|
||||
['ab.cd.ef.gh']
|
||||
['ab','cd.ef.gh']
|
||||
['ab','cd','ef.gh']
|
||||
['ab','cd','ef','gh']
|
||||
['ab','cd','ef','gh']
|
||||
-- splitByNonAlpha
|
||||
-- (default)
|
||||
['128','0','0','1']
|
||||
['128','0','0','1']
|
||||
['128','0','0','1']
|
||||
['128']
|
||||
['128','0']
|
||||
['128','0','0']
|
||||
['128','0','0','1']
|
||||
['128','0','0','1']
|
||||
-- (include remainder)
|
||||
['128','0','0','1']
|
||||
['128','0','0','1']
|
||||
['128','0','0','1']
|
||||
['128.0.0.1']
|
||||
['128','0.0.1']
|
||||
['128','0','0.1']
|
||||
['128','0','0','1']
|
||||
['128','0','0','1']
|
||||
-- splitByWhitespace
|
||||
-- (default)
|
||||
['Nein,','nein,','nein!','Doch!']
|
||||
['Nein,','nein,','nein!','Doch!']
|
||||
['Nein,','nein,','nein!','Doch!']
|
||||
['Nein,']
|
||||
['Nein,','nein,']
|
||||
['Nein,','nein,','nein!']
|
||||
['Nein,','nein,','nein!','Doch!']
|
||||
['Nein,','nein,','nein!','Doch!']
|
||||
-- (include remainder)
|
||||
['Nein,','nein,','nein!','Doch!']
|
||||
['Nein,','nein,','nein!','Doch!']
|
||||
['Nein,','nein,','nein!','Doch!']
|
||||
['Nein, nein, nein! Doch!']
|
||||
['Nein,','nein, nein! Doch!']
|
||||
['Nein,','nein,','nein! Doch!']
|
||||
['Nein,','nein,','nein!','Doch!']
|
||||
['Nein,','nein,','nein!','Doch!']
|
||||
|
@ -1,59 +1,175 @@
|
||||
select splitByChar(',', '1,2,3');
|
||||
select splitByChar(',', '1,2,3', -1);
|
||||
select splitByChar(',', '1,2,3', 0);
|
||||
select splitByChar(',', '1,2,3', 1);
|
||||
select splitByChar(',', '1,2,3', 2);
|
||||
select splitByChar(',', '1,2,3', 3);
|
||||
select splitByChar(',', '1,2,3', 4);
|
||||
|
||||
select splitByRegexp('[ABC]', 'oneAtwoBthreeC');
|
||||
select splitByRegexp('[ABC]', 'oneAtwoBthreeC', -1);
|
||||
select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 0);
|
||||
select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 1);
|
||||
select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 2);
|
||||
select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 3);
|
||||
select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 4);
|
||||
select splitByRegexp('[ABC]', 'oneAtwoBthreeC', 5);
|
||||
|
||||
SELECT alphaTokens('abca1abc');
|
||||
SELECT alphaTokens('abca1abc', -1);
|
||||
SELECT alphaTokens('abca1abc', 0);
|
||||
SELECT alphaTokens('abca1abc', 1);
|
||||
SELECT alphaTokens('abca1abc', 2);
|
||||
SELECT alphaTokens('abca1abc', 3);
|
||||
|
||||
SELECT splitByAlpha('abca1abc');
|
||||
|
||||
SELECT splitByNonAlpha(' 1! a, b. ');
|
||||
SELECT splitByNonAlpha(' 1! a, b. ', -1);
|
||||
SELECT splitByNonAlpha(' 1! a, b. ', 0);
|
||||
SELECT splitByNonAlpha(' 1! a, b. ', 1);
|
||||
SELECT splitByNonAlpha(' 1! a, b. ', 2);
|
||||
SELECT splitByNonAlpha(' 1! a, b. ', 3);
|
||||
SELECT splitByNonAlpha(' 1! a, b. ', 4);
|
||||
|
||||
SELECT splitByWhitespace(' 1! a, b. ');
|
||||
SELECT splitByWhitespace(' 1! a, b. ', -1);
|
||||
SELECT splitByWhitespace(' 1! a, b. ', 0);
|
||||
SELECT splitByWhitespace(' 1! a, b. ', 1);
|
||||
SELECT splitByWhitespace(' 1! a, b. ', 2);
|
||||
SELECT splitByWhitespace(' 1! a, b. ', 3);
|
||||
SELECT splitByWhitespace(' 1! a, b. ', 4);
|
||||
|
||||
SELECT splitByString(', ', '1, 2 3, 4,5, abcde');
|
||||
SELECT splitByString(', ', '1, 2 3, 4,5, abcde', -1);
|
||||
SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 0);
|
||||
SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 1);
|
||||
SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 2);
|
||||
SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 3);
|
||||
SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 4);
|
||||
SELECT splitByString(', ', '1, 2 3, 4,5, abcde', 5);
|
||||
|
||||
|
||||
select splitByChar(',', '1,2,3', ''); -- { serverError 43 }
|
||||
select splitByRegexp('[ABC]', 'oneAtwoBthreeC', ''); -- { serverError 43 }
|
||||
SELECT '-- negative tests';
|
||||
SELECT splitByChar(',', '1,2,3', ''); -- { serverError 43 }
|
||||
SELECT splitByRegexp('[ABC]', 'oneAtwoBthreeC', ''); -- { serverError 43 }
|
||||
SELECT alphaTokens('abca1abc', ''); -- { serverError 43 }
|
||||
SELECT splitByAlpha('abca1abc', ''); -- { serverError 43 }
|
||||
SELECT splitByNonAlpha(' 1! a, b. ', ''); -- { serverError 43 }
|
||||
SELECT splitByWhitespace(' 1! a, b. ', ''); -- { serverError 43 }
|
||||
SELECT splitByString(', ', '1, 2 3, 4,5, abcde', ''); -- { serverError 43 }
|
||||
SELECT splitByString(', ', '1, 2 3, 4,5, abcde', ''); -- { serverError 43 }
|
||||
|
||||
SELECT '-- splitByChar';
|
||||
SELECT '-- (default)';
|
||||
SELECT splitByChar('=', 'a==b=c=d');
|
||||
SELECT splitByChar('=', 'a==b=c=d', -1);
|
||||
SELECT splitByChar('=', 'a==b=c=d', 0);
|
||||
SELECT splitByChar('=', 'a==b=c=d', 1);
|
||||
SELECT splitByChar('=', 'a==b=c=d', 2);
|
||||
SELECT splitByChar('=', 'a==b=c=d', 3);
|
||||
SELECT splitByChar('=', 'a==b=c=d', 4);
|
||||
SELECT splitByChar('=', 'a==b=c=d', 5);
|
||||
SELECT splitByChar('=', 'a==b=c=d', 6);
|
||||
SELECT '-- (include remainder)';
|
||||
SELECT splitByChar('=', 'a==b=c=d') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByChar('=', 'a==b=c=d', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByChar('=', 'a==b=c=d', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByChar('=', 'a==b=c=d', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByChar('=', 'a==b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByChar('=', 'a==b=c=d', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByChar('=', 'a==b=c=d', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByChar('=', 'a==b=c=d', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByChar('=', 'a==b=c=d', 6) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
|
||||
SELECT '-- splitByString';
|
||||
SELECT '-- (default)';
|
||||
SELECT splitByString('', 'a==b=c=d') SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 6) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 7) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 7) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 8) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('', 'a==b=c=d', 9) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('=', 'a==b=c=d') SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('=', 'a==b=c=d', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('=', 'a==b=c=d', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('=', 'a==b=c=d', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('=', 'a==b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('=', 'a==b=c=d', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('=', 'a==b=c=d', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('=', 'a==b=c=d', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT splitByString('=', 'a==b=c=d', 6) SETTINGS splitby_max_substrings_includes_remaining_string = 0;
|
||||
SELECT '-- (include remainder)';
|
||||
SELECT splitByString('', 'a==b=c=d') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', 6) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', 7) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', 8) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('', 'a==b=c=d', 9) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('=', 'a==b=c=d') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('=', 'a==b=c=d', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('=', 'a==b=c=d', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('=', 'a==b=c=d', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('=', 'a==b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('=', 'a==b=c=d', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('=', 'a==b=c=d', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('=', 'a==b=c=d', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByString('=', 'a==b=c=d', 6) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
|
||||
|
||||
-- splitByRegexp section.
-- Runs splitByRegexp over the fixed input 'a12bc23de345f' with two patterns
-- (a digit run '\\d+' and the empty pattern ''), first without and then with
-- splitby_max_substrings_includes_remaining_string = 1.
-- For each pattern, max_substrings is omitted and then stepped through -1 .. 5.
-- The SELECT '-- ...' statements return literal marker rows that label the
-- sections in the query output.
SELECT '-- splitByRegexp';
|
||||
SELECT '-- (default)';
|
||||
-- Pattern '\\d+': max_substrings omitted, then -1, 0, 1, 2, 3, 4, 5.
SELECT splitByRegexp('\\d+', 'a12bc23de345f');
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', -1);
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 0);
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 1);
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2);
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 3);
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 4);
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 5);
|
||||
-- Empty pattern '': same max_substrings sweep.
SELECT splitByRegexp('', 'a12bc23de345f');
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', -1);
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 0);
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 1);
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 2);
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 3);
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 4);
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 5);
|
||||
-- Same calls again with splitby_max_substrings_includes_remaining_string = 1,
-- the setting that controls whether the remaining string is included in the
-- last array element when max_substrings > 0.
SELECT '-- (include remainder)';
|
||||
-- NOTE(review): the empty-pattern cases come first here, the reverse of the
-- '(default)' subsection above. Statement order presumably has to match the
-- expected-output file, so do not reorder one without updating the other.
SELECT splitByRegexp('', 'a12bc23de345f') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('', 'a12bc23de345f', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
-- Pattern '\\d+' with the setting enabled: same max_substrings sweep.
SELECT splitByRegexp('\\d+', 'a12bc23de345f') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByRegexp('\\d+', 'a12bc23de345f', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
|
||||
-- splitByAlpha section.
-- Calls splitByAlpha on the fixed input 'ab.cd.ef.gh', first with default
-- settings and then with splitby_max_substrings_includes_remaining_string = 1.
-- In each subsection max_substrings is omitted and then stepped through -1 .. 5.
-- The SELECT '-- ...' statements return literal marker rows that label the
-- sections in the query output.
SELECT '-- splitByAlpha';
|
||||
SELECT '-- (default)';
|
||||
-- max_substrings omitted, then -1, 0, 1, 2, 3, 4, 5.
SELECT splitByAlpha('ab.cd.ef.gh');
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', -1);
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 0);
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 1);
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 2);
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 3);
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 4);
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 5);
|
||||
-- Same sweep with the setting that controls whether the remaining string is
-- included in the last array element when max_substrings > 0.
SELECT '-- (include remainder)';
|
||||
SELECT splitByAlpha('ab.cd.ef.gh') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByAlpha('ab.cd.ef.gh', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
|
||||
-- splitByNonAlpha section.
-- Calls splitByNonAlpha on the fixed input '128.0.0.1', first with default
-- settings and then with splitby_max_substrings_includes_remaining_string = 1.
-- In each subsection max_substrings is omitted and then stepped through -1 .. 5.
-- The SELECT '-- ...' statements return literal marker rows that label the
-- sections in the query output.
SELECT '-- splitByNonAlpha';
|
||||
SELECT '-- (default)';
|
||||
-- max_substrings omitted, then -1, 0, 1, 2, 3, 4, 5.
SELECT splitByNonAlpha('128.0.0.1');
|
||||
SELECT splitByNonAlpha('128.0.0.1', -1);
|
||||
SELECT splitByNonAlpha('128.0.0.1', 0);
|
||||
SELECT splitByNonAlpha('128.0.0.1', 1);
|
||||
SELECT splitByNonAlpha('128.0.0.1', 2);
|
||||
SELECT splitByNonAlpha('128.0.0.1', 3);
|
||||
SELECT splitByNonAlpha('128.0.0.1', 4);
|
||||
SELECT splitByNonAlpha('128.0.0.1', 5);
|
||||
-- Same sweep with the setting that controls whether the remaining string is
-- included in the last array element when max_substrings > 0.
SELECT '-- (include remainder)';
|
||||
SELECT splitByNonAlpha('128.0.0.1') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByNonAlpha('128.0.0.1', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByNonAlpha('128.0.0.1', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByNonAlpha('128.0.0.1', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByNonAlpha('128.0.0.1', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByNonAlpha('128.0.0.1', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByNonAlpha('128.0.0.1', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByNonAlpha('128.0.0.1', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
--
|
||||
--
|
||||
-- splitByWhitespace section.
-- Calls splitByWhitespace on the fixed input 'Nein, nein, nein! Doch!', first
-- with default settings and then with
-- splitby_max_substrings_includes_remaining_string = 1.
-- In each subsection max_substrings is omitted and then stepped through -1 .. 5.
-- The SELECT '-- ...' statements return literal marker rows that label the
-- sections in the query output.
SELECT '-- splitByWhitespace';
|
||||
SELECT '-- (default)';
|
||||
-- max_substrings omitted, then -1, 0, 1, 2, 3, 4, 5.
SELECT splitByWhitespace('Nein, nein, nein! Doch!');
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', -1);
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 0);
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 1);
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2);
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 3);
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 4);
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 5);
|
||||
-- Same sweep with the setting that controls whether the remaining string is
-- included in the last array element when max_substrings > 0.
SELECT '-- (include remainder)';
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!') SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', -1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 0) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 1) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 3) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 4) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
SELECT splitByWhitespace('Nein, nein, nein! Doch!', 5) SETTINGS splitby_max_substrings_includes_remaining_string = 1;
|
||||
|
@ -2215,6 +2215,8 @@ sparkBar
|
||||
sparkbar
|
||||
sparsehash
|
||||
speedscope
|
||||
splitBy
|
||||
splitby
|
||||
splitByChar
|
||||
splitByNonAlpha
|
||||
splitByRegexp
|
||||
|
Loading…
Reference in New Issue
Block a user