ClickHouse/src/Functions/FunctionsStringArray.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

991 lines
31 KiB
C++
Raw Normal View History

2012-09-17 04:34:19 +00:00
#pragma once
2021-10-12 21:23:40 +00:00
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
2021-10-19 17:54:27 +00:00
#include <Columns/ColumnNullable.h>
2021-10-12 21:23:40 +00:00
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeArray.h>
2021-10-12 21:23:40 +00:00
#include <DataTypes/DataTypeNullable.h>
2019-02-10 17:40:52 +00:00
#include <DataTypes/DataTypeString.h>
2021-10-12 21:23:40 +00:00
#include <Functions/FunctionHelpers.h>
2021-10-22 20:19:32 +00:00
#include <Functions/FunctionsConversion.h>
2021-05-17 07:30:42 +00:00
#include <Functions/IFunction.h>
#include <Functions/Regexps.h>
#include <IO/WriteHelpers.h>
2021-10-12 21:23:40 +00:00
#include <Interpreters/Context_fwd.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
2012-09-17 04:34:19 +00:00
namespace DB
{
2017-06-13 02:06:53 +00:00
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
2017-06-13 02:06:53 +00:00
}
2017-05-27 15:45:25 +00:00
/** Functions that split strings into an array of strings or vice versa.
2012-09-17 04:34:19 +00:00
*
2022-11-03 08:12:19 +00:00
* splitByChar(sep, s[, max_substrings])
* splitByString(sep, s[, max_substrings])
* splitByRegexp(regexp, s[, max_substrings])
*
2022-11-03 08:12:19 +00:00
* splitByWhitespace(s[, max_substrings]) - split the string by whitespace characters
* splitByNonAlpha(s[, max_substrings]) - split the string by whitespace and punctuation characters
*
2017-05-27 15:45:25 +00:00
* extractAll(s, regexp) - select from the string the subsequences corresponding to the regexp.
* - first subpattern, if regexp has subpattern;
* - zero subpattern (the match part), otherwise;
* - otherwise, an empty array
*
* arrayStringConcat(arr)
* arrayStringConcat(arr, delimiter)
2017-05-27 15:45:25 +00:00
* - join an array of strings into one string via a separator.
*
2022-11-03 08:12:19 +00:00
* alphaTokens(s[, max_substrings]) - select from the string subsequence `[a-zA-Z]+`.
*
2017-05-27 15:45:25 +00:00
* URL functions are located separately.
2012-09-17 04:34:19 +00:00
*/
/// Position inside the byte range of the string being tokenized (the generators below keep a `pos`/`end` pair of these).
using Pos = const char *;
2012-09-17 04:34:19 +00:00
2023-09-18 20:08:37 +00:00
/// Extracts the optional `max_substrings` limit from the function arguments; the generators call it from init()
/// with the index at which that argument would appear. Only declared here - the definition lives elsewhere.
/// NOTE(review): presumably yields std::nullopt when the argument is absent or means "no limit" - confirm against the definition.
std::optional<size_t> extractMaxSplits(const ColumnsWithTypeAndName & arguments, size_t max_substrings_argument_position);
2017-05-27 15:45:25 +00:00
/// Substring generators. All of them have a common interface.
2022-11-03 07:06:58 +00:00
class SplitByAlphaImpl
2012-09-17 04:34:19 +00:00
{
private:
Pos pos;
Pos end;
2023-09-11 18:48:40 +00:00
std::optional<size_t> max_splits;
size_t splits;
2023-09-18 20:08:37 +00:00
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "alphaTokens";
static String getName() { return name; }
2022-11-03 07:05:23 +00:00
static bool isVariadic() { return true; }
2022-01-30 01:10:27 +00:00
2022-11-03 07:05:23 +00:00
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"s", &isString<IDataType>, nullptr, "String"},
};
FunctionArgumentDescriptors optional_args{
{"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
};
2022-11-10 02:46:51 +00:00
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
}
static constexpr auto strings_argument_position = 0uz;
2023-09-18 20:08:37 +00:00
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 1);
2023-09-11 18:48:40 +00:00
}
2017-05-27 15:45:25 +00:00
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
2023-09-11 18:48:40 +00:00
splits = 0;
}
2017-05-27 15:45:25 +00:00
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
/// Skip garbage
while (pos < end && !isAlphaASCII(*pos))
++pos;
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (max_substrings_includes_remaining_string)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (splits == *max_splits - 1)
{
2023-09-18 20:08:37 +00:00
token_end = end;
pos = end;
return true;
}
2023-09-11 18:48:40 +00:00
}
2023-09-18 20:08:37 +00:00
else
if (splits == *max_splits)
return false;
2023-09-11 18:48:40 +00:00
}
while (pos < end && isAlphaASCII(*pos))
++pos;
token_end = pos;
2023-09-11 18:48:40 +00:00
++splits;
return true;
}
};
2012-09-17 04:34:19 +00:00
class SplitByNonAlphaImpl
{
private:
Pos pos;
Pos end;
2023-09-11 18:48:40 +00:00
std::optional<size_t> max_splits;
size_t splits;
2023-09-18 20:08:37 +00:00
bool max_substrings_includes_remaining_string;
public:
/// Get the name of the function.
static constexpr auto name = "splitByNonAlpha";
static String getName() { return name; }
2022-11-03 07:05:23 +00:00
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
SplitByAlphaImpl::checkArguments(func, arguments);
}
static constexpr auto strings_argument_position = 0uz;
2023-09-18 20:08:37 +00:00
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 1);
2023-09-11 18:48:40 +00:00
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
2023-09-11 18:48:40 +00:00
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
/// Skip garbage
while (pos < end && (isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
++pos;
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (max_substrings_includes_remaining_string)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (splits == *max_splits - 1)
{
2023-09-18 20:08:37 +00:00
token_end = end;
pos = end;
return true;
}
2023-09-11 18:48:40 +00:00
}
2023-09-18 20:08:37 +00:00
else
if (splits == *max_splits)
return false;
2023-09-11 18:48:40 +00:00
}
while (pos < end && !(isWhitespaceASCII(*pos) || isPunctuationASCII(*pos)))
++pos;
token_end = pos;
2023-09-11 18:48:40 +00:00
splits++;
return true;
}
};
class SplitByWhitespaceImpl
{
private:
Pos pos;
Pos end;
2023-09-11 18:48:40 +00:00
std::optional<size_t> max_splits;
size_t splits;
2023-09-18 20:08:37 +00:00
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "splitByWhitespace";
static String getName() { return name; }
2022-11-03 07:05:23 +00:00
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
return SplitByNonAlphaImpl::checkArguments(func, arguments);
}
static constexpr auto strings_argument_position = 0uz;
2023-09-18 20:08:37 +00:00
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 1);
2023-09-11 18:48:40 +00:00
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
2023-09-11 18:48:40 +00:00
splits = 0;
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
/// Skip garbage
while (pos < end && isWhitespaceASCII(*pos))
++pos;
if (pos == end)
return false;
token_begin = pos;
if (max_splits)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (max_substrings_includes_remaining_string)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (splits == *max_splits - 1)
{
2023-09-18 20:08:37 +00:00
token_end = end;
pos = end;
return true;
}
2023-09-11 18:48:40 +00:00
}
2023-09-18 20:08:37 +00:00
else
if (splits == *max_splits)
return false;
2023-09-11 18:48:40 +00:00
}
while (pos < end && !isWhitespaceASCII(*pos))
++pos;
token_end = pos;
2023-09-11 18:48:40 +00:00
splits++;
return true;
}
};
2012-09-17 04:34:19 +00:00
class SplitByCharImpl
2012-09-17 04:34:19 +00:00
{
private:
Pos pos;
Pos end;
char separator;
2023-09-11 18:48:40 +00:00
std::optional<size_t> max_splits;
size_t splits;
2023-09-18 20:08:37 +00:00
bool max_substrings_includes_remaining_string;
2012-09-17 04:34:19 +00:00
public:
static constexpr auto name = "splitByChar";
static String getName() { return name; }
2022-01-30 01:10:27 +00:00
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
2012-09-17 04:34:19 +00:00
{
FunctionArgumentDescriptors mandatory_args{
{"separator", &isString<IDataType>, isColumnConst, "const String"},
{"s", &isString<IDataType>, nullptr, "String"}
};
2022-01-30 01:10:27 +00:00
FunctionArgumentDescriptors optional_args{
{"max_substrings", &isNativeInteger<IDataType>, isColumnConst, "const Number"},
};
validateFunctionArgumentTypes(func, arguments, mandatory_args, optional_args);
}
static constexpr auto strings_argument_position = 1uz;
2023-09-18 20:08:37 +00:00
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
2020-10-18 19:00:13 +00:00
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[0].column->getName(), getName());
ColumnConst unification (#1011) * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * Fixed error in ColumnArray::replicateGeneric [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150].
2017-07-21 06:35:58 +00:00
String sep_str = col->getValue<String>();
if (sep_str.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal separator for function {}. Must be exactly one byte.", getName());
separator = sep_str[0];
2023-09-11 18:48:40 +00:00
2023-09-18 20:08:37 +00:00
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 2);
}
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
2023-09-11 18:48:40 +00:00
splits = 0;
}
bool get(Pos & token_begin, Pos & token_end)
{
if (!pos)
return false;
token_begin = pos;
if (max_splits)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (max_substrings_includes_remaining_string)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (splits == *max_splits - 1)
{
2023-09-18 20:08:37 +00:00
token_end = end;
pos = nullptr;
return true;
}
2023-09-11 18:48:40 +00:00
}
2023-09-18 20:08:37 +00:00
else
if (splits == *max_splits)
return false;
2023-09-11 18:48:40 +00:00
}
pos = reinterpret_cast<Pos>(memchr(pos, separator, end - pos));
if (pos)
{
token_end = pos;
++pos;
2023-09-11 18:48:40 +00:00
++splits;
}
else
token_end = end;
return true;
}
};
class SplitByStringImpl
{
private:
Pos pos;
Pos end;
String separator;
2023-09-11 18:48:40 +00:00
std::optional<size_t> max_splits;
size_t splits;
2023-09-18 20:08:37 +00:00
bool max_substrings_includes_remaining_string;
public:
static constexpr auto name = "splitByString";
static String getName() { return name; }
2022-11-03 07:05:23 +00:00
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
SplitByCharImpl::checkArguments(func, arguments);
}
static constexpr auto strings_argument_position = 1uz;
2023-09-18 20:08:37 +00:00
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
{
2020-10-18 19:00:13 +00:00
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[0].column->getName(), getName());
separator = col->getValue<String>();
2023-09-11 18:48:40 +00:00
2023-09-18 20:08:37 +00:00
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 2);
}
2017-05-27 15:45:25 +00:00
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
2023-09-11 18:48:40 +00:00
splits = 0;
}
2017-05-27 15:45:25 +00:00
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
if (separator.empty())
{
if (pos == end)
return false;
token_begin = pos;
2023-09-11 18:48:40 +00:00
if (max_splits)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (max_substrings_includes_remaining_string)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (splits == *max_splits - 1)
{
2023-09-18 20:08:37 +00:00
token_end = end;
pos = end;
return true;
}
2023-09-11 18:48:40 +00:00
}
2023-09-18 20:08:37 +00:00
else
if (splits == *max_splits)
return false;
2023-09-11 18:48:40 +00:00
}
pos += 1;
token_end = pos;
2023-09-11 18:48:40 +00:00
++splits;
}
else
{
if (!pos)
return false;
token_begin = pos;
if (max_splits)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (max_substrings_includes_remaining_string)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (splits == *max_splits - 1)
{
2023-09-18 20:08:37 +00:00
token_end = end;
pos = nullptr;
return true;
}
2023-09-11 18:48:40 +00:00
}
2023-09-18 20:08:37 +00:00
else
if (splits == *max_splits)
return false;
2023-09-11 18:48:40 +00:00
}
2023-09-11 18:48:40 +00:00
pos = reinterpret_cast<Pos>(memmem(pos, end - pos, separator.data(), separator.size()));
if (pos)
{
token_end = pos;
pos += separator.size();
++splits;
}
else
token_end = end;
}
return true;
}
};
2021-05-13 02:37:09 +00:00
class SplitByRegexpImpl
{
private:
Cache compiled regexps when evaluating non-const needles Needles in a (non-const) needle column may repeat and this commit allows to skip compilation for known needles. Out of the different design alternatives (see below, if someone is interested), we now maintain - one global pattern cache, - with a fixed size of 42k elements currently, - and use LRU as eviction strategy. ------------------------------------------------------------------------ (sorry for the wall of text, dumping it here not for reading but just for reference) Write-up about considered design alternatives: 1. Keep the current global cache of const needles. For non-const needles, probe the cache but don't store values in it. Pros: need to maintain just a single cache, no problem with cache pollution assuming there are few distinct constant needles Cons: only useful if a non-const needle occurred as already as a const needle --> overall too simplistic 2. Keep the current global cache for const needles. For non-const needles, create a local (e.g. per-query) cache Pros: unlike (1.), non-const needles can be skipped even if they did not occur yet, no pollution of the const pattern cache when there are very many non-const needles (e.g. large / highly distinct needle columns). Cons: caches may explode "horizontally", i.e. we'll end up with the const cache + caches for Q1, Q2, ... QN, this makes it harder to control the overall space consumption, also patterns residing in different caches cannot be reused between queries, another difficulty is that the concept of "query" does not really exist at matching level - there are only column chunks and we'd potentially end up with 1 cache / chunk 3. Queries with const and non-const needles insert into the same global cache. Pros: the advantages of (2.) + allows to reuse compiled patterns accross parallel queries Cons: needs an eviction strategy to control cache size and pollution (and btw. (2.) also needs eviction strategies for the individual caches) 4. 
Queries with const needle use global cache, queries with non-const needle use a different global cache --> Overall similar to (3) but ignores the (likely) edge case that const and non-const needles overlap. In sum, (3.) seems the simplest and most beneficial approach. Eviction strategies: 0. Don't ever evict --> cache may grow infinitely and eventually make the system unusable (may even pose a DoS risk) 1. Flush the cache after a certain threshold is exceeded --> very simple but may lead to peridic performance drops 2. Use LRU --> more graceful performance degradation at threshold but comes with a (constant) performance overhead to maintain the LRU queue In sum, given that the pattern compilation in RE2 should be quite costly (pattern-to-DFA/NFA), LRU may be acceptable.
2022-05-25 19:22:45 +00:00
Regexps::RegexpPtr re;
2021-05-13 02:37:09 +00:00
OptimizedRegularExpression::MatchVec matches;
Pos pos;
Pos end;
2023-09-11 18:48:40 +00:00
std::optional<size_t> max_splits;
size_t splits;
2023-09-18 20:08:37 +00:00
bool max_substrings_includes_remaining_string;
2023-09-11 18:48:40 +00:00
2021-05-13 02:37:09 +00:00
public:
static constexpr auto name = "splitByRegexp";
static String getName() { return name; }
2022-01-30 01:10:27 +00:00
static bool isVariadic() { return true; }
static size_t getNumberOfArguments() { return 0; }
2021-05-13 02:37:09 +00:00
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
2021-05-13 02:37:09 +00:00
{
SplitByStringImpl::checkArguments(func, arguments);
2021-05-13 02:37:09 +00:00
}
static constexpr auto strings_argument_position = 1uz;
2023-09-18 20:08:37 +00:00
void init(const ColumnsWithTypeAndName & arguments, bool max_substrings_includes_remaining_string_)
2021-05-13 02:37:09 +00:00
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[0].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[0].column->getName(), getName());
2021-05-13 02:37:09 +00:00
if (!col->getValue<String>().empty())
2023-09-14 16:12:29 +00:00
re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
2023-09-11 18:48:40 +00:00
2023-09-18 20:08:37 +00:00
max_substrings_includes_remaining_string = max_substrings_includes_remaining_string_;
max_splits = extractMaxSplits(arguments, 2);
2021-05-13 02:37:09 +00:00
}
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
2023-09-11 18:48:40 +00:00
splits = 0;
2021-05-13 02:37:09 +00:00
}
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
if (!re)
2021-05-13 02:37:09 +00:00
{
if (pos == end)
return false;
token_begin = pos;
2023-09-11 18:48:40 +00:00
if (max_splits)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (max_substrings_includes_remaining_string)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (splits == *max_splits - 1)
{
2023-09-18 20:08:37 +00:00
token_end = end;
pos = end;
return true;
}
2023-09-11 18:48:40 +00:00
}
2023-09-18 20:08:37 +00:00
else
if (splits == *max_splits)
return false;
2023-09-11 18:48:40 +00:00
}
pos += 1;
token_end = pos;
2023-09-11 18:48:40 +00:00
++splits;
2021-05-13 02:37:09 +00:00
}
else
{
if (!pos || pos > end)
return false;
token_begin = pos;
if (max_splits)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (max_substrings_includes_remaining_string)
2023-09-11 18:48:40 +00:00
{
2023-09-18 20:08:37 +00:00
if (splits == *max_splits - 1)
{
2023-09-18 20:08:37 +00:00
token_end = end;
pos = nullptr;
return true;
}
2023-09-11 18:48:40 +00:00
}
2023-09-18 20:08:37 +00:00
else
if (splits == *max_splits)
return false;
2023-09-11 18:48:40 +00:00
}
if (!re->match(pos, end - pos, matches) || !matches[0].length)
{
token_end = end;
pos = end + 1;
}
else
{
token_end = pos + matches[0].offset;
pos = token_end + matches[0].length;
++splits;
}
2021-05-13 02:37:09 +00:00
}
return true;
}
};
class ExtractAllImpl
{
private:
Cache compiled regexps when evaluating non-const needles Needles in a (non-const) needle column may repeat and this commit allows to skip compilation for known needles. Out of the different design alternatives (see below, if someone is interested), we now maintain - one global pattern cache, - with a fixed size of 42k elements currently, - and use LRU as eviction strategy. ------------------------------------------------------------------------ (sorry for the wall of text, dumping it here not for reading but just for reference) Write-up about considered design alternatives: 1. Keep the current global cache of const needles. For non-const needles, probe the cache but don't store values in it. Pros: need to maintain just a single cache, no problem with cache pollution assuming there are few distinct constant needles Cons: only useful if a non-const needle occurred as already as a const needle --> overall too simplistic 2. Keep the current global cache for const needles. For non-const needles, create a local (e.g. per-query) cache Pros: unlike (1.), non-const needles can be skipped even if they did not occur yet, no pollution of the const pattern cache when there are very many non-const needles (e.g. large / highly distinct needle columns). Cons: caches may explode "horizontally", i.e. we'll end up with the const cache + caches for Q1, Q2, ... QN, this makes it harder to control the overall space consumption, also patterns residing in different caches cannot be reused between queries, another difficulty is that the concept of "query" does not really exist at matching level - there are only column chunks and we'd potentially end up with 1 cache / chunk 3. Queries with const and non-const needles insert into the same global cache. Pros: the advantages of (2.) + allows to reuse compiled patterns accross parallel queries Cons: needs an eviction strategy to control cache size and pollution (and btw. (2.) also needs eviction strategies for the individual caches) 4. 
Queries with const needle use global cache, queries with non-const needle use a different global cache --> Overall similar to (3) but ignores the (likely) edge case that const and non-const needles overlap. In sum, (3.) seems the simplest and most beneficial approach. Eviction strategies: 0. Don't ever evict --> cache may grow infinitely and eventually make the system unusable (may even pose a DoS risk) 1. Flush the cache after a certain threshold is exceeded --> very simple but may lead to peridic performance drops 2. Use LRU --> more graceful performance degradation at threshold but comes with a (constant) performance overhead to maintain the LRU queue In sum, given that the pattern compilation in RE2 should be quite costly (pattern-to-DFA/NFA), LRU may be acceptable.
2022-05-25 19:22:45 +00:00
Regexps::RegexpPtr re;
OptimizedRegularExpression::MatchVec matches;
size_t capture;
Pos pos;
Pos end;
public:
static constexpr auto name = "extractAll";
static String getName() { return name; }
2022-01-30 01:10:27 +00:00
static bool isVariadic() { return false; }
2016-12-29 19:38:10 +00:00
static size_t getNumberOfArguments() { return 2; }
static void checkArguments(const IFunction & func, const ColumnsWithTypeAndName & arguments)
{
FunctionArgumentDescriptors mandatory_args{
{"haystack", &isString<IDataType>, nullptr, "String"},
{"pattern", &isString<IDataType>, isColumnConst, "const String"}
};
validateFunctionArgumentTypes(func, arguments, mandatory_args);
}
static constexpr auto strings_argument_position = 0uz;
2023-09-18 20:08:37 +00:00
void init(const ColumnsWithTypeAndName & arguments, bool /*max_substrings_includes_remaining_string*/)
{
2020-10-18 19:00:13 +00:00
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
if (!col)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
"Must be constant string.", arguments[1].column->getName(), getName());
2023-09-14 16:12:29 +00:00
re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
capture = re->getNumberOfSubpatterns() > 0 ? 1 : 0;
matches.resize(capture + 1);
}
2017-05-27 15:45:25 +00:00
/// Called for each next string.
void set(Pos pos_, Pos end_)
{
pos = pos_;
end = end_;
}
2017-05-27 15:45:25 +00:00
/// Get the next token, if any, or return false.
bool get(Pos & token_begin, Pos & token_end)
{
if (!pos || pos > end)
return false;
if (!re->match(pos, end - pos, matches) || !matches[0].length)
return false;
2018-12-27 01:51:22 +00:00
if (matches[capture].offset == std::string::npos)
{
/// Empty match.
token_begin = pos;
token_end = pos;
}
else
{
token_begin = pos + matches[capture].offset;
token_end = token_begin + matches[capture].length;
}
pos += matches[0].offset + matches[0].length;
return true;
}
};
2017-05-27 15:45:25 +00:00
/// A function that takes a string, and returns an array of substrings created by some generator.
template <typename Generator>
class FunctionTokens : public IFunction
{
2023-09-11 18:48:40 +00:00
private:
2023-09-18 20:08:37 +00:00
bool max_substrings_includes_remaining_string;
2023-09-11 18:48:40 +00:00
public:
static constexpr auto name = Generator::name;
2023-09-11 18:48:40 +00:00
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionTokens>(context); }
explicit FunctionTokens<Generator>(ContextPtr context)
{
const Settings & settings = context->getSettingsRef();
2023-09-18 20:08:37 +00:00
max_substrings_includes_remaining_string = settings.splitby_max_substrings_includes_remaining_string;
2023-09-11 18:48:40 +00:00
}
String getName() const override { return name; }
2021-06-22 16:21:23 +00:00
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
2022-01-30 01:10:27 +00:00
bool isVariadic() const override { return Generator::isVariadic(); }
2016-12-29 19:38:10 +00:00
size_t getNumberOfArguments() const override { return Generator::getNumberOfArguments(); }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
Generator::checkArguments(*this, arguments);
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
2012-09-17 04:34:19 +00:00
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
2012-09-17 04:34:19 +00:00
{
2013-03-26 19:30:23 +00:00
Generator generator;
2023-09-18 20:08:37 +00:00
generator.init(arguments, max_substrings_includes_remaining_string);
const auto & array_argument = arguments[generator.strings_argument_position];
2020-10-18 19:00:13 +00:00
const ColumnString * col_str = checkAndGetColumn<ColumnString>(array_argument.column.get());
const ColumnConst * col_str_const = checkAndGetColumnConstStringOrFixedString(array_argument.column.get());
auto col_res = ColumnArray::create(ColumnString::create());
ColumnString & res_strings = typeid_cast<ColumnString &>(col_res->getData());
ColumnString::Chars & res_strings_chars = res_strings.getChars();
ColumnString::Offsets & res_strings_offsets = res_strings.getOffsets();
ColumnArray::Offsets & res_offsets = col_res->getOffsets();
2012-09-17 04:34:19 +00:00
if (col_str)
{
const ColumnString::Chars & src_chars = col_str->getChars();
const ColumnString::Offsets & src_offsets = col_str->getOffsets();
2012-09-17 04:34:19 +00:00
res_offsets.reserve(src_offsets.size());
2017-05-27 15:45:25 +00:00
res_strings_offsets.reserve(src_offsets.size() * 5); /// Constant 5 - at random.
2012-09-17 04:34:19 +00:00
res_strings_chars.reserve(src_chars.size());
2014-04-08 07:31:51 +00:00
Pos token_begin = nullptr;
Pos token_end = nullptr;
2012-09-17 04:34:19 +00:00
size_t size = src_offsets.size();
ColumnString::Offset current_src_offset = 0;
ColumnArray::Offset current_dst_offset = 0;
ColumnString::Offset current_dst_strings_offset = 0;
2012-09-17 04:34:19 +00:00
for (size_t i = 0; i < size; ++i)
{
Pos pos = reinterpret_cast<Pos>(&src_chars[current_src_offset]);
current_src_offset = src_offsets[i];
Pos end = reinterpret_cast<Pos>(&src_chars[current_src_offset]) - 1;
generator.set(pos, end);
2012-09-17 04:34:19 +00:00
size_t j = 0;
2023-09-11 18:48:40 +00:00
while (generator.get(token_begin, token_end))
2012-09-17 04:34:19 +00:00
{
size_t token_size = token_end - token_begin;
2012-09-17 04:34:19 +00:00
res_strings_chars.resize(res_strings_chars.size() + token_size + 1);
memcpySmallAllowReadWriteOverflow15(&res_strings_chars[current_dst_strings_offset], token_begin, token_size);
2013-10-31 19:24:22 +00:00
res_strings_chars[current_dst_strings_offset + token_size] = 0;
2012-09-17 04:34:19 +00:00
current_dst_strings_offset += token_size + 1;
res_strings_offsets.push_back(current_dst_strings_offset);
++j;
}
2012-09-17 04:34:19 +00:00
current_dst_offset += j;
res_offsets.push_back(current_dst_offset);
}
2020-10-18 19:00:13 +00:00
return col_res;
2012-09-17 04:34:19 +00:00
}
else if (col_str_const)
2012-09-17 04:34:19 +00:00
{
String src = col_str_const->getValue<String>();
2012-09-17 04:34:19 +00:00
Array dst;
generator.set(src.data(), src.data() + src.size());
2014-04-08 07:31:51 +00:00
Pos token_begin = nullptr;
Pos token_end = nullptr;
2023-09-11 18:48:40 +00:00
while (generator.get(token_begin, token_end))
2012-09-17 04:34:19 +00:00
dst.push_back(String(token_begin, token_end - token_begin));
return result_type->createColumnConst(col_str_const->size(), dst);
2012-09-17 04:34:19 +00:00
}
else
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {}, {} of arguments of function {}",
array_argument.column->getName(), array_argument.column->getName(), getName());
2012-09-17 04:34:19 +00:00
}
};
2021-10-22 20:19:32 +00:00
/// Joins an array of type serializable to string into one string via a separator.
class FunctionArrayStringConcat : public IFunction
{
private:
2021-10-19 17:54:27 +00:00
static void executeInternal(
const ColumnString::Chars & src_chars,
const ColumnString::Offsets & src_string_offsets,
const ColumnArray::Offsets & src_array_offsets,
2021-10-19 17:54:27 +00:00
const char * delimiter,
const size_t delimiter_size,
ColumnString::Chars & dst_chars,
2021-10-19 17:54:27 +00:00
ColumnString::Offsets & dst_string_offsets,
const char8_t * null_map)
{
size_t size = src_array_offsets.size();
if (!size)
return;
2017-05-27 15:45:25 +00:00
/// With a small margin - as if the separator goes after the last string of the array.
dst_chars.resize(
src_chars.size()
2017-05-27 15:45:25 +00:00
+ delimiter_size * src_string_offsets.size() /// Separators after each string...
+ src_array_offsets.size() /// Zero byte after each joined string
- src_string_offsets.size()); /// The former zero byte after each string of the array
2017-05-27 15:45:25 +00:00
/// There will be as many strings as there were arrays.
dst_string_offsets.resize(src_array_offsets.size());
ColumnArray::Offset current_src_array_offset = 0;
ColumnString::Offset current_dst_string_offset = 0;
2017-05-27 15:45:25 +00:00
/// Loop through the array of strings.
for (size_t i = 0; i < size; ++i)
{
2021-10-19 17:54:27 +00:00
bool first_non_null = true;
2017-05-27 15:45:25 +00:00
/// Loop through the rows within the array. /// NOTE You can do everything in one copy, if the separator has a size of 1.
for (auto next_src_array_offset = src_array_offsets[i]; current_src_array_offset < next_src_array_offset; ++current_src_array_offset)
{
if (null_map && null_map[current_src_array_offset]) [[unlikely]]
2021-10-19 17:54:27 +00:00
continue;
if (!first_non_null)
{
memcpy(&dst_chars[current_dst_string_offset], delimiter, delimiter_size);
current_dst_string_offset += delimiter_size;
}
first_non_null = false;
const auto current_src_string_offset = current_src_array_offset ? src_string_offsets[current_src_array_offset - 1] : 0;
size_t bytes_to_copy = src_string_offsets[current_src_array_offset] - current_src_string_offset - 1;
memcpySmallAllowReadWriteOverflow15(
&dst_chars[current_dst_string_offset], &src_chars[current_src_string_offset], bytes_to_copy);
current_dst_string_offset += bytes_to_copy;
}
dst_chars[current_dst_string_offset] = 0;
++current_dst_string_offset;
dst_string_offsets[i] = current_dst_string_offset;
}
dst_chars.resize(dst_string_offsets.back());
}
2021-10-19 17:54:27 +00:00
static void executeInternal(
const ColumnString & col_string,
const ColumnArray & col_arr,
const String & delimiter,
ColumnString & col_res,
const char8_t * null_map = nullptr)
{
executeInternal(
col_string.getChars(),
col_string.getOffsets(),
col_arr.getOffsets(),
delimiter.data(),
delimiter.size(),
col_res.getChars(),
col_res.getOffsets(),
null_map);
}
2021-10-22 20:19:32 +00:00
static ColumnPtr serializeNestedColumn(const ColumnArray & col_arr, const DataTypePtr & nested_type)
{
if (isString(nested_type))
{
return col_arr.getDataPtr();
}
else if (const ColumnNullable * col_nullable = checkAndGetColumn<ColumnNullable>(col_arr.getData());
col_nullable && isString(col_nullable->getNestedColumn().getDataType()))
{
return col_nullable->getNestedColumnPtr();
}
else
{
ColumnsWithTypeAndName cols;
cols.emplace_back(col_arr.getDataPtr(), nested_type, "tmp");
2021-12-15 17:40:36 +00:00
return ConvertImplGenericToString<ColumnString>::execute(cols, std::make_shared<DataTypeString>(), col_arr.size());
2021-10-22 20:19:32 +00:00
}
}
public:
static constexpr auto name = "arrayStringConcat";
2021-06-01 12:20:52 +00:00
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayStringConcat>(); }
String getName() const override
{
return name;
}
2016-12-29 19:38:10 +00:00
bool isVariadic() const override { return true; }
2021-06-22 16:21:23 +00:00
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
2016-12-29 19:38:10 +00:00
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override;
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /*input_rows_count*/) const override
{
String delimiter;
if (arguments.size() == 2)
{
2020-10-18 19:00:13 +00:00
const ColumnConst * col_delim = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
if (!col_delim)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant string.", getName());
ColumnConst unification (#1011) * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * Fixed error in ColumnArray::replicateGeneric [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150].
2017-07-21 06:35:58 +00:00
delimiter = col_delim->getValue<String>();
}
2021-10-22 20:19:32 +00:00
const auto & nested_type = assert_cast<const DataTypeArray &>(*arguments[0].type).getNestedType();
if (const ColumnConst * col_const_arr = checkAndGetColumnConst<ColumnArray>(arguments[0].column.get());
col_const_arr && isString(nested_type))
{
ColumnConst unification (#1011) * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * Fixed error in ColumnArray::replicateGeneric [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150].
2017-07-21 06:35:58 +00:00
Array src_arr = col_const_arr->getValue<Array>();
String dst_str;
2021-10-12 21:23:40 +00:00
bool first_non_null = true;
for (size_t i = 0, size = src_arr.size(); i < size; ++i)
{
2021-10-12 21:23:40 +00:00
if (src_arr[i].isNull())
continue;
if (!first_non_null)
dst_str += delimiter;
2021-10-12 21:23:40 +00:00
first_non_null = false;
dst_str += src_arr[i].get<const String &>();
}
ColumnConst unification (#1011) * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * Fixed error in ColumnArray::replicateGeneric [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150]. * ColumnConst: unification (incomplete) [#CLICKHOUSE-3150].
2017-07-21 06:35:58 +00:00
2020-10-18 19:00:13 +00:00
return result_type->createColumnConst(col_const_arr->size(), dst_str);
}
2021-10-22 20:19:32 +00:00
ColumnPtr src_column = arguments[0].column->convertToFullColumnIfConst();
const ColumnArray & col_arr = assert_cast<const ColumnArray &>(*src_column.get());
ColumnPtr str_subcolumn = serializeNestedColumn(col_arr, nested_type);
const ColumnString & col_string = assert_cast<const ColumnString &>(*str_subcolumn.get());
auto col_res = ColumnString::create();
if (const ColumnNullable * col_nullable = checkAndGetColumn<ColumnNullable>(col_arr.getData()))
executeInternal(col_string, col_arr, delimiter, *col_res, col_nullable->getNullMapData().data());
else
2021-10-22 20:19:32 +00:00
executeInternal(col_string, col_arr, delimiter, *col_res);
return col_res;
}
};
2022-11-03 07:06:58 +00:00
/// Concrete functions obtained by instantiating FunctionTokens with the corresponding generator implementation.
using FunctionSplitByAlpha = FunctionTokens<SplitByAlphaImpl>;
using FunctionSplitByNonAlpha = FunctionTokens<SplitByNonAlphaImpl>;
using FunctionSplitByWhitespace = FunctionTokens<SplitByWhitespaceImpl>;
using FunctionSplitByChar = FunctionTokens<SplitByCharImpl>;
using FunctionSplitByString = FunctionTokens<SplitByStringImpl>;
using FunctionSplitByRegexp = FunctionTokens<SplitByRegexpImpl>;
using FunctionExtractAll = FunctionTokens<ExtractAllImpl>;
2012-09-17 04:34:19 +00:00
}