This commit is contained in:
Yarik Briukhovetskyi 2024-11-27 12:33:01 +01:00 committed by GitHub
parent 4a3ceac1c5
commit c2f74fa4aa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 63 additions and 11 deletions

View File

@ -15,12 +15,18 @@ groupConcat[(delimiter [, limit])](expression);
**Arguments** **Arguments**
- `expression` — The expression or column name that outputs strings to be concatenated.. - `expression` — The expression or column name that outputs strings to be concatenated.
- `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string if not specified.
**Parameters**
- `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string if not specified. - `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string if not specified.
- `limit` — A positive [integer](../../../sql-reference/data-types/int-uint.md) specifying the maximum number of elements to concatenate. If more elements are present, excess elements are ignored. This parameter is optional. - `limit` — A positive [integer](../../../sql-reference/data-types/int-uint.md) specifying the maximum number of elements to concatenate. If more elements are present, excess elements are ignored. This parameter is optional.
:::note :::note
If delimiter is specified without limit, it must be the first parameter. If both delimiter and limit are specified, delimiter must precede limit. If delimiter is specified without limit, it must be the first parameter. If both delimiter and limit are specified, delimiter must precede limit.
Also, if different delimiters are specified as parameters and arguments, the delimiter from arguments will be used only.
::: :::
**Returned value** **Returned value**

View File

@ -31,16 +31,24 @@ namespace ErrorCodes
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS; extern const int BAD_ARGUMENTS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
} }
namespace namespace
{ {
enum GroupConcatOverload
{
ONE_ARGUMENT,
TWO_ARGUMENTS
};
struct GroupConcatDataBase struct GroupConcatDataBase
{ {
UInt64 data_size = 0; UInt64 data_size = 0;
UInt64 allocated_size = 0; UInt64 allocated_size = 0;
char * data = nullptr; char * data = nullptr;
String data_delimiter;
void checkAndUpdateSize(UInt64 add, Arena * arena) void checkAndUpdateSize(UInt64 add, Arena * arena)
{ {
@ -117,16 +125,18 @@ class GroupConcatImpl final
UInt64 limit; UInt64 limit;
const String delimiter; const String delimiter;
const DataTypePtr type; const DataTypePtr type;
GroupConcatOverload overload = ONE_ARGUMENT;
public: public:
GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_) GroupConcatImpl(const DataTypes & data_types, const Array & parameters_, UInt64 limit_, const String & delimiter_)
: IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>( : IAggregateFunctionDataHelper<GroupConcatData<has_limit>, GroupConcatImpl<has_limit>>(
{data_type_}, parameters_, std::make_shared<DataTypeString>()) {data_types}, parameters_, std::make_shared<DataTypeString>())
, limit(limit_) , limit(limit_)
, delimiter(delimiter_) , delimiter(delimiter_)
, type(data_type_) , type(data_types[0])
{ {
serialization = isFixedString(type) ? std::make_shared<DataTypeString>()->getDefaultSerialization() : this->argument_types[0]->getDefaultSerialization(); serialization = isFixedString(type) ? std::make_shared<DataTypeString>()->getDefaultSerialization() : this->argument_types[0]->getDefaultSerialization();
overload = data_types.size() > 1 ? TWO_ARGUMENTS : ONE_ARGUMENT;
} }
String getName() const override { return name; } String getName() const override { return name; }
@ -134,13 +144,20 @@ public:
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{ {
auto & cur_data = this->data(place); auto & cur_data = this->data(place);
if (cur_data.data_delimiter.empty())
{
cur_data.data_delimiter = delimiter;
if (overload == GroupConcatOverload::TWO_ARGUMENTS)
cur_data.data_delimiter = columns[1]->getDataAt(0).toString();
}
const String & delim = cur_data.data_delimiter;
if constexpr (has_limit) if constexpr (has_limit)
if (cur_data.num_rows >= limit) if (cur_data.num_rows >= limit)
return; return;
if (cur_data.data_size != 0) if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena); cur_data.insertChar(delim.c_str(), delim.size(), arena);
if (isFixedString(type)) if (isFixedString(type))
{ {
@ -160,13 +177,15 @@ public:
if (rhs_data.data_size == 0) if (rhs_data.data_size == 0)
return; return;
const String & delim = cur_data.data_delimiter;
if constexpr (has_limit) if constexpr (has_limit)
{ {
UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows); UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows);
for (UInt64 i = 0; i < new_elems_count; ++i) for (UInt64 i = 0; i < new_elems_count; ++i)
{ {
if (cur_data.data_size != 0) if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena); cur_data.insertChar(delim.c_str(), delim.size(), arena);
cur_data.offsets.push_back(cur_data.data_size, arena); cur_data.offsets.push_back(cur_data.data_size, arena);
cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena); cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena);
@ -177,7 +196,7 @@ public:
else else
{ {
if (cur_data.data_size != 0) if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena); cur_data.insertChar(delim.c_str(), delim.size(), arena);
cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena); cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena);
} }
@ -238,9 +257,16 @@ public:
}; };
AggregateFunctionPtr createAggregateFunctionGroupConcat( AggregateFunctionPtr createAggregateFunctionGroupConcat(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) const std::string & name,
const DataTypes & argument_types,
const Array & parameters,
const Settings * /* settings */
)
{ {
assertUnary(name, argument_types); // Ensure the number of arguments is between 1 and 2
if (argument_types.empty() || argument_types.size() > 2)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Incorrect number of arguments for function {}, expected 1 to 2, got {}", name, argument_types.size());
bool has_limit = false; bool has_limit = false;
UInt64 limit = 0; UInt64 limit = 0;
@ -273,9 +299,19 @@ AggregateFunctionPtr createAggregateFunctionGroupConcat(
limit = parameters[1].safeGet<UInt64>(); limit = parameters[1].safeGet<UInt64>();
} }
// Handle additional arguments if provided (delimiter and limit as arguments)
if (argument_types.size() == 2)
{
// Second argument should be delimiter (string)
const DataTypePtr & delimiter_type = argument_types[1];
if (!isString(delimiter_type))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument for function {} must be a string", name);
}
if (has_limit) if (has_limit)
return std::make_shared<GroupConcatImpl</* has_limit= */ true>>(argument_types[0], parameters, limit, delimiter); return std::make_shared<GroupConcatImpl</* has_limit= */ true>>(argument_types, parameters, limit, delimiter);
return std::make_shared<GroupConcatImpl</* has_limit= */ false>>(argument_types[0], parameters, limit, delimiter); return std::make_shared<GroupConcatImpl</* has_limit= */ false>>(argument_types, parameters, limit, delimiter);
} }
} }

View File

@ -16,4 +16,8 @@ abc,a,makson95
abc,a,makson95,abc,a,makson95,abc,a,makson95 abc,a,makson95,abc,a,makson95,abc,a,makson95
[1,2,3][993,986,979,972][][1,2,3][993,986,979,972][][1,2,3][993,986,979,972][] [1,2,3][993,986,979,972][][1,2,3][993,986,979,972][][1,2,3][993,986,979,972][]
488890 488890
TESTING GroupConcat second argument overload
95,123
abc.a.makson95
[1,2,3]/[993,986,979,972]/[]
488890 488890

View File

@ -42,6 +42,12 @@ SELECT groupConcat(',', 3, 3)(number) FROM numbers(10); -- { serverError TOO_MAN
SELECT length(groupConcat(number)) FROM numbers(100000); SELECT length(groupConcat(number)) FROM numbers(100000);
SELECT 'TESTING GroupConcat second argument overload';
SELECT groupConcat(p_int, ',') FROM test_groupConcat;
SELECT groupConcat('.')(p_string) FROM test_groupConcat;
SELECT groupConcat(p_array, '/') FROM test_groupConcat;
DROP TABLE IF EXISTS test_groupConcat; DROP TABLE IF EXISTS test_groupConcat;
CREATE TABLE test_groupConcat CREATE TABLE test_groupConcat