Merge pull request #34055 from palegre-tiny/groupSortedArray

Add groupSortedArray() function
This commit is contained in:
Nikolay Degterinsky 2022-03-31 01:20:15 +03:00 committed by GitHub
commit f055d7b692
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 758 additions and 0 deletions

View File

@ -0,0 +1,48 @@
---
toc_priority: 108
---
# groupArraySorted {#groupArraySorted}
Returns an array with the first N items in ascending order.
``` sql
groupArraySorted(N)(column)
```
**Arguments**
- `N` The number of elements to return.
If the parameter is omitted, default value 10 is used.
**Arguments**
- `column` The value.
- `expr` — Optional. The field or expresion to sort by. If not set values are sorted by themselves.
**Example**
Gets the first 10 numbers:
``` sql
SELECT groupArraySorted(10)(number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number)─┐
│ [0,1,2,3,4,5,6,7,8,9] │
└──────────────────────────────┘
```
Or the last 10:
``` sql
SELECT groupArraySorted(10)(number, -number) FROM numbers(100)
```
``` text
┌─groupArraySorted(10)(number, negate(number))─┐
│ [99,98,97,96,95,94,93,92,91,90] │
└──────────────────────────────────────────────┘
```

View File

@ -35,6 +35,7 @@ ClickHouse-specific aggregate functions:
- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupArraySorted](../../../sql-reference/aggregate-functions/reference/grouparraysorted.md)
- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md)

View File

@ -0,0 +1,147 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySorted.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeString.h>
#include <Common/FieldVisitorConvertToNumber.h>
static inline constexpr UInt64 GROUP_SORTED_ARRAY_MAX_SIZE = 0xFFFFFF;
static inline constexpr UInt64 GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD = 10;
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
namespace
{
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedNumeric : public AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>
{
using AggregateFunctionGroupArraySorted<T, false, expr_sorted, TColumnB, is_plain_b>::AggregateFunctionGroupArraySorted;
};
template <typename T, bool expr_sorted, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySortedFieldType
: public AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>
{
using AggregateFunctionGroupArraySorted<typename T::FieldType, false, expr_sorted, TColumnB, is_plain_b>::
AggregateFunctionGroupArraySorted;
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(std::make_shared<T>()); }
};
template <template <typename, bool, typename, bool> class AggregateFunctionTemplate, typename TColumnA, bool expr_sorted, typename TColumnB, bool is_plain_b, typename... TArgs>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTypedFinal(TArgs && ... args)
{
return AggregateFunctionPtr(new AggregateFunctionTemplate<TColumnA, expr_sorted, TColumnB, is_plain_b>(std::forward<TArgs>(args)...));
}
template <bool expr_sorted = false, typename TColumnB = UInt64, bool is_plain_b = false>
AggregateFunctionPtr
createAggregateFunctionGroupArraySortedTyped(const DataTypes & argument_types, const Array & params, UInt64 threshold)
{
#define DISPATCH(A, C, B) \
if (which.idx == TypeIndex::A) \
return createAggregateFunctionGroupArraySortedTypedFinal<C, B, expr_sorted, TColumnB, is_plain_b>(threshold, argument_types, params);
#define DISPATCH_NUMERIC(A) DISPATCH(A, AggregateFunctionGroupArraySortedNumeric, A)
WhichDataType which(argument_types[0]);
FOR_NUMERIC_TYPES(DISPATCH_NUMERIC)
DISPATCH(Enum8, AggregateFunctionGroupArraySortedNumeric, Int8)
DISPATCH(Enum16, AggregateFunctionGroupArraySortedNumeric, Int16)
DISPATCH(Date, AggregateFunctionGroupArraySortedFieldType, DataTypeDate)
DISPATCH(DateTime, AggregateFunctionGroupArraySortedFieldType, DataTypeDateTime)
#undef DISPATCH
#undef DISPATCH_NUMERIC
if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
{
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, true, expr_sorted, TColumnB, is_plain_b>(
threshold, argument_types, params));
}
else
{
return AggregateFunctionPtr(new AggregateFunctionGroupArraySorted<StringRef, false, expr_sorted, TColumnB, is_plain_b>(
threshold, argument_types, params));
}
}
AggregateFunctionPtr createAggregateFunctionGroupArraySorted(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
UInt64 threshold = GROUP_SORTED_ARRAY_DEFAULT_THRESHOLD;
if (params.size() == 1)
{
UInt64 k = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
if (k > GROUP_SORTED_ARRAY_MAX_SIZE)
throw Exception(
"Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(GROUP_SORTED_ARRAY_MAX_SIZE),
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (k == 0)
throw Exception("Parameter 0 is illegal for aggregate function " + name, ErrorCodes::ARGUMENT_OUT_OF_BOUND);
threshold = k;
}
else if (!params.empty())
{
throw Exception("Aggregate function " + name + " only supports 1 parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
if (argument_types.size() == 2)
{
if (isNumber(argument_types[1]))
{
#define DISPATCH2(A, B) \
if (which.idx == TypeIndex::A) \
return createAggregateFunctionGroupArraySortedTyped<true, B>(argument_types, params, threshold);
#define DISPATCH(A) DISPATCH2(A, A)
WhichDataType which(argument_types[1]);
FOR_NUMERIC_TYPES(DISPATCH)
DISPATCH2(Enum8, Int8)
DISPATCH2(Enum16, Int16)
#undef DISPATCH
#undef DISPATCH2
throw Exception("Invalid parameter type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
else if (argument_types[1]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
{
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, true>(argument_types, params, threshold);
}
else
{
return createAggregateFunctionGroupArraySortedTyped<true, StringRef, false>(argument_types, params, threshold);
}
}
else if (argument_types.size() == 1)
{
return createAggregateFunctionGroupArraySortedTyped<>(argument_types, params, threshold);
}
else
{
throw Exception(
"Aggregate function " + name + " requires one or two parameters.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
}
}
}
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
factory.registerFunction("groupArraySorted", {createAggregateFunctionGroupArraySorted, properties});
}
}

View File

@ -0,0 +1,310 @@
#pragma once
#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>
#include <AggregateFunctions/AggregateFunctionGroupArraySortedData.h>
#include <AggregateFunctions/IAggregateFunction.h>
namespace DB
{
template <typename TColumn, bool is_plain>
inline TColumn readItem(const IColumn * column, Arena * arena, size_t row)
{
if constexpr (std::is_same_v<TColumn, StringRef>)
{
if constexpr (is_plain)
{
StringRef str = column->getDataAt(row);
auto ptr = arena->alloc(str.size);
std::copy(str.data, str.data + str.size, ptr);
return StringRef(ptr, str.size);
}
else
{
const char * begin = nullptr;
return column->serializeValueIntoArena(row, *arena, begin);
}
}
else
{
if constexpr (std::is_same_v<TColumn, UInt64>)
return column->getUInt(row);
else
return column->getInt(row);
}
}
template <typename TColumn, typename TFilter = void>
size_t
getFirstNElements_low_threshold(const TColumn * data, int num_elements, int threshold, size_t * results, const TFilter * filter = nullptr)
{
for (int i = 0; i < threshold; i++)
{
results[i] = 0;
}
threshold = std::min(num_elements, threshold);
int current_max = 0;
int cur;
int z;
for (int i = 0; i < num_elements; i++)
{
if constexpr (!std::is_same_v<TFilter, void>)
{
if (filter[i] == 0)
continue;
}
//Starting from the highest values and we look for the immediately lower than the given one
for (cur = current_max; cur > 0; cur--)
{
if (data[i] > data[results[cur - 1]])
break;
}
if (cur < threshold)
{
//Move all the higher values 1 position to the right
for (z = std::min(threshold - 1, current_max); z > cur; z--)
results[z] = results[z - 1];
if (current_max < threshold)
++current_max;
//insert element into the given position
results[cur] = i;
}
}
return current_max;
}
template <typename T>
struct SortableItem
{
T a;
size_t b;
bool operator<(const SortableItem & other) const { return (this->a < other.a); }
};
template <typename TColumn, typename TFilter = void>
size_t getFirstNElements_high_threshold(
const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const TFilter * filter = nullptr)
{
std::vector<SortableItem<TColumn>> dataIndexed(num_elements);
size_t num_elements_filtered = 0;
for (size_t i = 0; i < num_elements; i++)
{
if constexpr (!std::is_same_v<TFilter, void>)
{
if (filter[i] == 0)
continue;
}
dataIndexed.data()[num_elements_filtered].a = data[i];
dataIndexed.data()[num_elements_filtered].b = i;
num_elements_filtered++;
}
threshold = std::min(num_elements_filtered, threshold);
std::nth_element(dataIndexed.data(), dataIndexed.data() + threshold, dataIndexed.data() + num_elements_filtered);
std::sort(dataIndexed.data(), dataIndexed.data() + threshold);
for (size_t i = 0; i < threshold; i++)
{
results[i] = dataIndexed[i].b;
}
return threshold;
}
static const size_t THRESHOLD_MAX_CUSTOM_FUNCTION = 1000;
template <typename TColumn>
size_t getFirstNElements(const TColumn * data, size_t num_elements, size_t threshold, size_t * results, const UInt8 * filter = nullptr)
{
if (threshold < THRESHOLD_MAX_CUSTOM_FUNCTION)
{
if (filter != nullptr)
return getFirstNElements_low_threshold(data, num_elements, threshold, results, filter);
else
return getFirstNElements_low_threshold(data, num_elements, threshold, results);
}
else
{
if (filter != nullptr)
return getFirstNElements_high_threshold(data, num_elements, threshold, results, filter);
else
return getFirstNElements_high_threshold(data, num_elements, threshold, results);
}
}
template <typename TColumnA, bool is_plain_a, bool use_column_b, typename TColumnB, bool is_plain_b>
class AggregateFunctionGroupArraySorted : public IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted<TColumnA, is_plain_a, use_column_b, TColumnB, is_plain_b>>
{
protected:
using State = AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>;
using Base = IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted>;
UInt64 threshold;
DataTypePtr & input_data_type;
mutable std::mutex mutex;
static void deserializeAndInsert(StringRef str, IColumn & data_to);
public:
AggregateFunctionGroupArraySorted(UInt64 threshold_, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<
AggregateFunctionGroupArraySortedData<TColumnA, use_column_b, TColumnB>,
AggregateFunctionGroupArraySorted>(argument_types_, params)
, threshold(threshold_)
, input_data_type(this->argument_types[0])
{
}
void create(AggregateDataPtr place) const override
{
Base::create(place);
this->data(place).threshold = threshold;
}
String getName() const override { return "groupArraySorted"; }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(input_data_type); }
bool allocatesMemoryInArena() const override
{
if constexpr (std::is_same_v<TColumnA, StringRef>)
return true;
else
return false;
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
State & data = this->data(place);
if constexpr (use_column_b)
{
data.add(
readItem<TColumnA, is_plain_a>(columns[0], arena, row_num), readItem<TColumnB, is_plain_b>(columns[1], arena, row_num));
}
else
{
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row_num));
}
}
template <typename TColumn, bool is_plain, typename TFunc>
void
forFirstRows(size_t batch_size, const IColumn ** columns, size_t data_column, Arena * arena, ssize_t if_argument_pos, TFunc func) const
{
const TColumn * values = nullptr;
std::unique_ptr<std::vector<TColumn>> values_vector;
std::vector<size_t> best_rows(threshold);
if constexpr (std::is_same_v<TColumn, StringRef>)
{
values_vector.reset(new std::vector<TColumn>(batch_size));
for (size_t i = 0; i < batch_size; i++)
(*values_vector)[i] = readItem<TColumn, is_plain>(columns[data_column], arena, i);
values = (*values_vector).data();
}
else
{
const auto & column = assert_cast<const ColumnVector<TColumn> &>(*columns[data_column]);
values = column.getData().data();
}
const UInt8 * filter = nullptr;
StringRef refFilter;
if (if_argument_pos >= 0)
{
refFilter = columns[if_argument_pos]->getRawData();
filter = reinterpret_cast<const UInt8 *>(refFilter.data);
}
size_t num_elements = getFirstNElements(values, batch_size, threshold, best_rows.data(), filter);
for (size_t i = 0; i < num_elements; i++)
{
func(best_rows[i], values);
}
}
void addBatchSinglePlace(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos) const override
{
State & data = this->data(place);
if constexpr (use_column_b)
{
forFirstRows<TColumnB, is_plain_b>(
batch_size, columns, 1, arena, if_argument_pos, [columns, &arena, &data](size_t row, const TColumnB * values)
{
data.add(readItem<TColumnA, is_plain_a>(columns[0], arena, row), values[row]);
});
}
else
{
forFirstRows<TColumnA, is_plain_a>(
batch_size, columns, 0, arena, if_argument_pos, [&data](size_t row, const TColumnA * values)
{
data.add(values[row]);
});
}
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
this->data(place).serialize(buf);
}
void
deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
this->data(place).deserialize(buf, arena);
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * /*arena*/) const override
{
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
auto & values = this->data(place).values;
offsets_to.push_back(offsets_to.back() + values.size());
IColumn & data_to = arr_to.getData();
for (auto value : values)
{
if constexpr (std::is_same_v<TColumnA, StringRef>)
{
auto str = State::itemValue(value);
if constexpr (is_plain_a)
{
data_to.insertData(str.data, str.size);
}
else
{
data_to.deserializeAndInsertFromArena(str.data);
}
}
else
{
data_to.insert(State::itemValue(value));
}
}
}
};
}

View File

@ -0,0 +1,162 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/VarInt.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
static inline constexpr UInt64 GROUP_SORTED_DEFAULT_THRESHOLD = 0xFFFFFF;
namespace DB
{
template <typename T>
static void writeOneItem(WriteBuffer & buf, T item)
{
if constexpr (std::numeric_limits<T>::is_signed)
{
writeVarInt(item, buf);
}
else
{
writeVarUInt(item, buf);
}
}
static void writeOneItem(WriteBuffer & buf, const StringRef & item)
{
writeBinary(item, buf);
}
template <typename T>
static void readOneItem(ReadBuffer & buf, Arena * /*arena*/, T & item)
{
if constexpr (std::numeric_limits<T>::is_signed)
{
DB::Int64 val;
readVarT(val, buf);
item = val;
}
else
{
DB::UInt64 val;
readVarT(val, buf);
item = val;
}
}
static void readOneItem(ReadBuffer & buf, Arena * arena, StringRef & item)
{
item = readStringBinaryInto(*arena, buf);
}
template <typename Storage>
struct AggregateFunctionGroupArraySortedDataBase
{
typedef typename Storage::value_type ValueType;
AggregateFunctionGroupArraySortedDataBase(UInt64 threshold_ = GROUP_SORTED_DEFAULT_THRESHOLD) : threshold(threshold_) { }
virtual ~AggregateFunctionGroupArraySortedDataBase() { }
inline void narrowDown()
{
while (values.size() > threshold)
values.erase(--values.end());
}
void merge(const AggregateFunctionGroupArraySortedDataBase & other)
{
values.merge(Storage(other.values));
narrowDown();
}
void serialize(WriteBuffer & buf) const
{
writeOneItem(buf, UInt64(values.size()));
for (auto value : values)
{
serializeItem(buf, value);
}
}
virtual void serializeItem(WriteBuffer & buf, ValueType & val) const = 0;
virtual ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const = 0;
void deserialize(ReadBuffer & buf, Arena * arena)
{
values.clear();
UInt64 length;
readOneItem(buf, nullptr, length);
while (length--)
{
values.insert(deserializeItem(buf, arena));
}
narrowDown();
}
UInt64 threshold;
Storage values;
};
template <typename T, bool expr_sorted, typename TIndex>
struct AggregateFunctionGroupArraySortedData
{
};
template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, true, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>
{
using Base = AggregateFunctionGroupArraySortedDataBase<std::multimap<TIndex, T>>;
using Base::Base;
void add(T item, TIndex weight)
{
Base::values.insert({weight, item});
Base::narrowDown();
}
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override
{
writeOneItem(buf, value.first);
writeOneItem(buf, value.second);
}
virtual typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
{
TIndex first;
T second;
readOneItem(buf, arena, first);
readOneItem(buf, arena, second);
return {first, second};
}
static T itemValue(typename Base::ValueType & value) { return value.second; }
};
template <typename T, typename TIndex>
struct AggregateFunctionGroupArraySortedData<T, false, TIndex> : public AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>
{
using Base = AggregateFunctionGroupArraySortedDataBase<std::multiset<T>>;
using Base::Base;
void add(T item)
{
Base::values.insert(item);
Base::narrowDown();
}
void serializeItem(WriteBuffer & buf, typename Base::ValueType & value) const override { writeOneItem(buf, value); }
typename Base::ValueType deserializeItem(ReadBuffer & buf, Arena * arena) const override
{
T value;
readOneItem(buf, arena, value);
return value;
}
static T itemValue(typename Base::ValueType & value) { return value; }
};
}

View File

@ -59,6 +59,7 @@ void registerAggregateFunctionNothing(AggregateFunctionFactory &);
void registerAggregateFunctionExponentialMovingAverage(AggregateFunctionFactory &);
void registerAggregateFunctionSparkbar(AggregateFunctionFactory &);
void registerAggregateFunctionIntervalLengthSum(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factory);
class AggregateFunctionCombinatorFactory;
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
@ -130,6 +131,7 @@ void registerAggregateFunctions()
registerAggregateFunctionIntervalLengthSum(factory);
registerAggregateFunctionExponentialMovingAverage(factory);
registerAggregateFunctionSparkbar(factory);
registerAggregateFunctionGroupArraySorted(factory);
registerWindowFunctions(factory);
}

View File

@ -0,0 +1,27 @@
<test>
<settings>
<max_threads>10</max_threads>
</settings>
<substitutions>
<substitution>
<name>items</name>
<values>
<value>1000</value>
<value>100000</value>
<value>10000000</value>
</values>
</substitution>
</substitutions>
<create_query>CREATE TABLE test ( `id` UInt64, `value` UInt64, `text` String ) ENGINE = Memory</create_query>
<fill_query>INSERT INTO test SELECT number as id, rand64() as value, toString(number) as text FROM numbers({items})</fill_query>
<query>SELECT groupArraySorted(10)(id, value) FROM test</query>
<query>SELECT groupArraySorted(10)(text, value) FROM test</query>
<query>SELECT groupArraySorted(10)((id, text), value) FROM test</query>
<query>SELECT groupArraySorted(10)(text) FROM test</query>
<query>SELECT groupArraySorted(10000)(id, value) FROM test</query>
<query>SELECT groupArraySorted(10000)(text, value) FROM test</query>
<query>SELECT groupArraySorted(10000)((id, text), value) FROM test</query>
<query>SELECT groupArraySorted(10000)(text) FROM test</query>
<drop_query>DROP TABLE IF EXISTS test</drop_query>
</test>

View File

@ -0,0 +1,18 @@
[0,1,2,3,4]
[0,1,2,3,4,5,6,7,8,9]
[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99]
[999,998,997,996,995,994,993,992,991,990,989,988,987,986,985,984,983,982,981,980,979,978,977,976,975,974,973,972,971,970,969,968,967,966,965,964,963,962,961,960,959,958,957,956,955,954,953,952,951,950,949,948,947,946,945,944,943,942,941,940,939,938,937,936,935,934,933,932,931,930,929,928,927,926,925,924,923,922,921,920,919,918,917,916,915,914,913,912,911,910,909,908,907,906,905,904,903,902,901,900]
['0','1','2','3','4']
['0','1','2','3','4']
['9','8','7','6','5']
[(0,'0'),(1,'1'),(2,'2'),(3,'3'),(4,'4')]
['0','1','10','11','12']
['0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49']
[0,0,1,1,2,2,3,3,4,4]
pablo [1,2]
luis [1,3]
pablo [1,2]
luis [1,3]
[4,5,6,7,8]
[10,11,12,13,14]
['10','11','12','13','14']

View File

@ -0,0 +1,43 @@
SELECT groupArraySorted(5)(number) from numbers(100);
SELECT groupArraySorted(number, number) from numbers(100);
SELECT groupArraySorted(100)(number, number) from numbers(1000);
SELECT groupArraySorted(100)(number, -number) from numbers(1000);
SELECT groupArraySorted(5)(str, number) FROM (SELECT toString(number) as str, number FROM numbers(10));
SELECT groupArraySorted(5)(text) FROM (select toString(number) as text from numbers(10));
SELECT groupArraySorted(5)(text, -number) FROM (select toString(number) as text, number from numbers(10));
SELECT groupArraySorted(5)((number,text)) from (SELECT toString(number) as text, number FROM numbers(100));
SELECT groupArraySorted(5)(text,text) from (SELECT toString(number) as text FROM numbers(100));
SELECT groupArraySorted(50)(text,(number,text)) from (SELECT toString(number) as text, number FROM numbers(100));
SELECT groupArraySorted(10)(toInt64(number/2)) FROM numbers(100);
DROP TABLE IF EXISTS test;
DROP VIEW IF EXISTS mv_test;
CREATE TABLE test (`n` String, `h` Int64) ENGINE = MergeTree ORDER BY n;
CREATE MATERIALIZED VIEW mv_test (`n` String, `h` AggregateFunction(groupArraySorted(2), Int64, Int64)) ENGINE = AggregatingMergeTree ORDER BY n AS SELECT n, groupArraySortedState(2)(h, h) as h FROM test GROUP BY n;
INSERT INTO test VALUES ('pablo',1)('pablo', 2)('luis', 1)('luis', 3)('pablo', 5)('pablo',4)('pablo', 5)('luis', 6)('luis', 7)('pablo', 8)('pablo',9)('pablo',10)('luis',11)('luis',12)('pablo',13);
SELECT n, groupArraySortedMerge(2)(h) from mv_test GROUP BY n;
DROP TABLE IF EXISTS test;
DROP VIEW IF EXISTS mv_test;
CREATE TABLE test (`n` String, `h` Int64) ENGINE = MergeTree ORDER BY n;
CREATE MATERIALIZED VIEW mv_test (`n` String, `h` AggregateFunction(groupArraySorted(2), Int64)) ENGINE = AggregatingMergeTree ORDER BY n AS SELECT n, groupArraySortedState(2)(h) as h FROM test GROUP BY n;
INSERT INTO test VALUES ('pablo',1)('pablo', 2)('luis', 1)('luis', 3)('pablo', 5)('pablo',4)('pablo', 5)('luis', 6)('luis', 7)('pablo', 8)('pablo',9)('pablo',10)('luis',11)('luis',12)('pablo',13);
SELECT n, groupArraySortedMerge(2)(h) from mv_test GROUP BY n;
DROP TABLE test;
DROP VIEW mv_test;
SELECT groupArraySortedIf(5)(number, number, number>3) from numbers(100);
SELECT groupArraySortedIf(5)(number, toString(number), number>3) from numbers(100);
SELECT groupArraySortedIf(5)(toString(number), number>3) from numbers(100);