2013-02-03 18:39:09 +00:00
|
|
|
|
#include <iomanip>
|
|
|
|
|
|
|
|
|
|
#include <statdaemons/Stopwatch.h>
|
|
|
|
|
|
2011-09-19 03:34:23 +00:00
|
|
|
|
#include <DB/DataTypes/DataTypeAggregateFunction.h>
|
|
|
|
|
#include <DB/Columns/ColumnAggregateFunction.h>
|
2011-09-26 07:25:22 +00:00
|
|
|
|
#include <DB/Columns/ColumnString.h>
|
|
|
|
|
#include <DB/Columns/ColumnFixedString.h>
|
|
|
|
|
#include <DB/Columns/ColumnsNumber.h>
|
2012-07-15 23:13:08 +00:00
|
|
|
|
#include <DB/AggregateFunctions/AggregateFunctionCount.h>
|
2011-09-19 03:34:23 +00:00
|
|
|
|
|
|
|
|
|
#include <DB/Interpreters/Aggregator.h>
|
2011-09-19 01:42:16 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
2011-09-26 07:25:22 +00:00
|
|
|
|
|
2013-02-16 18:59:05 +00:00
|
|
|
|
AggregatedDataVariants::~AggregatedDataVariants()
|
|
|
|
|
{
|
2013-11-03 23:54:12 +00:00
|
|
|
|
if (aggregator && !aggregator->all_aggregates_has_trivial_destructor)
|
2013-02-16 18:59:05 +00:00
|
|
|
|
aggregator->destroyAggregateStates(*this);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2012-03-05 07:58:34 +00:00
|
|
|
|
void Aggregator::initialize(Block & block)
|
|
|
|
|
{
|
|
|
|
|
Poco::ScopedLock<Poco::FastMutex> lock(mutex);
|
|
|
|
|
|
|
|
|
|
if (initialized)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
initialized = true;
|
2013-02-13 19:24:19 +00:00
|
|
|
|
|
|
|
|
|
aggregate_functions.resize(aggregates_size);
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_functions[i] = &*aggregates[i].function;
|
|
|
|
|
|
|
|
|
|
/// Инициализируем размеры состояний и смещения для агрегатных функций.
|
|
|
|
|
offsets_of_aggregate_states.resize(aggregates_size);
|
|
|
|
|
total_size_of_aggregate_states = 0;
|
2013-11-03 23:54:12 +00:00
|
|
|
|
all_aggregates_has_trivial_destructor = true;
|
2013-02-13 19:24:19 +00:00
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
{
|
|
|
|
|
offsets_of_aggregate_states[i] = total_size_of_aggregate_states;
|
|
|
|
|
total_size_of_aggregate_states += aggregates[i].function->sizeOfData();
|
2013-11-03 23:54:12 +00:00
|
|
|
|
|
|
|
|
|
if (!aggregates[i].function->hasTrivialDestructor())
|
|
|
|
|
all_aggregates_has_trivial_destructor = false;
|
2013-02-13 19:24:19 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Всё остальное - только если передан непустой block.
|
|
|
|
|
* (всё остальное не нужно в методе merge блоков с готовыми состояниями агрегатных функций).
|
|
|
|
|
*/
|
|
|
|
|
if (!block)
|
|
|
|
|
return;
|
2012-03-05 07:58:34 +00:00
|
|
|
|
|
|
|
|
|
/// Преобразуем имена столбцов в номера, если номера не заданы
|
|
|
|
|
if (keys.empty() && !key_names.empty())
|
|
|
|
|
for (Names::const_iterator it = key_names.begin(); it != key_names.end(); ++it)
|
|
|
|
|
keys.push_back(block.getPositionByName(*it));
|
|
|
|
|
|
|
|
|
|
for (AggregateDescriptions::iterator it = aggregates.begin(); it != aggregates.end(); ++it)
|
|
|
|
|
if (it->arguments.empty() && !it->argument_names.empty())
|
|
|
|
|
for (Names::const_iterator jt = it->argument_names.begin(); jt != it->argument_names.end(); ++jt)
|
|
|
|
|
it->arguments.push_back(block.getPositionByName(*jt));
|
|
|
|
|
|
|
|
|
|
/// Создадим пример блока, описывающего результат
|
|
|
|
|
if (!sample)
|
|
|
|
|
{
|
2013-02-08 20:34:30 +00:00
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
2012-05-31 01:13:15 +00:00
|
|
|
|
{
|
2012-03-05 07:58:34 +00:00
|
|
|
|
sample.insert(block.getByPosition(keys[i]).cloneEmpty());
|
2012-05-31 01:13:15 +00:00
|
|
|
|
if (sample.getByPosition(i).column->isConst())
|
|
|
|
|
sample.getByPosition(i).column = dynamic_cast<IColumnConst &>(*sample.getByPosition(i).column).convertToFullColumn();
|
|
|
|
|
}
|
2012-03-05 07:58:34 +00:00
|
|
|
|
|
2013-02-08 20:34:30 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2012-03-05 07:58:34 +00:00
|
|
|
|
{
|
|
|
|
|
ColumnWithNameAndType col;
|
|
|
|
|
col.name = aggregates[i].column_name;
|
2012-10-24 18:14:36 +00:00
|
|
|
|
|
|
|
|
|
size_t arguments_size = aggregates[i].arguments.size();
|
|
|
|
|
DataTypes argument_types(arguments_size);
|
|
|
|
|
for (size_t j = 0; j < arguments_size; ++j)
|
|
|
|
|
argument_types[j] = block.getByPosition(aggregates[i].arguments[j]).type;
|
|
|
|
|
|
|
|
|
|
col.type = new DataTypeAggregateFunction(aggregates[i].function, argument_types);
|
2013-02-08 20:34:30 +00:00
|
|
|
|
col.column = new ColumnAggregateFunction(aggregates[i].function);
|
2012-03-05 07:58:34 +00:00
|
|
|
|
|
|
|
|
|
sample.insert(col);
|
|
|
|
|
}
|
2013-02-08 20:34:30 +00:00
|
|
|
|
}
|
2012-03-05 07:58:34 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
bool keys_fit_128_bits = true;
|
2012-05-30 01:38:02 +00:00
|
|
|
|
size_t keys_bytes = 0;
|
|
|
|
|
key_sizes.resize(keys_size);
|
|
|
|
|
for (size_t j = 0; j < keys_size; ++j)
|
|
|
|
|
{
|
2013-07-19 20:12:02 +00:00
|
|
|
|
if (!key_columns[j]->isFixed())
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
|
|
|
|
keys_fit_128_bits = false;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
key_sizes[j] = key_columns[j]->sizeOfField();
|
|
|
|
|
keys_bytes += key_sizes[j];
|
|
|
|
|
}
|
|
|
|
|
if (keys_bytes > 16)
|
|
|
|
|
keys_fit_128_bits = false;
|
|
|
|
|
|
|
|
|
|
/// Если ключей нет
|
|
|
|
|
if (keys_size == 0)
|
|
|
|
|
return AggregatedDataVariants::WITHOUT_KEY;
|
|
|
|
|
|
2013-07-19 20:12:02 +00:00
|
|
|
|
/// Если есть один числовой ключ, который помещается в 64 бита
|
2013-02-16 20:15:45 +00:00
|
|
|
|
if (keys_size == 1 && key_columns[0]->isNumeric())
|
2012-05-30 01:38:02 +00:00
|
|
|
|
return AggregatedDataVariants::KEY_64;
|
|
|
|
|
|
2013-07-19 20:12:02 +00:00
|
|
|
|
/// Если ключи помещаются в 128 бит, будем использовать хэш-таблицу по упакованным в 128-бит ключам
|
|
|
|
|
if (keys_fit_128_bits)
|
|
|
|
|
return AggregatedDataVariants::KEYS_128;
|
|
|
|
|
|
2012-05-30 01:38:02 +00:00
|
|
|
|
/// Если есть один строковый ключ, то используем хэш-таблицу с ним
|
|
|
|
|
if (keys_size == 1
|
2013-01-08 19:41:22 +00:00
|
|
|
|
&& (dynamic_cast<const ColumnString *>(key_columns[0]) || dynamic_cast<const ColumnFixedString *>(key_columns[0])
|
|
|
|
|
|| dynamic_cast<const ColumnConstString *>(key_columns[0])))
|
2012-05-30 01:38:02 +00:00
|
|
|
|
return AggregatedDataVariants::KEY_STRING;
|
|
|
|
|
|
2013-06-30 16:56:00 +00:00
|
|
|
|
/// Иначе будем агрегировать по хэшу от ключей.
|
2012-05-30 01:38:02 +00:00
|
|
|
|
return AggregatedDataVariants::HASHED;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2011-09-28 05:24:38 +00:00
|
|
|
|
/** Результат хранится в оперативке и должен полностью помещаться в оперативку.
|
2011-09-19 01:42:16 +00:00
|
|
|
|
*/
|
2011-09-26 07:25:22 +00:00
|
|
|
|
void Aggregator::execute(BlockInputStreamPtr stream, AggregatedDataVariants & result)
|
2011-09-19 01:42:16 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
StringRefs key(keys_size);
|
2013-01-08 19:41:22 +00:00
|
|
|
|
ConstColumnPlainPtrs key_columns(keys_size);
|
2011-09-19 01:42:16 +00:00
|
|
|
|
|
2013-01-08 19:41:22 +00:00
|
|
|
|
typedef std::vector<ConstColumnPlainPtrs> AggregateColumns;
|
2011-09-19 01:42:16 +00:00
|
|
|
|
AggregateColumns aggregate_columns(aggregates_size);
|
|
|
|
|
|
2012-12-25 19:28:59 +00:00
|
|
|
|
/** Используется, если есть ограничение на максимальное количество строк при агрегации,
|
|
|
|
|
* и если group_by_overflow_mode == ANY.
|
|
|
|
|
* В этом случае, новые ключи не добавляются в набор, а производится агрегация только по
|
|
|
|
|
* ключам, которые уже успели попасть в набор.
|
|
|
|
|
*/
|
|
|
|
|
bool no_more_keys = false;
|
|
|
|
|
|
2013-02-04 03:31:53 +00:00
|
|
|
|
LOG_TRACE(log, "Aggregating");
|
2013-02-08 20:34:30 +00:00
|
|
|
|
|
2013-02-04 03:31:53 +00:00
|
|
|
|
Stopwatch watch;
|
|
|
|
|
|
|
|
|
|
size_t src_rows = 0;
|
|
|
|
|
size_t src_bytes = 0;
|
2013-02-09 01:02:52 +00:00
|
|
|
|
|
|
|
|
|
Sizes key_sizes;
|
|
|
|
|
|
|
|
|
|
/// Читаем все данные
|
2011-09-19 01:42:16 +00:00
|
|
|
|
while (Block block = stream->read())
|
|
|
|
|
{
|
2012-03-05 07:58:34 +00:00
|
|
|
|
initialize(block);
|
2013-02-16 18:59:05 +00:00
|
|
|
|
|
|
|
|
|
/// result будет уничтожать состояния агрегатных функций в деструкторе
|
|
|
|
|
result.aggregator = this;
|
|
|
|
|
|
2013-02-04 03:31:53 +00:00
|
|
|
|
src_rows += block.rows();
|
|
|
|
|
src_bytes += block.bytes();
|
2011-09-25 05:07:47 +00:00
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_columns[i].resize(aggregates[i].arguments.size());
|
2011-09-24 20:32:41 +00:00
|
|
|
|
|
2011-09-19 01:42:16 +00:00
|
|
|
|
/// Запоминаем столбцы, с которыми будем работать
|
2012-05-30 01:38:02 +00:00
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
2011-09-19 01:42:16 +00:00
|
|
|
|
key_columns[i] = block.getByPosition(keys[i]).column;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2013-06-25 14:16:16 +00:00
|
|
|
|
{
|
2011-09-19 01:42:16 +00:00
|
|
|
|
for (size_t j = 0; j < aggregate_columns[i].size(); ++j)
|
2013-06-25 14:16:16 +00:00
|
|
|
|
{
|
2011-09-19 01:42:16 +00:00
|
|
|
|
aggregate_columns[i][j] = block.getByPosition(aggregates[i].arguments[j]).column;
|
|
|
|
|
|
2013-06-25 14:16:16 +00:00
|
|
|
|
/** Агрегатные функции рассчитывают, что в них передаются полноценные столбцы.
|
|
|
|
|
* Поэтому, стобцы-константы не разрешены в качестве аргументов агрегатных функций.
|
|
|
|
|
*/
|
|
|
|
|
if (aggregate_columns[i][j]->isConst())
|
|
|
|
|
throw Exception("Constants is not allowed as arguments of aggregate functions", ErrorCodes::ILLEGAL_COLUMN);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2011-09-19 01:42:16 +00:00
|
|
|
|
size_t rows = block.rows();
|
|
|
|
|
|
2011-09-26 07:25:22 +00:00
|
|
|
|
/// Каким способом выполнять агрегацию?
|
2013-02-09 01:02:52 +00:00
|
|
|
|
if (result.empty())
|
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
result.type = chooseAggregationMethod(key_columns, key_sizes);
|
2013-02-09 02:20:26 +00:00
|
|
|
|
result.keys_size = keys_size;
|
2013-06-30 16:56:00 +00:00
|
|
|
|
result.key_sizes = key_sizes;
|
|
|
|
|
LOG_TRACE(log, "Aggregation method: " << result.getMethodName());
|
2013-02-09 01:02:52 +00:00
|
|
|
|
}
|
2011-09-26 12:50:50 +00:00
|
|
|
|
|
2013-05-04 15:46:50 +00:00
|
|
|
|
if (result.type == AggregatedDataVariants::WITHOUT_KEY || with_totals)
|
2011-09-19 01:42:16 +00:00
|
|
|
|
{
|
2011-09-26 07:25:22 +00:00
|
|
|
|
AggregatedDataWithoutKey & res = result.without_key;
|
2013-02-08 20:34:30 +00:00
|
|
|
|
if (!res)
|
2011-09-26 11:58:35 +00:00
|
|
|
|
{
|
2013-02-08 20:34:30 +00:00
|
|
|
|
res = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
|
|
|
|
|
2011-09-26 11:58:35 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2013-07-28 01:42:36 +00:00
|
|
|
|
aggregate_functions[i]->create(res + offsets_of_aggregate_states[i]);
|
2011-09-26 11:58:35 +00:00
|
|
|
|
}
|
2012-07-15 23:13:08 +00:00
|
|
|
|
|
|
|
|
|
/// Оптимизация в случае единственной агрегатной функции count.
|
2013-10-23 23:12:40 +00:00
|
|
|
|
AggregateFunctionCount * agg_count = aggregates_size == 1
|
|
|
|
|
? dynamic_cast<AggregateFunctionCount *>(aggregate_functions[0])
|
|
|
|
|
: NULL;
|
|
|
|
|
|
|
|
|
|
if (agg_count)
|
2013-02-08 20:34:30 +00:00
|
|
|
|
agg_count->addDelta(res, rows);
|
2012-07-15 23:13:08 +00:00
|
|
|
|
else
|
2011-09-26 07:25:22 +00:00
|
|
|
|
{
|
2012-07-15 23:13:08 +00:00
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
2011-09-26 07:25:22 +00:00
|
|
|
|
{
|
2012-07-15 23:13:08 +00:00
|
|
|
|
/// Добавляем значения
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-25 08:06:40 +00:00
|
|
|
|
aggregate_functions[j]->add(res + offsets_of_aggregate_states[j], &aggregate_columns[j][0], i);
|
2011-09-26 07:25:22 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
2013-05-06 11:45:28 +00:00
|
|
|
|
|
2013-05-04 15:46:50 +00:00
|
|
|
|
if (result.type == AggregatedDataVariants::KEY_64)
|
2011-09-26 07:25:22 +00:00
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithUInt64Key & res = result.key64;
|
2013-01-08 19:41:22 +00:00
|
|
|
|
const IColumn & column = *key_columns[0];
|
2011-09-19 01:42:16 +00:00
|
|
|
|
|
2011-09-26 07:25:22 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
2011-09-19 01:42:16 +00:00
|
|
|
|
{
|
2011-09-26 07:25:22 +00:00
|
|
|
|
/// Строим ключ
|
2013-02-08 20:34:30 +00:00
|
|
|
|
UInt64 key = get<UInt64>(column[i]);
|
2011-09-26 07:25:22 +00:00
|
|
|
|
|
2011-12-19 02:00:40 +00:00
|
|
|
|
AggregatedDataWithUInt64Key::iterator it;
|
|
|
|
|
bool inserted;
|
2012-12-25 19:28:59 +00:00
|
|
|
|
|
|
|
|
|
if (!no_more_keys)
|
|
|
|
|
res.emplace(key, it, inserted);
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
inserted = false;
|
|
|
|
|
it = res.find(key);
|
|
|
|
|
if (res.end() == it)
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2011-12-19 02:00:40 +00:00
|
|
|
|
|
|
|
|
|
if (inserted)
|
2011-09-26 07:25:22 +00:00
|
|
|
|
{
|
2013-02-08 20:34:30 +00:00
|
|
|
|
it->second = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
2011-12-19 02:00:40 +00:00
|
|
|
|
|
2011-09-26 07:25:22 +00:00
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-02-08 20:34:30 +00:00
|
|
|
|
aggregate_functions[j]->create(it->second + offsets_of_aggregate_states[j]);
|
2011-09-26 07:25:22 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Добавляем значения
|
2011-09-19 01:42:16 +00:00
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-25 08:06:40 +00:00
|
|
|
|
aggregate_functions[j]->add(it->second + offsets_of_aggregate_states[j], &aggregate_columns[j][0], i);
|
2011-09-19 01:42:16 +00:00
|
|
|
|
}
|
2011-09-26 07:25:22 +00:00
|
|
|
|
}
|
2012-05-30 01:38:02 +00:00
|
|
|
|
else if (result.type == AggregatedDataVariants::KEY_STRING)
|
2011-09-26 15:22:25 +00:00
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithStringKey & res = result.key_string;
|
2013-01-08 19:41:22 +00:00
|
|
|
|
const IColumn & column = *key_columns[0];
|
2011-09-26 15:22:25 +00:00
|
|
|
|
|
2012-05-31 05:41:56 +00:00
|
|
|
|
if (const ColumnString * column_string = dynamic_cast<const ColumnString *>(&column))
|
2011-09-26 15:22:25 +00:00
|
|
|
|
{
|
2012-05-31 05:41:56 +00:00
|
|
|
|
const ColumnString::Offsets_t & offsets = column_string->getOffsets();
|
2013-05-05 15:25:25 +00:00
|
|
|
|
const ColumnString::Chars_t & data = column_string->getChars();
|
2011-09-26 15:22:25 +00:00
|
|
|
|
|
2012-05-31 05:41:56 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
2011-09-26 15:22:25 +00:00
|
|
|
|
{
|
2012-05-31 05:41:56 +00:00
|
|
|
|
/// Строим ключ
|
2013-04-13 00:56:07 +00:00
|
|
|
|
StringRef ref(&data[i == 0 ? 0 : offsets[i - 1]], (i == 0 ? offsets[i] : (offsets[i] - offsets[i - 1])) - 1);
|
2011-09-26 15:22:25 +00:00
|
|
|
|
|
2012-05-31 05:41:56 +00:00
|
|
|
|
AggregatedDataWithStringKey::iterator it;
|
|
|
|
|
bool inserted;
|
2012-12-25 19:28:59 +00:00
|
|
|
|
|
|
|
|
|
if (!no_more_keys)
|
|
|
|
|
res.emplace(ref, it, inserted);
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
inserted = false;
|
|
|
|
|
it = res.find(ref);
|
|
|
|
|
if (res.end() == it)
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2012-05-31 05:41:56 +00:00
|
|
|
|
|
|
|
|
|
if (inserted)
|
|
|
|
|
{
|
|
|
|
|
it->first.data = result.string_pool.insert(ref.data, ref.size);
|
2013-02-08 20:34:30 +00:00
|
|
|
|
it->second = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
2012-05-31 05:41:56 +00:00
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-02-08 20:34:30 +00:00
|
|
|
|
aggregate_functions[j]->create(it->second + offsets_of_aggregate_states[j]);
|
2012-05-31 05:41:56 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Добавляем значения
|
2011-09-26 15:22:25 +00:00
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-25 08:06:40 +00:00
|
|
|
|
aggregate_functions[j]->add(it->second + offsets_of_aggregate_states[j], &aggregate_columns[j][0], i);
|
2011-09-26 15:22:25 +00:00
|
|
|
|
}
|
2012-05-31 05:41:56 +00:00
|
|
|
|
}
|
|
|
|
|
else if (const ColumnFixedString * column_string = dynamic_cast<const ColumnFixedString *>(&column))
|
|
|
|
|
{
|
|
|
|
|
size_t n = column_string->getN();
|
2013-05-05 15:25:25 +00:00
|
|
|
|
const ColumnFixedString::Chars_t & data = column_string->getChars();
|
2011-09-26 15:22:25 +00:00
|
|
|
|
|
2012-05-31 05:41:56 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
2011-09-26 15:22:25 +00:00
|
|
|
|
{
|
2012-05-31 05:41:56 +00:00
|
|
|
|
/// Строим ключ
|
|
|
|
|
StringRef ref(&data[i * n], n);
|
2011-09-26 15:22:25 +00:00
|
|
|
|
|
2012-05-31 05:41:56 +00:00
|
|
|
|
AggregatedDataWithStringKey::iterator it;
|
|
|
|
|
bool inserted;
|
2012-12-25 19:28:59 +00:00
|
|
|
|
|
|
|
|
|
if (!no_more_keys)
|
|
|
|
|
res.emplace(ref, it, inserted);
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
inserted = false;
|
|
|
|
|
it = res.find(ref);
|
|
|
|
|
if (res.end() == it)
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2012-05-31 05:41:56 +00:00
|
|
|
|
|
|
|
|
|
if (inserted)
|
|
|
|
|
{
|
|
|
|
|
it->first.data = result.string_pool.insert(ref.data, ref.size);
|
2013-02-08 20:34:30 +00:00
|
|
|
|
it->second = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
2012-05-31 05:41:56 +00:00
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-02-08 20:34:30 +00:00
|
|
|
|
aggregate_functions[j]->create(it->second + offsets_of_aggregate_states[j]);
|
2012-05-31 05:41:56 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Добавляем значения
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-25 08:06:40 +00:00
|
|
|
|
aggregate_functions[j]->add(it->second + offsets_of_aggregate_states[j], &aggregate_columns[j][0], i);
|
2011-09-26 15:22:25 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2012-05-31 05:41:56 +00:00
|
|
|
|
else
|
|
|
|
|
throw Exception("Illegal type of column when aggregating by string key: " + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
2011-09-26 15:22:25 +00:00
|
|
|
|
}
|
2013-06-30 16:56:00 +00:00
|
|
|
|
else if (result.type == AggregatedDataVariants::KEYS_128)
|
2011-09-26 07:25:22 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataWithKeys128 & res = result.keys128;
|
2011-09-19 01:42:16 +00:00
|
|
|
|
|
2011-09-26 07:25:22 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
2011-09-19 01:42:16 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataWithKeys128::iterator it;
|
2011-12-19 02:00:40 +00:00
|
|
|
|
bool inserted;
|
2013-06-30 16:56:00 +00:00
|
|
|
|
UInt128 key128 = pack128(i, keys_size, key_columns, key_sizes);
|
2012-12-25 19:28:59 +00:00
|
|
|
|
|
|
|
|
|
if (!no_more_keys)
|
|
|
|
|
res.emplace(key128, it, inserted);
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
inserted = false;
|
|
|
|
|
it = res.find(key128);
|
|
|
|
|
if (res.end() == it)
|
|
|
|
|
continue;
|
|
|
|
|
}
|
2011-12-19 02:00:40 +00:00
|
|
|
|
|
|
|
|
|
if (inserted)
|
2011-09-26 07:25:22 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
it->second = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
|
|
|
|
|
2011-09-26 07:25:22 +00:00
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-30 16:56:00 +00:00
|
|
|
|
aggregate_functions[j]->create(it->second + offsets_of_aggregate_states[j]);
|
2011-09-26 07:25:22 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Добавляем значения
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-30 16:56:00 +00:00
|
|
|
|
aggregate_functions[j]->add(it->second + offsets_of_aggregate_states[j], &aggregate_columns[j][0], i);
|
2011-09-19 01:42:16 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-30 16:56:00 +00:00
|
|
|
|
else if (result.type == AggregatedDataVariants::HASHED)
|
2011-09-26 07:25:22 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataHashed & res = result.hashed;
|
|
|
|
|
|
2011-09-26 07:25:22 +00:00
|
|
|
|
/// Для всех строчек
|
|
|
|
|
for (size_t i = 0; i < rows; ++i)
|
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataHashed::iterator it;
|
|
|
|
|
bool inserted;
|
|
|
|
|
UInt128 key128 = hash128(i, keys_size, key_columns, key);
|
2011-09-26 07:25:22 +00:00
|
|
|
|
|
2013-06-30 16:56:00 +00:00
|
|
|
|
if (!no_more_keys)
|
|
|
|
|
res.emplace(key128, it, inserted);
|
|
|
|
|
else
|
2011-09-26 07:25:22 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
inserted = false;
|
|
|
|
|
it = res.find(key128);
|
|
|
|
|
if (res.end() == it)
|
2012-12-25 19:28:59 +00:00
|
|
|
|
continue;
|
2013-06-30 16:56:00 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (inserted)
|
|
|
|
|
{
|
|
|
|
|
it->second.first = placeKeysInPool(i, keys_size, key, result.keys_pool);
|
|
|
|
|
it->second.second = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
2011-09-26 07:25:22 +00:00
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-30 16:56:00 +00:00
|
|
|
|
aggregate_functions[j]->create(it->second.second + offsets_of_aggregate_states[j]);
|
2011-09-26 07:25:22 +00:00
|
|
|
|
}
|
2011-09-19 01:42:16 +00:00
|
|
|
|
|
2011-09-26 07:25:22 +00:00
|
|
|
|
/// Добавляем значения
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-30 16:56:00 +00:00
|
|
|
|
aggregate_functions[j]->add(it->second.second + offsets_of_aggregate_states[j], &aggregate_columns[j][0], i);
|
2011-09-26 07:25:22 +00:00
|
|
|
|
}
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
2013-05-06 11:45:28 +00:00
|
|
|
|
else if (result.type != AggregatedDataVariants::WITHOUT_KEY)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
|
2012-12-25 19:28:59 +00:00
|
|
|
|
|
|
|
|
|
/// Проверка ограничений.
|
|
|
|
|
if (!no_more_keys && max_rows_to_group_by && result.size() > max_rows_to_group_by)
|
|
|
|
|
{
|
|
|
|
|
if (group_by_overflow_mode == Limits::THROW)
|
2013-06-21 20:34:19 +00:00
|
|
|
|
throw Exception("Limit for rows to GROUP BY exceeded: has " + toString(result.size())
|
|
|
|
|
+ " rows, maximum: " + toString(max_rows_to_group_by),
|
2012-12-25 19:28:59 +00:00
|
|
|
|
ErrorCodes::TOO_MUCH_ROWS);
|
|
|
|
|
else if (group_by_overflow_mode == Limits::BREAK)
|
|
|
|
|
break;
|
|
|
|
|
else if (group_by_overflow_mode == Limits::ANY)
|
|
|
|
|
no_more_keys = true;
|
|
|
|
|
else
|
2013-05-17 08:02:34 +00:00
|
|
|
|
throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR);
|
2012-12-25 19:28:59 +00:00
|
|
|
|
}
|
2011-09-26 07:25:22 +00:00
|
|
|
|
}
|
2013-02-04 03:31:53 +00:00
|
|
|
|
|
|
|
|
|
double elapsed_seconds = watch.elapsedSeconds();
|
|
|
|
|
size_t rows = result.size();
|
|
|
|
|
LOG_TRACE(log, std::fixed << std::setprecision(3)
|
|
|
|
|
<< "Aggregated. " << src_rows << " to " << rows << " rows (from " << src_bytes / 1048576.0 << " MiB)"
|
|
|
|
|
<< " in " << elapsed_seconds << " sec."
|
|
|
|
|
<< " (" << src_rows / elapsed_seconds << " rows/sec., " << src_bytes / elapsed_seconds / 1048576.0 << " MiB/sec.)");
|
2011-09-19 01:42:16 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2013-11-03 23:35:18 +00:00
|
|
|
|
Block Aggregator::convertToBlock(AggregatedDataVariants & data_variants, bool separate_totals, Block & totals, bool final)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-09-16 05:33:49 +00:00
|
|
|
|
Block res = sample.cloneEmpty();
|
2013-02-03 18:08:52 +00:00
|
|
|
|
size_t rows = data_variants.size();
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2013-09-01 04:55:41 +00:00
|
|
|
|
if (with_totals && separate_totals && rows != 0)
|
|
|
|
|
--rows; /// Строчка с "тотальными" значениями идёт отдельно.
|
|
|
|
|
|
2012-05-31 00:33:42 +00:00
|
|
|
|
LOG_TRACE(log, "Converting aggregated data to block");
|
|
|
|
|
|
2013-02-03 18:39:09 +00:00
|
|
|
|
Stopwatch watch;
|
|
|
|
|
|
2012-02-27 06:28:20 +00:00
|
|
|
|
/// В какой структуре данных агрегированы данные?
|
2012-05-10 07:47:13 +00:00
|
|
|
|
if (data_variants.empty())
|
2013-09-16 05:33:49 +00:00
|
|
|
|
return Block();
|
2012-05-10 07:47:13 +00:00
|
|
|
|
|
2013-02-03 18:45:33 +00:00
|
|
|
|
typedef std::vector<ColumnAggregateFunction::Container_t *> AggregateColumns;
|
2013-02-03 18:08:52 +00:00
|
|
|
|
|
|
|
|
|
ColumnPlainPtrs key_columns(keys_size);
|
|
|
|
|
AggregateColumns aggregate_columns(aggregates_size);
|
2013-11-03 23:35:18 +00:00
|
|
|
|
ColumnPlainPtrs final_aggregate_columns(aggregates_size);
|
2013-02-03 18:08:52 +00:00
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
2013-02-03 18:39:09 +00:00
|
|
|
|
{
|
2013-02-03 18:08:52 +00:00
|
|
|
|
key_columns[i] = res.getByPosition(i).column;
|
2013-02-03 18:39:09 +00:00
|
|
|
|
key_columns[i]->reserve(rows);
|
|
|
|
|
}
|
2013-02-03 18:08:52 +00:00
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2013-02-03 18:39:09 +00:00
|
|
|
|
{
|
2013-11-03 23:35:18 +00:00
|
|
|
|
if (!final)
|
|
|
|
|
{
|
|
|
|
|
/// Столбец ColumnAggregateFunction захватывает разделяемое владение ареной с состояниями агрегатных функций.
|
|
|
|
|
ColumnAggregateFunction & column_aggregate_func = static_cast<ColumnAggregateFunction &>(*res.getByPosition(i + keys_size).column);
|
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < data_variants.aggregates_pools.size(); ++j)
|
|
|
|
|
column_aggregate_func.addArena(data_variants.aggregates_pools[j]);
|
2013-02-09 00:12:04 +00:00
|
|
|
|
|
2013-11-03 23:35:18 +00:00
|
|
|
|
aggregate_columns[i] = &column_aggregate_func.getData();
|
|
|
|
|
aggregate_columns[i]->resize(rows);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
ColumnWithNameAndType & column = res.getByPosition(i + keys_size);
|
|
|
|
|
column.type = aggregate_functions[i]->getReturnType();
|
|
|
|
|
column.column = column.type->createColumn();
|
|
|
|
|
column.column->reserve(rows);
|
2013-02-08 20:34:30 +00:00
|
|
|
|
|
2013-11-03 23:35:18 +00:00
|
|
|
|
final_aggregate_columns[i] = column.column;
|
|
|
|
|
}
|
2013-02-03 18:39:09 +00:00
|
|
|
|
}
|
2013-02-03 18:08:52 +00:00
|
|
|
|
|
2013-05-04 15:46:50 +00:00
|
|
|
|
if (data_variants.type == AggregatedDataVariants::WITHOUT_KEY || with_totals)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithoutKey & data = data_variants.without_key;
|
|
|
|
|
|
2013-09-02 20:28:18 +00:00
|
|
|
|
if (with_totals && separate_totals)
|
2013-09-01 04:55:41 +00:00
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
{
|
|
|
|
|
totals = res.cloneEmpty();
|
2013-05-04 15:46:50 +00:00
|
|
|
|
|
2013-09-01 04:55:41 +00:00
|
|
|
|
/// Для тотальных данных вместо ключей пишутся значения по-умолчанию (нули, пустые строки).
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
|
|
|
|
totals.getByPosition(i).column->insertDefault();
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
{
|
|
|
|
|
ColumnWithNameAndType & column = totals.getByPosition(i + keys_size);
|
|
|
|
|
column.type = aggregate_functions[i]->getReturnType();
|
|
|
|
|
column.column = column.type->createColumn();
|
|
|
|
|
aggregate_functions[i]->insertResultInto(data + offsets_of_aggregate_states[i], *column.column);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2013-11-03 23:35:18 +00:00
|
|
|
|
if (!final)
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
(*aggregate_columns[i])[0] = data + offsets_of_aggregate_states[i];
|
|
|
|
|
else
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_functions[i]->insertResultInto(data + offsets_of_aggregate_states[i], *final_aggregate_columns[i]);
|
2013-09-01 04:55:41 +00:00
|
|
|
|
|
|
|
|
|
if (with_totals)
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
|
|
|
|
key_columns[i]->insertDefault();
|
|
|
|
|
}
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
2013-05-06 11:45:28 +00:00
|
|
|
|
|
2013-05-04 15:46:50 +00:00
|
|
|
|
if (data_variants.type == AggregatedDataVariants::KEY_64)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithUInt64Key & data = data_variants.key64;
|
|
|
|
|
|
2013-02-03 18:08:52 +00:00
|
|
|
|
IColumn & first_column = *key_columns[0];
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2013-09-01 04:55:41 +00:00
|
|
|
|
size_t j = with_totals && !separate_totals ? 1 : 0;
|
2013-11-03 23:35:18 +00:00
|
|
|
|
|
|
|
|
|
if (!final)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-11-03 23:35:18 +00:00
|
|
|
|
for (AggregatedDataWithUInt64Key::const_iterator it = data.begin(); it != data.end(); ++it, ++j)
|
|
|
|
|
{
|
|
|
|
|
first_column.insertData(reinterpret_cast<const char *>(&it->first), sizeof(it->first));
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2013-11-03 23:35:18 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
(*aggregate_columns[i])[j] = it->second + offsets_of_aggregate_states[i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
for (AggregatedDataWithUInt64Key::const_iterator it = data.begin(); it != data.end(); ++it, ++j)
|
|
|
|
|
{
|
|
|
|
|
first_column.insertData(reinterpret_cast<const char *>(&it->first), sizeof(it->first));
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_functions[i]->insertResultInto(it->second + offsets_of_aggregate_states[i], *final_aggregate_columns[i]);
|
|
|
|
|
}
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (data_variants.type == AggregatedDataVariants::KEY_STRING)
|
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithStringKey & data = data_variants.key_string;
|
2013-02-03 18:08:52 +00:00
|
|
|
|
IColumn & first_column = *key_columns[0];
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2013-09-01 04:55:41 +00:00
|
|
|
|
size_t j = with_totals && !separate_totals ? 1 : 0;
|
2013-11-03 23:35:18 +00:00
|
|
|
|
|
|
|
|
|
if (!final)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-11-03 23:35:18 +00:00
|
|
|
|
for (AggregatedDataWithStringKey::const_iterator it = data.begin(); it != data.end(); ++it, ++j)
|
|
|
|
|
{
|
|
|
|
|
first_column.insertData(it->first.data, it->first.size);
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2013-11-03 23:35:18 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
(*aggregate_columns[i])[j] = it->second + offsets_of_aggregate_states[i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
for (AggregatedDataWithStringKey::const_iterator it = data.begin(); it != data.end(); ++it, ++j)
|
|
|
|
|
{
|
|
|
|
|
first_column.insertData(it->first.data, it->first.size);
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_functions[i]->insertResultInto(it->second + offsets_of_aggregate_states[i], *final_aggregate_columns[i]);
|
|
|
|
|
}
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-30 16:56:00 +00:00
|
|
|
|
else if (data_variants.type == AggregatedDataVariants::KEYS_128)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataWithKeys128 & data = data_variants.keys128;
|
2013-02-03 18:08:52 +00:00
|
|
|
|
|
2013-09-01 04:55:41 +00:00
|
|
|
|
size_t j = with_totals && !separate_totals ? 1 : 0;
|
2013-11-03 23:35:18 +00:00
|
|
|
|
|
|
|
|
|
if (!final)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-11-03 23:35:18 +00:00
|
|
|
|
for (AggregatedDataWithKeys128::const_iterator it = data.begin(); it != data.end(); ++it, ++j)
|
2013-06-30 16:56:00 +00:00
|
|
|
|
{
|
2013-11-03 23:35:18 +00:00
|
|
|
|
size_t offset = 0;
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
|
|
|
|
{
|
|
|
|
|
size_t size = data_variants.key_sizes[i];
|
|
|
|
|
key_columns[i]->insertData(reinterpret_cast<const char *>(&it->first) + offset, size);
|
|
|
|
|
offset += size;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
(*aggregate_columns[i])[j] = it->second + offsets_of_aggregate_states[i];
|
2013-06-30 16:56:00 +00:00
|
|
|
|
}
|
2013-11-03 23:35:18 +00:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
for (AggregatedDataWithKeys128::const_iterator it = data.begin(); it != data.end(); ++it, ++j)
|
|
|
|
|
{
|
|
|
|
|
size_t offset = 0;
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
|
|
|
|
{
|
|
|
|
|
size_t size = data_variants.key_sizes[i];
|
|
|
|
|
key_columns[i]->insertData(reinterpret_cast<const char *>(&it->first) + offset, size);
|
|
|
|
|
offset += size;
|
|
|
|
|
}
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2013-11-03 23:35:18 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_functions[i]->insertResultInto(it->second + offsets_of_aggregate_states[i], *final_aggregate_columns[i]);
|
|
|
|
|
}
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-30 16:56:00 +00:00
|
|
|
|
else if (data_variants.type == AggregatedDataVariants::HASHED)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataHashed & data = data_variants.hashed;
|
|
|
|
|
|
2013-09-01 04:55:41 +00:00
|
|
|
|
size_t j = with_totals && !separate_totals ? 1 : 0;
|
2013-11-03 23:35:18 +00:00
|
|
|
|
|
|
|
|
|
if (!final)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-11-03 23:35:18 +00:00
|
|
|
|
for (AggregatedDataHashed::const_iterator it = data.begin(); it != data.end(); ++it, ++j)
|
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
|
|
|
|
key_columns[i]->insertDataWithTerminatingZero(it->second.first[i].data, it->second.first[i].size);
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2013-11-03 23:35:18 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
(*aggregate_columns[i])[j] = it->second.second + offsets_of_aggregate_states[i];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
for (AggregatedDataHashed::const_iterator it = data.begin(); it != data.end(); ++it, ++j)
|
|
|
|
|
{
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
|
|
|
|
key_columns[i]->insertDataWithTerminatingZero(it->second.first[i].data, it->second.first[i].size);
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_functions[i]->insertResultInto(it->second.second + offsets_of_aggregate_states[i], *final_aggregate_columns[i]);
|
|
|
|
|
}
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-05-06 11:45:28 +00:00
|
|
|
|
else if (data_variants.type != AggregatedDataVariants::WITHOUT_KEY)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
|
|
|
|
|
|
2013-11-03 23:35:18 +00:00
|
|
|
|
if (!final)
|
|
|
|
|
{
|
|
|
|
|
/// data_variants не будет уничтожать состояния агрегатных функций в деструкторе. Теперь состояниями владеют ColumnAggregateFunction.
|
|
|
|
|
data_variants.aggregator = NULL;
|
|
|
|
|
}
|
2013-02-16 18:59:05 +00:00
|
|
|
|
|
2012-02-27 06:28:20 +00:00
|
|
|
|
/// Изменяем размер столбцов-констант в блоке.
|
|
|
|
|
size_t columns = res.columns();
|
|
|
|
|
for (size_t i = 0; i < columns; ++i)
|
|
|
|
|
if (res.getByPosition(i).column->isConst())
|
2013-05-03 05:23:14 +00:00
|
|
|
|
res.getByPosition(i).column = res.getByPosition(i).column->cut(0, rows);
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2013-02-03 18:39:09 +00:00
|
|
|
|
double elapsed_seconds = watch.elapsedSeconds();
|
2013-02-04 03:31:53 +00:00
|
|
|
|
LOG_TRACE(log, std::fixed << std::setprecision(3)
|
|
|
|
|
<< "Converted aggregated data to block. "
|
2013-02-03 18:39:09 +00:00
|
|
|
|
<< rows << " rows, " << res.bytes() / 1048576.0 << " MiB"
|
2013-02-04 03:31:53 +00:00
|
|
|
|
<< " in " << elapsed_seconds << " sec."
|
2013-02-03 18:39:09 +00:00
|
|
|
|
<< " (" << rows / elapsed_seconds << " rows/sec., " << res.bytes() / elapsed_seconds / 1048576.0 << " MiB/sec.)");
|
2012-05-31 00:33:42 +00:00
|
|
|
|
|
2012-02-27 06:28:20 +00:00
|
|
|
|
return res;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_variants)
|
|
|
|
|
{
|
|
|
|
|
if (data_variants.empty())
|
2012-02-27 07:54:16 +00:00
|
|
|
|
throw Exception("Empty data passed to Aggregator::merge().", ErrorCodes::EMPTY_DATA_PASSED);
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2012-05-31 00:33:42 +00:00
|
|
|
|
LOG_TRACE(log, "Merging aggregated data");
|
|
|
|
|
|
2013-02-04 03:31:53 +00:00
|
|
|
|
Stopwatch watch;
|
|
|
|
|
|
2012-08-21 18:34:55 +00:00
|
|
|
|
AggregatedDataVariantsPtr res = data_variants[0];
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
|
|
|
|
/// Все результаты агрегации соединяем с первым.
|
2013-02-04 03:31:53 +00:00
|
|
|
|
size_t rows = res->size();
|
2012-02-27 06:28:20 +00:00
|
|
|
|
for (size_t i = 1, size = data_variants.size(); i < size; ++i)
|
|
|
|
|
{
|
2013-02-04 03:31:53 +00:00
|
|
|
|
rows += data_variants[i]->size();
|
2012-02-27 06:28:20 +00:00
|
|
|
|
AggregatedDataVariants & current = *data_variants[i];
|
|
|
|
|
|
2013-02-09 00:12:04 +00:00
|
|
|
|
res->aggregates_pools.insert(res->aggregates_pools.end(), current.aggregates_pools.begin(), current.aggregates_pools.end());
|
|
|
|
|
|
2012-05-10 07:47:13 +00:00
|
|
|
|
if (current.empty())
|
|
|
|
|
continue;
|
|
|
|
|
|
2012-08-21 18:34:55 +00:00
|
|
|
|
if (res->empty())
|
2012-05-10 07:47:13 +00:00
|
|
|
|
{
|
2012-08-21 18:34:55 +00:00
|
|
|
|
res = data_variants[i];
|
2012-05-10 07:47:13 +00:00
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
2012-08-21 18:34:55 +00:00
|
|
|
|
if (res->type != current.type)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
throw Exception("Cannot merge different aggregated data variants.", ErrorCodes::CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS);
|
|
|
|
|
|
|
|
|
|
/// В какой структуре данных агрегированы данные?
|
2013-05-04 15:46:50 +00:00
|
|
|
|
if (res->type == AggregatedDataVariants::WITHOUT_KEY || with_totals)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2012-08-21 18:34:55 +00:00
|
|
|
|
AggregatedDataWithoutKey & res_data = res->without_key;
|
2012-02-27 06:28:20 +00:00
|
|
|
|
AggregatedDataWithoutKey & current_data = current.without_key;
|
|
|
|
|
|
2013-02-08 23:41:05 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-02-08 23:41:05 +00:00
|
|
|
|
aggregate_functions[i]->merge(res_data + offsets_of_aggregate_states[i], current_data + offsets_of_aggregate_states[i]);
|
|
|
|
|
aggregate_functions[i]->destroy(current_data + offsets_of_aggregate_states[i]);
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-05-06 11:45:28 +00:00
|
|
|
|
|
2013-05-04 15:46:50 +00:00
|
|
|
|
if (res->type == AggregatedDataVariants::KEY_64)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2012-08-21 18:34:55 +00:00
|
|
|
|
AggregatedDataWithUInt64Key & res_data = res->key64;
|
2012-02-27 06:28:20 +00:00
|
|
|
|
AggregatedDataWithUInt64Key & current_data = current.key64;
|
|
|
|
|
|
2012-02-27 16:23:28 +00:00
|
|
|
|
for (AggregatedDataWithUInt64Key::const_iterator it = current_data.begin(); it != current_data.end(); ++it)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithUInt64Key::iterator res_it;
|
|
|
|
|
bool inserted;
|
|
|
|
|
res_data.emplace(it->first, res_it, inserted);
|
|
|
|
|
|
|
|
|
|
if (!inserted)
|
|
|
|
|
{
|
2013-02-08 23:41:05 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-02-08 23:41:05 +00:00
|
|
|
|
aggregate_functions[i]->merge(res_it->second + offsets_of_aggregate_states[i], it->second + offsets_of_aggregate_states[i]);
|
|
|
|
|
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
2013-02-08 23:41:05 +00:00
|
|
|
|
res_it->second = it->second;
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2012-08-21 18:34:55 +00:00
|
|
|
|
else if (res->type == AggregatedDataVariants::KEY_STRING)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2012-08-21 18:34:55 +00:00
|
|
|
|
AggregatedDataWithStringKey & res_data = res->key_string;
|
2012-02-27 06:28:20 +00:00
|
|
|
|
AggregatedDataWithStringKey & current_data = current.key_string;
|
|
|
|
|
|
2012-02-27 16:23:28 +00:00
|
|
|
|
for (AggregatedDataWithStringKey::const_iterator it = current_data.begin(); it != current_data.end(); ++it)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2012-05-31 05:41:56 +00:00
|
|
|
|
AggregatedDataWithStringKey::iterator res_it;
|
|
|
|
|
bool inserted;
|
|
|
|
|
res_data.emplace(it->first, res_it, inserted);
|
|
|
|
|
|
|
|
|
|
if (!inserted)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-02-08 23:41:05 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-02-08 23:41:05 +00:00
|
|
|
|
aggregate_functions[i]->merge(res_it->second + offsets_of_aggregate_states[i], it->second + offsets_of_aggregate_states[i]);
|
|
|
|
|
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
2013-02-08 23:41:05 +00:00
|
|
|
|
res_it->second = it->second;
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-30 16:56:00 +00:00
|
|
|
|
else if (res->type == AggregatedDataVariants::KEYS_128)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataWithKeys128 & res_data = res->keys128;
|
|
|
|
|
AggregatedDataWithKeys128 & current_data = current.keys128;
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2013-06-30 16:56:00 +00:00
|
|
|
|
for (AggregatedDataWithKeys128::iterator it = current_data.begin(); it != current_data.end(); ++it)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataWithKeys128::iterator res_it;
|
2012-02-27 06:28:20 +00:00
|
|
|
|
bool inserted;
|
|
|
|
|
res_data.emplace(it->first, res_it, inserted);
|
|
|
|
|
|
|
|
|
|
if (!inserted)
|
|
|
|
|
{
|
2013-02-08 23:41:05 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
aggregate_functions[i]->merge(res_it->second + offsets_of_aggregate_states[i], it->second + offsets_of_aggregate_states[i]);
|
|
|
|
|
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
2013-02-09 02:20:26 +00:00
|
|
|
|
{
|
|
|
|
|
res_it->second = it->second;
|
|
|
|
|
}
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-30 16:56:00 +00:00
|
|
|
|
else if (res->type == AggregatedDataVariants::HASHED)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataHashed & res_data = res->hashed;
|
|
|
|
|
AggregatedDataHashed & current_data = current.hashed;
|
2012-02-27 06:28:20 +00:00
|
|
|
|
|
2013-06-30 16:56:00 +00:00
|
|
|
|
for (AggregatedDataHashed::iterator it = current_data.begin(); it != current_data.end(); ++it)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataHashed::iterator res_it;
|
|
|
|
|
bool inserted;
|
|
|
|
|
res_data.emplace(it->first, res_it, inserted);
|
|
|
|
|
|
|
|
|
|
if (!inserted)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-02-08 23:41:05 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
aggregate_functions[i]->merge(res_it->second.second + offsets_of_aggregate_states[i], it->second.second + offsets_of_aggregate_states[i]);
|
|
|
|
|
aggregate_functions[i]->destroy(it->second.second + offsets_of_aggregate_states[i]);
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
2013-06-30 16:56:00 +00:00
|
|
|
|
{
|
|
|
|
|
res_it->second = it->second;
|
|
|
|
|
}
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-05-06 11:45:28 +00:00
|
|
|
|
else if (res->type != AggregatedDataVariants::WITHOUT_KEY)
|
2012-02-27 06:28:20 +00:00
|
|
|
|
throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
|
2013-02-16 18:59:05 +00:00
|
|
|
|
|
|
|
|
|
/// current не будет уничтожать состояния агрегатных функций в деструкторе
|
|
|
|
|
current.aggregator = NULL;
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
|
2013-02-04 03:31:53 +00:00
|
|
|
|
double elapsed_seconds = watch.elapsedSeconds();
|
|
|
|
|
size_t res_rows = res->size();
|
|
|
|
|
|
|
|
|
|
LOG_TRACE(log, std::fixed << std::setprecision(3)
|
|
|
|
|
<< "Merged aggregated data. "
|
|
|
|
|
<< "From " << rows << " to " << res_rows << " rows (efficiency: " << static_cast<double>(rows) / res_rows << ")"
|
|
|
|
|
<< " in " << elapsed_seconds << " sec."
|
|
|
|
|
<< " (" << rows / elapsed_seconds << " rows/sec.)");
|
2012-05-31 00:33:42 +00:00
|
|
|
|
|
2012-08-21 18:34:55 +00:00
|
|
|
|
return res;
|
2012-02-27 06:28:20 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2012-05-30 01:38:02 +00:00
|
|
|
|
void Aggregator::merge(BlockInputStreamPtr stream, AggregatedDataVariants & result)
|
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
StringRefs key(keys_size);
|
2013-01-08 19:41:22 +00:00
|
|
|
|
ConstColumnPlainPtrs key_columns(keys_size);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
|
|
|
|
typedef ColumnAggregateFunction::Container_t * AggregateColumn;
|
|
|
|
|
typedef std::vector<AggregateColumn> AggregateColumns;
|
|
|
|
|
AggregateColumns aggregate_columns(aggregates_size);
|
|
|
|
|
|
2013-02-13 19:24:19 +00:00
|
|
|
|
Block empty_block;
|
|
|
|
|
initialize(empty_block);
|
|
|
|
|
|
2013-02-16 18:59:05 +00:00
|
|
|
|
/// result будет уничтожать состояния агрегатных функций в деструкторе
|
|
|
|
|
result.aggregator = this;
|
|
|
|
|
|
2012-05-30 01:38:02 +00:00
|
|
|
|
/// Читаем все данные
|
|
|
|
|
while (Block block = stream->read())
|
|
|
|
|
{
|
2012-05-31 00:33:42 +00:00
|
|
|
|
LOG_TRACE(log, "Merging aggregated block");
|
|
|
|
|
|
2012-05-30 03:30:29 +00:00
|
|
|
|
if (!sample)
|
|
|
|
|
for (size_t i = 0; i < keys_size + aggregates_size; ++i)
|
|
|
|
|
sample.insert(block.getByPosition(i).cloneEmpty());
|
|
|
|
|
|
2012-05-30 01:38:02 +00:00
|
|
|
|
/// Запоминаем столбцы, с которыми будем работать
|
|
|
|
|
for (size_t i = 0; i < keys_size; ++i)
|
|
|
|
|
key_columns[i] = block.getByPosition(i).column;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_columns[i] = &dynamic_cast<ColumnAggregateFunction &>(*block.getByPosition(keys_size + i).column).getData();
|
|
|
|
|
|
|
|
|
|
size_t rows = block.rows();
|
|
|
|
|
|
|
|
|
|
/// Каким способом выполнять агрегацию?
|
|
|
|
|
Sizes key_sizes;
|
2013-06-30 16:56:00 +00:00
|
|
|
|
result.type = chooseAggregationMethod(key_columns, key_sizes);
|
2013-02-09 02:20:26 +00:00
|
|
|
|
result.keys_size = keys_size;
|
2013-06-30 16:56:00 +00:00
|
|
|
|
result.key_sizes = key_sizes;
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
2013-05-04 15:46:50 +00:00
|
|
|
|
if (result.type == AggregatedDataVariants::WITHOUT_KEY || with_totals)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithoutKey & res = result.without_key;
|
2013-02-08 23:41:05 +00:00
|
|
|
|
if (!res)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
2013-02-08 23:41:05 +00:00
|
|
|
|
res = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
|
|
|
|
|
2012-05-30 01:38:02 +00:00
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2013-02-08 23:41:05 +00:00
|
|
|
|
aggregate_functions[i]->create(res + offsets_of_aggregate_states[i]);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
2012-05-30 03:30:29 +00:00
|
|
|
|
|
|
|
|
|
/// Добавляем значения
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
2013-02-08 23:41:05 +00:00
|
|
|
|
aggregate_functions[i]->merge(res + offsets_of_aggregate_states[i], (*aggregate_columns[i])[0]);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
2013-05-06 11:45:28 +00:00
|
|
|
|
|
2013-05-04 15:46:50 +00:00
|
|
|
|
if (result.type == AggregatedDataVariants::KEY_64)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithUInt64Key & res = result.key64;
|
2013-01-08 19:41:22 +00:00
|
|
|
|
const IColumn & column = *key_columns[0];
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
2013-05-04 15:46:50 +00:00
|
|
|
|
for (size_t i = with_totals ? 1 : 0; i < rows; ++i)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
2013-02-08 20:34:30 +00:00
|
|
|
|
UInt64 key = get<UInt64>(column[i]);
|
|
|
|
|
|
2012-05-30 01:38:02 +00:00
|
|
|
|
AggregatedDataWithUInt64Key::iterator it;
|
|
|
|
|
bool inserted;
|
|
|
|
|
res.emplace(key, it, inserted);
|
|
|
|
|
|
|
|
|
|
if (inserted)
|
|
|
|
|
{
|
2013-02-08 23:41:05 +00:00
|
|
|
|
it->second = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-02-08 23:41:05 +00:00
|
|
|
|
aggregate_functions[j]->create(it->second + offsets_of_aggregate_states[j]);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Добавляем значения
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-02-13 20:13:28 +00:00
|
|
|
|
aggregate_functions[j]->merge(it->second + offsets_of_aggregate_states[j], (*aggregate_columns[j])[i]);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (result.type == AggregatedDataVariants::KEY_STRING)
|
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithStringKey & res = result.key_string;
|
2013-01-08 19:41:22 +00:00
|
|
|
|
const IColumn & column = *key_columns[0];
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
2012-05-31 05:41:56 +00:00
|
|
|
|
if (const ColumnString * column_string = dynamic_cast<const ColumnString *>(&column))
|
|
|
|
|
{
|
|
|
|
|
const ColumnString::Offsets_t & offsets = column_string->getOffsets();
|
2013-05-05 15:25:25 +00:00
|
|
|
|
const ColumnString::Chars_t & data = column_string->getChars();
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
2012-05-31 05:41:56 +00:00
|
|
|
|
/// Для всех строчек
|
2013-05-04 15:46:50 +00:00
|
|
|
|
for (size_t i = with_totals ? 1 : 0; i < rows; ++i)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
2012-05-31 05:41:56 +00:00
|
|
|
|
/// Строим ключ
|
2013-04-13 00:56:07 +00:00
|
|
|
|
StringRef ref(&data[i == 0 ? 0 : offsets[i - 1]], (i == 0 ? offsets[i] : (offsets[i] - offsets[i - 1])) - 1);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
2012-05-31 05:41:56 +00:00
|
|
|
|
AggregatedDataWithStringKey::iterator it;
|
|
|
|
|
bool inserted;
|
|
|
|
|
res.emplace(ref, it, inserted);
|
|
|
|
|
|
|
|
|
|
if (inserted)
|
|
|
|
|
{
|
|
|
|
|
it->first.data = result.string_pool.insert(ref.data, ref.size);
|
2013-02-08 23:41:05 +00:00
|
|
|
|
it->second = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
2012-05-31 05:41:56 +00:00
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-02-08 23:41:05 +00:00
|
|
|
|
aggregate_functions[j]->create(it->second + offsets_of_aggregate_states[j]);
|
2012-05-31 05:41:56 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Добавляем значения
|
2012-05-30 01:38:02 +00:00
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-02-13 20:13:28 +00:00
|
|
|
|
aggregate_functions[j]->merge(it->second + offsets_of_aggregate_states[j], (*aggregate_columns[j])[i]);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
2012-05-31 05:41:56 +00:00
|
|
|
|
}
|
|
|
|
|
else if (const ColumnFixedString * column_string = dynamic_cast<const ColumnFixedString *>(&column))
|
|
|
|
|
{
|
|
|
|
|
size_t n = column_string->getN();
|
2013-05-05 15:25:25 +00:00
|
|
|
|
const ColumnFixedString::Chars_t & data = column_string->getChars();
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
2012-05-31 05:41:56 +00:00
|
|
|
|
/// Для всех строчек
|
2013-05-04 15:46:50 +00:00
|
|
|
|
for (size_t i = with_totals ? 1 : 0; i < rows; ++i)
|
2012-05-31 05:41:56 +00:00
|
|
|
|
{
|
|
|
|
|
/// Строим ключ
|
|
|
|
|
StringRef ref(&data[i * n], n);
|
|
|
|
|
|
|
|
|
|
AggregatedDataWithStringKey::iterator it;
|
|
|
|
|
bool inserted;
|
|
|
|
|
res.emplace(ref, it, inserted);
|
|
|
|
|
|
|
|
|
|
if (inserted)
|
|
|
|
|
{
|
|
|
|
|
it->first.data = result.string_pool.insert(ref.data, ref.size);
|
2013-02-08 23:41:05 +00:00
|
|
|
|
it->second = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
2012-05-31 05:41:56 +00:00
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-02-08 23:41:05 +00:00
|
|
|
|
aggregate_functions[j]->create(it->second + offsets_of_aggregate_states[j]);
|
2012-05-31 05:41:56 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Добавляем значения
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-02-13 20:13:28 +00:00
|
|
|
|
aggregate_functions[j]->merge(it->second + offsets_of_aggregate_states[j], (*aggregate_columns[j])[i]);
|
2012-05-31 05:41:56 +00:00
|
|
|
|
}
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
2012-05-31 05:41:56 +00:00
|
|
|
|
else
|
|
|
|
|
throw Exception("Illegal type of column when aggregating by string key: " + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
2013-06-30 16:56:00 +00:00
|
|
|
|
else if (result.type == AggregatedDataVariants::KEYS_128)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataWithKeys128 & res = result.keys128;
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
2013-05-04 15:46:50 +00:00
|
|
|
|
for (size_t i = with_totals ? 1 : 0; i < rows; ++i)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataWithKeys128::iterator it;
|
2012-05-30 01:38:02 +00:00
|
|
|
|
bool inserted;
|
2013-06-30 16:56:00 +00:00
|
|
|
|
res.emplace(pack128(i, keys_size, key_columns, key_sizes), it, inserted);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
|
|
|
|
if (inserted)
|
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
it->second = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-30 16:56:00 +00:00
|
|
|
|
aggregate_functions[j]->create(it->second + offsets_of_aggregate_states[j]);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Добавляем значения
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-30 16:56:00 +00:00
|
|
|
|
aggregate_functions[j]->merge(it->second + offsets_of_aggregate_states[j], (*aggregate_columns[j])[i]);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-06-30 16:56:00 +00:00
|
|
|
|
else if (result.type == AggregatedDataVariants::HASHED)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataHashed & res = result.hashed;
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
|
|
|
|
/// Для всех строчек
|
2013-05-04 15:46:50 +00:00
|
|
|
|
for (size_t i = with_totals ? 1 : 0; i < rows; ++i)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
AggregatedDataHashed::iterator it;
|
|
|
|
|
bool inserted;
|
|
|
|
|
res.emplace(hash128(i, keys_size, key_columns, key), it, inserted);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
2013-06-30 16:56:00 +00:00
|
|
|
|
if (inserted)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
{
|
2013-06-30 16:56:00 +00:00
|
|
|
|
it->second.first = placeKeysInPool(i, keys_size, key, result.keys_pool);
|
|
|
|
|
it->second.second = result.aggregates_pool->alloc(total_size_of_aggregate_states);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-30 16:56:00 +00:00
|
|
|
|
aggregate_functions[j]->create(it->second.second + offsets_of_aggregate_states[j]);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Добавляем значения
|
|
|
|
|
for (size_t j = 0; j < aggregates_size; ++j)
|
2013-06-30 16:56:00 +00:00
|
|
|
|
aggregate_functions[j]->merge(it->second.second + offsets_of_aggregate_states[j], (*aggregate_columns[j])[i]);
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
2013-05-06 11:45:28 +00:00
|
|
|
|
else if (result.type != AggregatedDataVariants::WITHOUT_KEY)
|
2012-05-30 01:38:02 +00:00
|
|
|
|
throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
|
2012-05-31 00:33:42 +00:00
|
|
|
|
|
|
|
|
|
LOG_TRACE(log, "Merged aggregated block");
|
2012-05-30 01:38:02 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2013-02-16 18:59:05 +00:00
|
|
|
|
void Aggregator::destroyAggregateStates(AggregatedDataVariants & result)
|
|
|
|
|
{
|
|
|
|
|
if (result.size() == 0)
|
|
|
|
|
return;
|
|
|
|
|
|
2013-11-03 23:35:18 +00:00
|
|
|
|
LOG_TRACE(log, "Destroying aggregate states");
|
2013-02-16 18:59:05 +00:00
|
|
|
|
|
|
|
|
|
/// В какой структуре данных агрегированы данные?
|
2013-05-04 15:46:50 +00:00
|
|
|
|
if (result.type == AggregatedDataVariants::WITHOUT_KEY || with_totals)
|
2013-02-16 18:59:05 +00:00
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithoutKey & res_data = result.without_key;
|
|
|
|
|
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]);
|
|
|
|
|
}
|
2013-05-04 15:46:50 +00:00
|
|
|
|
if (result.type == AggregatedDataVariants::KEY_64)
|
2013-02-16 18:59:05 +00:00
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithUInt64Key & res_data = result.key64;
|
|
|
|
|
|
|
|
|
|
for (AggregatedDataWithUInt64Key::const_iterator it = res_data.begin(); it != res_data.end(); ++it)
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
|
|
|
|
|
}
|
|
|
|
|
else if (result.type == AggregatedDataVariants::KEY_STRING)
|
|
|
|
|
{
|
|
|
|
|
AggregatedDataWithStringKey & res_data = result.key_string;
|
|
|
|
|
|
|
|
|
|
for (AggregatedDataWithStringKey::const_iterator it = res_data.begin(); it != res_data.end(); ++it)
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
|
|
|
|
|
}
|
|
|
|
|
else if (result.type == AggregatedDataVariants::HASHED)
|
|
|
|
|
{
|
|
|
|
|
AggregatedDataHashed & res_data = result.hashed;
|
|
|
|
|
|
|
|
|
|
for (AggregatedDataHashed::iterator it = res_data.begin(); it != res_data.end(); ++it)
|
|
|
|
|
for (size_t i = 0; i < aggregates_size; ++i)
|
|
|
|
|
aggregate_functions[i]->destroy(it->second.second + offsets_of_aggregate_states[i]);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2013-05-03 10:20:53 +00:00
|
|
|
|
|
|
|
|
|
String Aggregator::getID() const
|
|
|
|
|
{
|
|
|
|
|
std::stringstream res;
|
|
|
|
|
|
|
|
|
|
if (keys.empty())
|
|
|
|
|
{
|
|
|
|
|
res << "key_names";
|
|
|
|
|
for (size_t i = 0; i < key_names.size(); ++i)
|
|
|
|
|
res << ", " << key_names[i];
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
res << "keys";
|
|
|
|
|
for (size_t i = 0; i < keys.size(); ++i)
|
|
|
|
|
res << ", " << keys[i];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
res << ", aggregates";
|
|
|
|
|
for (size_t i = 0; i < aggregates.size(); ++i)
|
|
|
|
|
res << ", " << aggregates[i].column_name;
|
|
|
|
|
|
|
|
|
|
return res.str();
|
|
|
|
|
}
|
|
|
|
|
|
2011-09-19 01:42:16 +00:00
|
|
|
|
}
|