ClickHouse/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h

#pragma once

#include <DB/Interpreters/Aggregator.h>
#include <DB/IO/ReadBufferFromFile.h>
#include <DB/IO/CompressedReadBuffer.h>
#include <DB/DataStreams/IProfilingBlockInputStream.h>
#include <DB/DataStreams/BlocksListBlockInputStream.h>
#include <DB/DataStreams/NativeBlockInputStream.h>
#include <DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h>
#include <DB/DataStreams/ParallelInputsProcessor.h>
#include <common/Revision.h>


namespace DB
{

using Poco::SharedPtr;


/** Агрегирует несколько источников параллельно.
  * Производит агрегацию блоков из разных источников независимо в разных потоках, затем объединяет результаты.
  * Если final == false, агрегатные функции не финализируются, то есть, не заменяются на своё значение, а содержат промежуточное состояние вычислений.
  * Это необходимо, чтобы можно было продолжить агрегацию (например, объединяя потоки частично агрегированных данных).
  */
class ParallelAggregatingBlockInputStream : public IProfilingBlockInputStream
{
public:
	/** Столбцы из key_names и аргументы агрегатных функций, уже должны быть вычислены.
	  */
	ParallelAggregatingBlockInputStream(
		BlockInputStreams inputs, BlockInputStreamPtr additional_input_at_end,
		const Aggregator::Params & params_, bool final_, size_t max_threads_)
		: params(params_), aggregator(params),
		final(final_), max_threads(std::min(inputs.size(), max_threads_)),
		keys_size(params.keys_size), aggregates_size(params.aggregates_size),
		handler(*this), processor(inputs, additional_input_at_end, max_threads, handler)
	{
		children = inputs;
		if (additional_input_at_end)
			children.push_back(additional_input_at_end);
	}

	String getName() const override { return "ParallelAggregating"; }

	String getID() const override
	{
		std::stringstream res;
		res << "ParallelAggregating(";

		Strings children_ids(children.size());
		for (size_t i = 0; i < children.size(); ++i)
			children_ids[i] = children[i]->getID();

		/// Порядок не имеет значения.
		std::sort(children_ids.begin(), children_ids.end());

		for (size_t i = 0; i < children_ids.size(); ++i)
			res << (i == 0 ? "" : ", ") << children_ids[i];

		res << ", " << aggregator.getID() << ")";
		return res.str();
	}

	void cancel() override
	{
		bool old_val = false;
		if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed))
			return;

		processor.cancel();
	}

protected:
	Block readImpl() override
	{
		if (!executed)
		{
			executed = true;

			Aggregator::CancellationHook hook = [&]() { return this->isCancelled(); };
			aggregator.setCancellationHook(hook);

			execute();

			if (isCancelled())
				return {};

			if (!aggregator.hasTemporaryFiles())
			{
				/** Если все частично-агрегированные данные в оперативке, то мерджим их параллельно, тоже в оперативке.
				  * NOTE Если израсходовано больше половины допустимой памяти, то мерджить следовало бы более экономно.
				  */
				AggregatedDataVariantsPtr data_variants = aggregator.merge(many_data, max_threads);

				if (data_variants)
					impl.reset(new BlocksListBlockInputStream(
						aggregator.convertToBlocks(*data_variants, final, max_threads)));
			}
			else
			{
				/** Если есть временные файлы с частично-агрегированными данными на диске,
				  *  то читаем и мерджим их, расходуя минимальное количество памяти.
				  */

				/// Сбросим имеющиеся в оперативке данные тоже на диск. Так проще. NOTE Это можно делать параллельно.
				for (AggregatedDataVariantsPtr & data : many_data)
				{
					size_t rows = data->sizeWithoutOverflowRow();
					if (rows)
						aggregator.writeToTemporaryFile(*data, rows);
				}

				const auto & files = aggregator.getTemporaryFiles();
				BlockInputStreams input_streams;
				for (const auto & file : files.files)
				{
					temporary_inputs.emplace_back(new TemporaryFileStream(file->path()));
					input_streams.emplace_back(temporary_inputs.back()->block_in);
				}

				LOG_TRACE(log, "Will merge " << files.files.size() << " temporary files of size "
					<< (files.sum_size_compressed / 1048576.0) << " MiB compressed, "
					<< (files.sum_size_uncompressed / 1048576.0) << " MiB uncompressed.");

				impl.reset(new MergingAggregatedMemoryEfficientBlockInputStream(input_streams, params, final));
			}
		}

		Block res;
		if (isCancelled() || !impl)
			return res;

		return impl->read();
	}

private:
	Aggregator::Params params;
	Aggregator aggregator;
	bool final;
	size_t max_threads;

	size_t keys_size;
	size_t aggregates_size;

	/** Используется, если есть ограничение на максимальное количество строк при агрегации,
	  *  и если group_by_overflow_mode == ANY.
	  * В этом случае, новые ключи не добавляются в набор, а производится агрегация только по
	  *  ключам, которые уже успели попасть в набор.
	  */
	bool no_more_keys = false;

	bool executed = false;

	/// Для чтения сброшенных во временный файл данных.
	struct TemporaryFileStream
	{
		ReadBufferFromFile file_in;
		CompressedReadBuffer compressed_in;
		BlockInputStreamPtr block_in;

		TemporaryFileStream(const std::string & path)
			: file_in(path), compressed_in(file_in), block_in(new NativeBlockInputStream(compressed_in, Revision::get())) {}
	};
	std::vector<std::unique_ptr<TemporaryFileStream>> temporary_inputs;

	/** Отсюда будем доставать готовые блоки после агрегации.
	  */
	std::unique_ptr<IBlockInputStream> impl;

	Logger * log = &Logger::get("ParallelAggregatingBlockInputStream");


	ManyAggregatedDataVariants many_data;
	Exceptions exceptions;

	struct ThreadData
	{
		size_t src_rows = 0;
		size_t src_bytes = 0;

		StringRefs key;
		ConstColumnPlainPtrs key_columns;
		Aggregator::AggregateColumns aggregate_columns;
		Sizes key_sizes;

		ThreadData(size_t keys_size, size_t aggregates_size)
		{
			key.resize(keys_size);
			key_columns.resize(keys_size);
			aggregate_columns.resize(aggregates_size);
			key_sizes.resize(keys_size);
		}
	};

	std::vector<ThreadData> threads_data;


	struct Handler
	{
		Handler(ParallelAggregatingBlockInputStream & parent_)
			: parent(parent_) {}

		void onBlock(Block & block, size_t thread_num)
		{
			parent.aggregator.executeOnBlock(block, *parent.many_data[thread_num],
				parent.threads_data[thread_num].key_columns, parent.threads_data[thread_num].aggregate_columns,
				parent.threads_data[thread_num].key_sizes, parent.threads_data[thread_num].key,
				parent.no_more_keys);

			parent.threads_data[thread_num].src_rows += block.rowsInFirstColumn();
			parent.threads_data[thread_num].src_bytes += block.bytes();
		}

		void onFinish()
		{
		}

		void onException(std::exception_ptr & exception, size_t thread_num)
		{
			parent.exceptions[thread_num] = exception;
			parent.cancel();
		}

		ParallelAggregatingBlockInputStream & parent;
	};

	Handler handler;
	ParallelInputsProcessor<Handler> processor;


	void execute()
	{
		many_data.resize(max_threads);
		exceptions.resize(max_threads);

		for (size_t i = 0; i < max_threads; ++i)
			threads_data.emplace_back(keys_size, aggregates_size);

		LOG_TRACE(log, "Aggregating");

		Stopwatch watch;

		for (auto & elem : many_data)
			elem = new AggregatedDataVariants;

		processor.process();
		processor.wait();

		rethrowFirstException(exceptions);

		if (isCancelled())
			return;

		double elapsed_seconds = watch.elapsedSeconds();

		size_t total_src_rows = 0;
		size_t total_src_bytes = 0;
		for (size_t i = 0; i < max_threads; ++i)
		{
			size_t rows = many_data[i]->size();
			LOG_TRACE(log, std::fixed << std::setprecision(3)
				<< "Aggregated. " << threads_data[i].src_rows << " to " << rows << " rows"
					<< " (from " << threads_data[i].src_bytes / 1048576.0 << " MiB)"
				<< " in " << elapsed_seconds << " sec."
				<< " (" << threads_data[i].src_rows / elapsed_seconds << " rows/sec., "
					<< threads_data[i].src_bytes / elapsed_seconds / 1048576.0 << " MiB/sec.)");

			total_src_rows += threads_data[i].src_rows;
			total_src_bytes += threads_data[i].src_bytes;
		}
		LOG_TRACE(log, std::fixed << std::setprecision(3)
			<< "Total aggregated. " << total_src_rows << " rows (from " << total_src_bytes / 1048576.0 << " MiB)"
			<< " in " << elapsed_seconds << " sec."
			<< " (" << total_src_rows / elapsed_seconds << " rows/sec., " << total_src_bytes / elapsed_seconds / 1048576.0 << " MiB/sec.)");
	}
};

}
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+								#pragma once
 								#include <DB/Interpreters/Aggregator.h>
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+								#include <DB/IO/ReadBufferFromFile.h>
 								#include <DB/IO/CompressedReadBuffer.h>
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+								#include <DB/DataStreams/IProfilingBlockInputStream.h>
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+								#include <DB/DataStreams/BlocksListBlockInputStream.h>
 								#include <DB/DataStreams/NativeBlockInputStream.h>
 								#include <DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h>
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+								#include <DB/DataStreams/ParallelInputsProcessor.h>
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+								#include <common/Revision.h>
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
 								namespace DB
 								{
 								using Poco::SharedPtr;
 								/** Агрегирует несколько источников параллельно.
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+								  * Производит агрегацию блоков из разных источников независимо в разных потоках, затем объединяет результаты.
-												dbms: tiny fix [#METR-2944].

											
										
										
											2014-11-30 06:59:03 +00:00
+								  * Если final == false, агрегатные функции не финализируются, то есть, не заменяются на своё значение, а содержат промежуточное состояние вычислений.
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+								  * Это необходимо, чтобы можно было продолжить агрегацию (например, объединяя потоки частично агрегированных данных).
 								  */
 								class ParallelAggregatingBlockInputStream : public IProfilingBlockInputStream
 								{
 								public:
-												clickhouse: totals mode: development. [#METR-9365]

											
										
										
											2014-02-26 11:44:54 +00:00
+									/** Столбцы из key_names и аргументы агрегатных функций, уже должны быть вычислены.
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+									  */
-												dbms: FULL and RIGHT JOIN: preparation [#METR-15418].

											
										
										
											2015-07-21 21:29:02 +00:00
+									ParallelAggregatingBlockInputStream(
 										BlockInputStreams inputs, BlockInputStreamPtr additional_input_at_end,
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+										const Aggregator::Params & params_, bool final_, size_t max_threads_)
 										: params(params_), aggregator(params),
-												dbms: tiny modifications [#METR-2944].

											
										
										
											2015-01-08 18:52:48 +00:00
+										final(final_), max_threads(std::min(inputs.size(), max_threads_)),
-												dbms: Aggregator: preparation [#METR-17000].

											
										
										
											2015-11-30 16:57:05 +00:00
+										keys_size(params.keys_size), aggregates_size(params.aggregates_size),
-												dbms: FULL and RIGHT JOIN: preparation [#METR-15418].

											
										
										
											2015-07-21 21:29:02 +00:00
+										handler(*this), processor(inputs, additional_input_at_end, max_threads, handler)
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+									{
-												dbms: FULL and RIGHT JOIN: preparation [#METR-15418].

											
										
										
											2015-07-21 21:29:02 +00:00
+										children = inputs;
 										if (additional_input_at_end)
 											children.push_back(additional_input_at_end);
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+									}
-												dbms: removed useless code [#METR-16739].

											
										
										
											2015-06-08 20:22:02 +00:00
+									String getName() const override { return "ParallelAggregating"; }
-												dbms: development [#CONV-2944].



											
										
										
											2012-10-20 02:10:47 +00:00
-												dbms: showing progress for INSERT SELECT query [#METR-13612].

											
										
										
											2014-11-08 23:52:18 +00:00
+									String getID() const override
-												dbms: development of multi-queries [#CONV-2944].



											
										
										
											2013-05-03 10:20:53 +00:00
+									{
 										std::stringstream res;
 										res << "ParallelAggregating(";
 										Strings children_ids(children.size());
 										for (size_t i = 0; i < children.size(); ++i)
 											children_ids[i] = children[i]->getID();
 										/// Порядок не имеет значения.
 										std::sort(children_ids.begin(), children_ids.end());
 										for (size_t i = 0; i < children_ids.size(); ++i)
 											res << (i == 0 ? "" : ", ") << children_ids[i];
-												dbms: tiny modifications [#METR-2944].

											
										
										
											2015-01-08 18:52:48 +00:00
+										res << ", " << aggregator.getID() << ")";
-												dbms: development of multi-queries [#CONV-2944].



											
										
										
											2013-05-03 10:20:53 +00:00
+										return res.str();
 									}
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+									void cancel() override
 									{
-												dbms: Server: fixed issue with query cancellation. [#METR-14410]

											
										
										
											2015-03-20 16:20:47 +00:00
+										bool old_val = false;
 										if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed))
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+											return;
 										processor.cancel();
 									}
-												dbms: development [#CONV-2944].



											
										
										
											2012-10-20 02:10:47 +00:00
+								protected:
-												dbms: showing progress for INSERT SELECT query [#METR-13612].

											
										
										
											2014-11-08 23:52:18 +00:00
+									Block readImpl() override
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+									{
-												dbms: more scalable aggregator: development [#METR-2944].

											
										
										
											2015-01-02 03:16:28 +00:00
+										if (!executed)
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+										{
-												dbms: more scalable aggregator: development [#METR-2944].

											
										
										
											2015-01-02 03:16:28 +00:00
+											executed = true;
-												Merge

											
										
										
											2015-04-16 14:27:56 +00:00
 											Aggregator::CancellationHook hook = [&]() { return this->isCancelled(); };
 											aggregator.setCancellationHook(hook);
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+											execute();
 											if (isCancelled())
 												return {};
 											if (!aggregator.hasTemporaryFiles())
 											{
 												/** Если все частично-агрегированные данные в оперативке, то мерджим их параллельно, тоже в оперативке.
 												  * NOTE Если израсходовано больше половины допустимой памяти, то мерджить следовало бы более экономно.
 												  */
 												AggregatedDataVariantsPtr data_variants = aggregator.merge(many_data, max_threads);
 												if (data_variants)
 													impl.reset(new BlocksListBlockInputStream(
 														aggregator.convertToBlocks(*data_variants, final, max_threads)));
 											}
 											else
 											{
 												/** Если есть временные файлы с частично-агрегированными данными на диске,
 												  *  то читаем и мерджим их, расходуя минимальное количество памяти.
 												  */
 												/// Сбросим имеющиеся в оперативке данные тоже на диск. Так проще. NOTE Это можно делать параллельно.
 												for (AggregatedDataVariantsPtr & data : many_data)
 												{
 													size_t rows = data->sizeWithoutOverflowRow();
 													if (rows)
 														aggregator.writeToTemporaryFile(*data, rows);
 												}
 												const auto & files = aggregator.getTemporaryFiles();
 												BlockInputStreams input_streams;
-												dbms: external aggregation: development [#METR-17000].

											
										
										
											2015-12-01 16:58:15 +00:00
+												for (const auto & file : files.files)
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+												{
 													temporary_inputs.emplace_back(new TemporaryFileStream(file->path()));
 													input_streams.emplace_back(temporary_inputs.back()->block_in);
 												}
-												dbms: external aggregation: development [#METR-17000].

											
										
										
											2015-12-01 16:58:15 +00:00
+												LOG_TRACE(log, "Will merge " << files.files.size() << " temporary files of size "
 													<< (files.sum_size_compressed / 1048576.0) << " MiB compressed, "
 													<< (files.sum_size_uncompressed / 1048576.0) << " MiB uncompressed.");
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+												impl.reset(new MergingAggregatedMemoryEfficientBlockInputStream(input_streams, params, final));
 											}
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+										}
-												dbms: more scalable aggregator: development [#METR-2944].

											
										
										
											2015-01-02 03:16:28 +00:00
+										Block res;
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+										if (isCancelled() || !impl)
-												dbms: more scalable aggregator: development [#METR-2944].

											
										
										
											2015-01-02 03:16:28 +00:00
+											return res;
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+										return impl->read();
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+									}
 								private:
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+									Aggregator::Params params;
-												dbms: tiny modifications [#METR-2944].

											
										
										
											2015-01-08 18:52:48 +00:00
+									Aggregator aggregator;
-												dbms: merged convertToBlock and finalize aggregation steps whenever possible [#METR-2944].



											
										
										
											2013-11-03 23:35:18 +00:00
+									bool final;
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+									size_t max_threads;
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+									size_t keys_size;
 									size_t aggregates_size;
 									/** Используется, если есть ограничение на максимальное количество строк при агрегации,
 									  *  и если group_by_overflow_mode == ANY.
 									  * В этом случае, новые ключи не добавляются в набор, а производится агрегация только по
 									  *  ключам, которые уже успели попасть в набор.
 									  */
 									bool no_more_keys = false;
-												dbms: more scalable aggregator: development [#METR-2944].

											
										
										
											2015-01-02 03:16:28 +00:00
+									bool executed = false;
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
 									/// Для чтения сброшенных во временный файл данных.
 									struct TemporaryFileStream
 									{
 										ReadBufferFromFile file_in;
 										CompressedReadBuffer compressed_in;
 										BlockInputStreamPtr block_in;
 										TemporaryFileStream(const std::string & path)
 											: file_in(path), compressed_in(file_in), block_in(new NativeBlockInputStream(compressed_in, Revision::get())) {}
 									};
 									std::vector<std::unique_ptr<TemporaryFileStream>> temporary_inputs;
 									/** Отсюда будем доставать готовые блоки после агрегации.
 									  */
 									std::unique_ptr<IBlockInputStream> impl;
-												dbms: more scalable aggregator: development [#METR-2944].

											
										
										
											2015-01-02 03:16:28 +00:00
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+									Logger * log = &Logger::get("ParallelAggregatingBlockInputStream");
-												dbms: fixed possible error [#METR-15530].

											
										
										
											2015-03-18 02:48:36 +00:00
+									ManyAggregatedDataVariants many_data;
 									Exceptions exceptions;
 									struct ThreadData
 									{
 										size_t src_rows = 0;
 										size_t src_bytes = 0;
 										StringRefs key;
 										ConstColumnPlainPtrs key_columns;
 										Aggregator::AggregateColumns aggregate_columns;
 										Sizes key_sizes;
 										ThreadData(size_t keys_size, size_t aggregates_size)
 										{
 											key.resize(keys_size);
 											key_columns.resize(keys_size);
 											aggregate_columns.resize(aggregates_size);
 											key_sizes.resize(keys_size);
 										}
 									};
 									std::vector<ThreadData> threads_data;
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+									struct Handler
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+									{
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+										Handler(ParallelAggregatingBlockInputStream & parent_)
 											: parent(parent_) {}
-												dbms: tracking amount of memory usage per query [#METR-11015].

											
										
										
											2014-05-03 22:57:43 +00:00
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+										void onBlock(Block & block, size_t thread_num)
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+										{
-												dbms: tiny modifications [#METR-2944].

											
										
										
											2015-01-08 18:52:48 +00:00
+											parent.aggregator.executeOnBlock(block, *parent.many_data[thread_num],
-												dbms: more uniform aggregation: better [#METR-2944].

											
										
										
											2014-12-14 23:07:57 +00:00
+												parent.threads_data[thread_num].key_columns, parent.threads_data[thread_num].aggregate_columns,
-												dbms: added setting 'group_by_two_level_threshold' for fine tuning [#METR-2944].

											
										
										
											2015-02-22 05:51:16 +00:00
+												parent.threads_data[thread_num].key_sizes, parent.threads_data[thread_num].key,
 												parent.no_more_keys);
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
-												dbms: more uniform aggregation: better [#METR-2944].

											
										
										
											2014-12-14 23:07:57 +00:00
+											parent.threads_data[thread_num].src_rows += block.rowsInFirstColumn();
 											parent.threads_data[thread_num].src_bytes += block.bytes();
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+										}
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
 										void onFinish()
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+										{
 										}
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
-												Probably better [#METR-17973].

											
										
										
											2015-10-05 05:40:27 +00:00
+										void onException(std::exception_ptr & exception, size_t thread_num)
-												dbms: more uniform load of threads while aggragation [#METR-2944].

											
										
										
											2014-11-30 18:22:57 +00:00
+										{
 											parent.exceptions[thread_num] = exception;
 											parent.cancel();
 										}
 										ParallelAggregatingBlockInputStream & parent;
 									};
 									Handler handler;
 									ParallelInputsProcessor<Handler> processor;
-												dbms: more scalable aggregator: development [#METR-2944].

											
										
										
											2015-01-02 03:16:28 +00:00
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+									void execute()
-												dbms: more scalable aggregator: development [#METR-2944].

											
										
										
											2015-01-02 03:16:28 +00:00
+									{
 										many_data.resize(max_threads);
 										exceptions.resize(max_threads);
 										for (size_t i = 0; i < max_threads; ++i)
 											threads_data.emplace_back(keys_size, aggregates_size);
 										LOG_TRACE(log, "Aggregating");
 										Stopwatch watch;
 										for (auto & elem : many_data)
 											elem = new AggregatedDataVariants;
 										processor.process();
 										processor.wait();
 										rethrowFirstException(exceptions);
 										if (isCancelled())
-												dbms: external aggregation: initial implementation [#METR-17000].

											
										
										
											2015-12-01 14:09:05 +00:00
+											return;
-												dbms: more scalable aggregator: development [#METR-2944].

											
										
										
											2015-01-02 03:16:28 +00:00
 										double elapsed_seconds = watch.elapsedSeconds();
 										size_t total_src_rows = 0;
 										size_t total_src_bytes = 0;
 										for (size_t i = 0; i < max_threads; ++i)
 										{
 											size_t rows = many_data[i]->size();
 											LOG_TRACE(log, std::fixed << std::setprecision(3)
 												<< "Aggregated. " << threads_data[i].src_rows << " to " << rows << " rows"
 													<< " (from " << threads_data[i].src_bytes / 1048576.0 << " MiB)"
 												<< " in " << elapsed_seconds << " sec."
 												<< " (" << threads_data[i].src_rows / elapsed_seconds << " rows/sec., "
 													<< threads_data[i].src_bytes / elapsed_seconds / 1048576.0 << " MiB/sec.)");
 											total_src_rows += threads_data[i].src_rows;
 											total_src_bytes += threads_data[i].src_bytes;
 										}
 										LOG_TRACE(log, std::fixed << std::setprecision(3)
 											<< "Total aggregated. " << total_src_rows << " rows (from " << total_src_bytes / 1048576.0 << " MiB)"
 											<< " in " << elapsed_seconds << " sec."
 											<< " (" << total_src_rows / elapsed_seconds << " rows/sec., " << total_src_bytes / elapsed_seconds / 1048576.0 << " MiB/sec.)");
 									}
-												dbms: development [#CONV-2944].



											
										
										
											2012-02-27 06:28:20 +00:00
+								};
 								}