dbms: compiled aggregator: development [#METR-2944].

2024-11-22 07:31:57 +00:00 · 2015-01-10 05:30:03 +03:00 · 2015-01-10 05:30:03 +03:00 · a7a2712630
commit a7a2712630
parent 6673251a20
19 changed files with 450 additions and 275 deletions
--- a/copy_headers.sh
+++ b/copy_headers.sh
@ -8,7 +8,7 @@
 SOURCE_PATH=${1:-.}
 DST=${2:-$SOURCE_PATH/../headers};

-for i in $(/usr/bin/c++ -M -xc++ -std=gnu++1y -Wall -Werror -march=native -O3 -g -shared -fPIC -rdynamic \
+for i in $(clang -M -xc++ -std=gnu++1y -Wall -Werror -march=native -O3 -g -fPIC \
 	$(cat $SOURCE_PATH/CMakeLists.txt | grep include_directories | grep -v METRICA_BINARY_DIR | sed -e "s!\${METRICA_SOURCE_DIR}!$SOURCE_PATH!; s!include_directories (!-I !; s!)!!;" | tr '\n' ' ') \
 	$SOURCE_PATH/dbms/include/DB/Interpreters/SpecializedAggregator.h |
 	tr -d '\\' |
--- a/dbms/include/DB/Common/UInt128.h
+++ b/dbms/include/DB/Common/UInt128.h
@ -1,3 +1,5 @@
+#pragma once
+
 #include <DB/Common/HashTable/Hash.h>
 #include <DB/IO/ReadHelpers.h>
 #include <DB/IO/WriteHelpers.h>
--- a/dbms/include/DB/Core/Block.h
+++ b/dbms/include/DB/Core/Block.h
@ -132,7 +132,7 @@ bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs);

 namespace std
 {
-	template<> inline void swap<DB::Block>(DB::Block & one, DB::Block & another) noexcept
+	template<> inline void swap<DB::Block>(DB::Block & one, DB::Block & another)
 	{
 		one.swap(another);
 	}
--- a/dbms/include/DB/DataStreams/AggregatingBlockInputStream.h
+++ b/dbms/include/DB/DataStreams/AggregatingBlockInputStream.h
@ -19,8 +19,9 @@ class AggregatingBlockInputStream : public IProfilingBlockInputStream
 {
 public:
 	AggregatingBlockInputStream(BlockInputStreamPtr input_, const ColumnNumbers & keys_, AggregateDescriptions & aggregates_,
-		bool overflow_row_, bool final_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_)
-		: aggregator(new Aggregator(keys_, aggregates_, overflow_row_, max_rows_to_group_by_, group_by_overflow_mode_)),
+		bool overflow_row_, bool final_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
+		Compiler * compiler_, UInt32 min_count_to_compile_)
+		: aggregator(keys_, aggregates_, overflow_row_, max_rows_to_group_by_, group_by_overflow_mode_, compiler_, min_count_to_compile_),
 		final(final_)
 	{
 		children.push_back(input_);
@ -31,21 +32,27 @@ public:
 	  * Столбцы, соответствующие keys и аргументам агрегатных функций, уже должны быть вычислены.
 	  */
 	AggregatingBlockInputStream(BlockInputStreamPtr input_, const Names & key_names, const AggregateDescriptions & aggregates,
-		bool overflow_row_, bool final_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_);
+		bool overflow_row_, bool final_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
+		Compiler * compiler_, UInt32 min_count_to_compile_)
+		: aggregator(key_names, aggregates, overflow_row_, max_rows_to_group_by_, group_by_overflow_mode_, compiler_, min_count_to_compile_),
+		final(final_)
+	{
+		children.push_back(input_);
+	}

 	String getName() const override { return "AggregatingBlockInputStream"; }

 	String getID() const override
 	{
 		std::stringstream res;
-		res << "Aggregating(" << children.back()->getID() << ", " << aggregator->getID() << ")";
+		res << "Aggregating(" << children.back()->getID() << ", " << aggregator.getID() << ")";
 		return res.str();
 	}

 protected:
 	Block readImpl() override;

-	SharedPtr<Aggregator> aggregator;
+	Aggregator aggregator;
 	bool final;

 	bool executed = false;
--- a/dbms/include/DB/DataStreams/MergingAggregatedBlockInputStream.h
+++ b/dbms/include/DB/DataStreams/MergingAggregatedBlockInputStream.h
@ -18,14 +18,16 @@ class MergingAggregatedBlockInputStream : public IProfilingBlockInputStream
 public:
 	MergingAggregatedBlockInputStream(BlockInputStreamPtr input_, const ColumnNumbers & keys_,
 		const AggregateDescriptions & aggregates_, bool overflow_row_, bool final_, size_t max_threads_)
-		: aggregator(new Aggregator(keys_, aggregates_, overflow_row_)), final(final_), max_threads(max_threads_)
+		: aggregator(keys_, aggregates_, overflow_row_, 0, OverflowMode::THROW, nullptr, 0),
+		final(final_), max_threads(max_threads_)
 	{
 		children.push_back(input_);
 	}

 	MergingAggregatedBlockInputStream(BlockInputStreamPtr input_, const Names & keys_names_,
 		const AggregateDescriptions & aggregates_, bool overflow_row_, bool final_, size_t max_threads_)
-		: aggregator(new Aggregator(keys_names_, aggregates_, overflow_row_)), final(final_), max_threads(max_threads_)
+		: aggregator(keys_names_, aggregates_, overflow_row_, 0, OverflowMode::THROW, nullptr, 0),
+		final(final_), max_threads(max_threads_)
 	{
 		children.push_back(input_);
 	}
@ -35,7 +37,7 @@ public:
 	String getID() const override
 	{
 		std::stringstream res;
-		res << "MergingAggregated(" << children.back()->getID() << ", " << aggregator->getID() << ")";
+		res << "MergingAggregated(" << children.back()->getID() << ", " << aggregator.getID() << ")";
 		return res.str();
 	}

@ -43,7 +45,7 @@ protected:
 	Block readImpl() override;

 private:
-	SharedPtr<Aggregator> aggregator;
+	Aggregator aggregator;
 	bool final;
 	size_t max_threads;

--- a/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h
+++ b/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h
@ -21,8 +21,9 @@ class ParallelAggregatingBlockInputStream : public IProfilingBlockInputStream
 public:
 	ParallelAggregatingBlockInputStream(BlockInputStreams inputs, const ColumnNumbers & keys_,
 		AggregateDescriptions & aggregates_, bool overflow_row_, bool final_, size_t max_threads_,
-		size_t max_rows_to_group_by_ = 0, OverflowMode group_by_overflow_mode_ = OverflowMode::THROW)
-		: aggregator(keys_, aggregates_, overflow_row_, max_rows_to_group_by_, group_by_overflow_mode_),
+		size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
+		Compiler * compiler_, UInt32 min_count_to_compile_)
+		: aggregator(keys_, aggregates_, overflow_row_, max_rows_to_group_by_, group_by_overflow_mode_, compiler_, min_count_to_compile_),
 		final(final_), max_threads(std::min(inputs.size(), max_threads_)),
 		keys_size(keys_.size()), aggregates_size(aggregates_.size()),
 		handler(*this), processor(inputs, max_threads, handler)
@ -34,8 +35,9 @@ public:
 	  */
 	ParallelAggregatingBlockInputStream(BlockInputStreams inputs, const Names & key_names,
 		const AggregateDescriptions & aggregates,	bool overflow_row_, bool final_, size_t max_threads_,
-		size_t max_rows_to_group_by_ = 0, OverflowMode group_by_overflow_mode_ = OverflowMode::THROW)
-		: aggregator(key_names, aggregates, overflow_row_, max_rows_to_group_by_, group_by_overflow_mode_),
+		size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_,
+		Compiler * compiler_, UInt32 min_count_to_compile_)
+		: aggregator(key_names, aggregates, overflow_row_, max_rows_to_group_by_, group_by_overflow_mode_, compiler_, min_count_to_compile_),
 		final(final_), max_threads(std::min(inputs.size(), max_threads_)),
 		keys_size(key_names.size()), aggregates_size(aggregates.size()),
 		handler(*this), processor(inputs, max_threads, handler)
--- a/dbms/include/DB/Interpreters/Aggregator.h
+++ b/dbms/include/DB/Interpreters/Aggregator.h
@ -15,6 +15,7 @@
 #include <DB/Interpreters/AggregateDescription.h>
 #include <DB/Interpreters/AggregationCommon.h>
 #include <DB/Interpreters/Limits.h>
+#include <DB/Interpreters/Compiler.h>

 #include <DB/Columns/ColumnString.h>
 #include <DB/Columns/ColumnFixedString.h>
@ -606,10 +607,11 @@ class Aggregator
 {
 public:
 	Aggregator(const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, bool overflow_row_,
-		size_t max_rows_to_group_by_ = 0, OverflowMode group_by_overflow_mode_ = OverflowMode::THROW)
+		size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_, Compiler * compiler_, UInt32 min_count_to_compile_)
 		: keys(keys_), aggregates(aggregates_), aggregates_size(aggregates.size()),
 		overflow_row(overflow_row_),
-		max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_)
+		max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_),
+		compiler(compiler_), min_count_to_compile(min_count_to_compile_)
 	{
 		std::sort(keys.begin(), keys.end());
 		keys.erase(std::unique(keys.begin(), keys.end()), keys.end());
@ -617,10 +619,11 @@ public:
 	}

 	Aggregator(const Names & key_names_, const AggregateDescriptions & aggregates_, bool overflow_row_,
-		size_t max_rows_to_group_by_ = 0, OverflowMode group_by_overflow_mode_ = OverflowMode::THROW)
+		size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_, Compiler * compiler_, UInt32 min_count_to_compile_)
 		: key_names(key_names_), aggregates(aggregates_), aggregates_size(aggregates.size()),
 		overflow_row(overflow_row_),
-		max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_)
+		max_rows_to_group_by(max_rows_to_group_by_), group_by_overflow_mode(group_by_overflow_mode_),
+		compiler(compiler_), min_count_to_compile(min_count_to_compile_)
 	{
 		std::sort(key_names.begin(), key_names.end());
 		key_names.erase(std::unique(key_names.begin(), key_names.end()), key_names.end());
@ -630,8 +633,9 @@ public:
 	/// Агрегировать источник. Получить результат в виде одной из структур данных.
 	void execute(BlockInputStreamPtr stream, AggregatedDataVariants & result);

-	typedef std::vector<ConstColumnPlainPtrs> AggregateColumns;
-	typedef std::vector<ColumnAggregateFunction::Container_t *> AggregateColumnsData;
+	using AggregateColumns = std::vector<ConstColumnPlainPtrs>;
+	using AggregateColumnsData = std::vector<ColumnAggregateFunction::Container_t *>;
+	using AggregateFunctionsPlainPtrs = std::vector<IAggregateFunction *>;

 	/// Обработать один блок. Вернуть false, если обработку следует прервать (при group_by_overflow_mode = 'break').
 	bool executeOnBlock(Block & block, AggregatedDataVariants & result,
@ -669,7 +673,7 @@ protected:
 	ColumnNumbers keys;
 	Names key_names;
 	AggregateDescriptions aggregates;
-	std::vector<IAggregateFunction *> aggregate_functions;
+	AggregateFunctionsPlainPtrs aggregate_functions;
 	size_t keys_size;
 	size_t aggregates_size;
 	/// Нужно ли класть в AggregatedDataVariants::without_key агрегаты для ключей, не попавших в max_rows_to_group_by.
@ -690,6 +694,18 @@ protected:

 	Logger * log = &Logger::get("Aggregator");

+
+	/** Динамически скомпилированная библиотека для агрегации, если есть.
+	  */
+	Compiler * compiler = nullptr;
+	UInt32 min_count_to_compile;
+	Compiler::SharedLibraryPtr compiled_aggregator;
+	const void * compiled_method_ptr = nullptr;
+
+	bool compiled_if_possible = false;
+	void compileIfPossible(AggregatedDataVariants::Type type);
+
+
 	/** Если заданы только имена столбцов (key_names, а также aggregates[i].column_name), то вычислить номера столбцов.
 	  * Сформировать блок - пример результата.
 	  */
--- a/dbms/include/DB/Interpreters/Compiler.h
+++ b/dbms/include/DB/Interpreters/Compiler.h
@ -1,7 +1,6 @@
 #pragma once

 #include <dlfcn.h>
-#include <stdio.h>

 #include <string>
 #include <mutex>
@ -9,22 +8,12 @@
 #include <unordered_set>
 #include <unordered_map>

-#include <Poco/DirectoryIterator.h>
-
-#include <Yandex/Revision.h>
 #include <Yandex/logger_useful.h>
 #include <statdaemons/threadpool.hpp>

 #include <DB/Core/Types.h>
 #include <DB/Core/Exception.h>
-#include <DB/Common/SipHash.h>
 #include <DB/Common/UInt128.h>
-#include <DB/IO/Operators.h>
-#include <DB/IO/WriteBufferFromString.h>
-#include <DB/IO/ReadBufferFromString.h>
-#include <DB/IO/ReadBufferFromFileDescriptor.h>
-#include <DB/IO/copyData.h>
-#include <DB/IO/WriteBufferFromFile.h>


 namespace DB
@ -79,27 +68,11 @@ public:
 	  * Результаты компиляции сохраняются при перезапуске сервера,
 	  *  но используют в качестве части ключа номер ревизии. То есть, устаревают при обновлении сервера.
 	  */
-	Compiler(const std::string & path_, size_t threads)
-		: path(path_), pool(threads)
-	{
-		Poco::DirectoryIterator dir_end;
-		for (Poco::DirectoryIterator dir_it(path); dir_end != dir_it; ++dir_it)
-		{
-			std::string name = dir_it.name();
-			if (name.length() > strlen(".so") && 0 == name.compare(name.size() - 3, 3, ".so"))
-			{
-				files.insert(name.substr(0, name.size() - 3));
-			}
-		}
+	Compiler(const std::string & path_, size_t threads);
+	~Compiler();

-		LOG_INFO(log, "Having " << files.size() << " compiled files from previous start.");
-	}
-
-	~Compiler()
-	{
- 		LOG_DEBUG(log, "Waiting for threads to finish.");
-		pool.wait();
-	}
+	using HashedKey = UInt128;
+	using SharedLibraryPtr = std::shared_ptr<SharedLibrary>;

 	/** Увеличить счётчик для заданного ключа key на единицу.
 	  * Если результат компиляции уже есть (уже открыт, или есть файл с библиотекой),
@ -108,67 +81,14 @@ public:
 	  *  инициировать компиляцию в отдельном потоке, если есть свободные потоки, и вернуть nullptr.
 	  * Иначе вернуть nullptr.
 	  */
-	std::shared_ptr<SharedLibrary> getOrCount(
+	SharedLibraryPtr getOrCount(
 		const std::string & key,
 		UInt32 min_count_to_compile,
-		std::function<std::string()> get_code)
-	{
-		HashedKey hashed_key = getHash(key);
-
-		std::lock_guard<std::mutex> lock(mutex);
-
-		UInt32 count = ++counts[hashed_key];
-
-		/// Есть готовая открытая библиотека? Или, если библиотека в процессе компиляции, там будет nullptr.
-		Libraries::iterator it = libraries.find(hashed_key);
-		if (libraries.end() != it)
-		{
-			if (!it->second)
-				LOG_INFO(log, "Library " << hashedKeyToFileName(hashed_key) << " is compiling.");
-
-			return it->second;
-		}
-
-		/// Есть файл с библиотекой, оставшийся от предыдущего запуска?
-		std::string file_name = hashedKeyToFileName(hashed_key);
-		if (files.count(file_name))
-			return libraries.emplace(std::piecewise_construct,
-				std::forward_as_tuple(hashed_key),
-				std::forward_as_tuple(new SharedLibrary(path + '/' + file_name + ".so"))).first->second;
-
-		/// Достигнуто ли min_count_to_compile?
-		if (count >= min_count_to_compile)
-		{
-			/// Есть ли свободные потоки.
-			if (pool.active() < pool.size())
-			{
-				/// Обозначает, что библиотека в процессе компиляции.
-				libraries[hashed_key] = nullptr;
-
-				pool.schedule([=]
-				{
-					try
-					{
-						compile(hashed_key, file_name, get_code);
-					}
-					catch (...)
-					{
-						tryLogCurrentException("Compiler");
-					}
-				});
-			}
-			else
-				LOG_INFO(log, "All threads are busy.");
-		}
-
-		return nullptr;
-	}
-
+		std::function<std::string()> get_code);

 private:
-	using HashedKey = UInt128;
 	using Counts = std::unordered_map<HashedKey, UInt32, UInt128Hash>;
-	using Libraries = std::unordered_map<HashedKey, std::shared_ptr<SharedLibrary>, UInt128Hash>;
+	using Libraries = std::unordered_map<HashedKey, SharedLibraryPtr, UInt128Hash>;
 	using Files = std::unordered_set<std::string>;

 	const std::string path;
@ -188,137 +108,7 @@ private:
 	Logger * log = &Logger::get("Compiler");


-	static HashedKey getHash(const std::string & key)
-	{
-		SipHash hash;
-
-		auto revision = Revision::get();
-		hash.update(reinterpret_cast<const char *>(&revision), sizeof(revision));
-		hash.update(key.data(), key.size());
-
-		HashedKey res;
-		hash.get128(res.first, res.second);
-		return res;
-	}
-
-	/// Без расширения .so.
-	static std::string hashedKeyToFileName(HashedKey hashed_key)
-	{
-		std::string file_name;
-
-		{
-			WriteBufferFromString out(file_name);
-			out << hashed_key.first << '_' << hashed_key.second;
-		}
-
-		return file_name;
-	}
-
-	static HashedKey fileNameToHashedKey(const std::string & file_name)
-	{
-		HashedKey hashed_key;
-
-		{
-			ReadBufferFromString in(file_name);
-			in >> hashed_key.first >> "_" >> hashed_key.second;
-		}
-
-		return hashed_key;
-	}
-
-	struct Pipe : private boost::noncopyable
-	{
-		FILE * f;
-
-		Pipe(const std::string & command)
-		{
-			errno = 0;
-			f = popen(command.c_str(), "r");
-
-			if (!f)
-				throwFromErrno("Cannot popen");
-		}
-
-		~Pipe()
-		{
-			try
-			{
-				errno = 0;
-				if (f && -1 == pclose(f))
-					throwFromErrno("Cannot pclose");
-			}
-			catch (...)
-			{
-				tryLogCurrentException("Pipe");
-			}
-		}
-	};
-
-	void compile(HashedKey hashed_key, std::string file_name, std::function<std::string()> get_code)
-	{
-		LOG_INFO(log, "Compiling code " << file_name);
-
-		std::string prefix = path + "/" + file_name;
-		std::string cpp_file_path = prefix + ".cpp";
-		std::string so_file_path = prefix + ".so";
-
-		{
-			WriteBufferFromFile out(cpp_file_path);
-			out << get_code();
-		}
-
-		std::stringstream command;
-
-		/// Слегка неудобно.
-		command <<
-			"/usr/share/clickhouse/bin/clang"
-			" -x c++ -std=gnu++11 -O3 -g -Wall -Werror -Wnon-virtual-dtor -march=native -D NDEBUG"
-			" -shared -fPIC -fvisibility=hidden -fno-implement-inlines"
-			" -isystem /usr/share/clickhouse/headers/usr/local/include/"
-			" -isystem /usr/share/clickhouse/headers/usr/include/"
-			" -isystem /usr/share/clickhouse/headers/usr/include/c++/4.8/"
-			" -isystem /usr/share/clickhouse/headers/usr/include/x86_64-linux-gnu/c++/4.8/"
-			" -isystem /usr/share/clickhouse/headers/usr/lib/gcc/x86_64-linux-gnu/4.8/include/"
-			" -I /usr/share/clickhouse/headers/dbms/include/"
-			" -I /usr/share/clickhouse/headers/libs/libcityhash/"
-			" -I /usr/share/clickhouse/headers/libs/libcommon/include/"
-			" -I /usr/share/clickhouse/headers/libs/libdouble-conversion/src/"
-			" -I /usr/share/clickhouse/headers/libs/libmysqlxx/include/"
-			" -I /usr/share/clickhouse/headers/libs/libstatdaemons/include/"
-			" -I /usr/share/clickhouse/headers/libs/libstats/include/"
-			" -o " << so_file_path << " " << cpp_file_path
-			<< " 2>&1 || echo Exit code: $?";
-
-		std::string compile_result;
-
-		{
-			Pipe pipe(command.str());
-
-			int pipe_fd = fileno(pipe.f);
-			if (-1 == pipe_fd)
-				throwFromErrno("Cannot fileno");
-
-			{
-				ReadBufferFromFileDescriptor command_output(pipe_fd);
-				WriteBufferFromString res(compile_result);
-
-				copyData(command_output, res);
-			}
-		}
-
-		if (!compile_result.empty())
-			throw Exception("Cannot compile code:\n\n" + command.str() + "\n\n" + compile_result);
-
-		/// Если до этого была ошибка, то файл с кодом остаётся для возможности просмотра.
-		Poco::File(cpp_file_path).remove();
-
-		{
-			std::lock_guard<std::mutex> lock(mutex);
-			libraries[hashed_key].reset(new SharedLibrary(so_file_path));
-		}
-
-		LOG_INFO(log, "Compiled code " << file_name);
-	}
+	void compile(HashedKey hashed_key, std::string file_name, std::function<std::string()> get_code);
 };

 }
--- a/dbms/include/DB/Interpreters/Context.h
+++ b/dbms/include/DB/Interpreters/Context.h
@ -27,6 +27,7 @@
 #include <DB/Interpreters/ProcessList.h>
 #include <DB/Interpreters/Cluster.h>
 #include <DB/Interpreters/InterserverIOHandler.h>
+#include <DB/Interpreters/Compiler.h>
 #include <DB/Client/ConnectionPool.h>
 #include <statdaemons/ConfigProcessor.h>
 #include <zkutil/ZooKeeper.h>
@ -99,6 +100,7 @@ struct ContextShared
 	InterserverIOHandler interserver_io_handler;			/// Обработчик для межсерверной передачи данных.
 	BackgroundProcessingPoolPtr background_pool;			/// Пул потоков для фоновой работы, выполняемой таблицами.
 	Macros macros;											/// Подстановки из конфига.
+	Compiler compiler { path + "build/", 1 };				/// Для динамической компиляции частей запроса, при необходимости.

 	/// Кластеры для distributed таблиц
 	/// Создаются при создании Distributed таблиц, так как нужно дождаться пока будут выставлены Settings
@ -334,6 +336,8 @@ public:
 	void initClusters();
 	Cluster & getCluster(const std::string & cluster_name);

+	Compiler & getCompiler();
+
 	void shutdown() { shared->shutdown(); }
 };

--- a/dbms/include/DB/Interpreters/Settings.h
+++ b/dbms/include/DB/Interpreters/Settings.h
@ -86,6 +86,11 @@ struct Settings
 	\
 	/** Сэмплирование по умолчанию. Если равно 1, то отключено. */ \
 	M(SettingFloat, default_sample, 1.0) \
+	\
+	/** Включена ли компиляция запросов. */ \
+	M(SettingBool, compile, false) \
+	/** Количество одинаковых по структуре запросов перед тем, как инициируется их компиляция. */ \
+	M(SettingUInt64, min_count_to_compile, 0) \

 	/// Всевозможные ограничения на выполнение запроса.
 	Limits limits;
--- a/dbms/include/DB/Interpreters/SplittingAggregator.h
+++ b/dbms/include/DB/Interpreters/SplittingAggregator.h
@ -34,16 +34,18 @@ class SplittingAggregator : private Aggregator
 {
 public:
 	SplittingAggregator(const ColumnNumbers & keys_, const AggregateDescriptions & aggregates_, size_t threads_,
-		bool with_totals_, size_t max_rows_to_group_by_ = 0, OverflowMode group_by_overflow_mode_ = OverflowMode::THROW)
-		: Aggregator(keys_, aggregates_, with_totals_, max_rows_to_group_by_, group_by_overflow_mode_), threads(threads_), pool(threads),
+		bool with_totals_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_)
+		: Aggregator(keys_, aggregates_, with_totals_, max_rows_to_group_by_, group_by_overflow_mode_, nullptr, 0),
+		threads(threads_), pool(threads),
 		log(&Logger::get("SplittingAggregator")), method(AggregatedDataVariants::Type::EMPTY),
 		key_columns(keys_size), aggregate_columns(aggregates_size), rows(0), src_rows(0), src_bytes(0), size_of_all_results(0)
 	{
 	}

 	SplittingAggregator(const Names & key_names_, const AggregateDescriptions & aggregates_, size_t threads_,
-		bool with_totals_, size_t max_rows_to_group_by_ = 0, OverflowMode group_by_overflow_mode_ = OverflowMode::THROW)
-		: Aggregator(key_names_, aggregates_, with_totals_, max_rows_to_group_by_, group_by_overflow_mode_), threads(threads_), pool(threads),
+		bool with_totals_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_)
+		: Aggregator(key_names_, aggregates_, with_totals_, max_rows_to_group_by_, group_by_overflow_mode_, nullptr, 0),
+		threads(threads_), pool(threads),
 		log(&Logger::get("SplittingAggregator")), method(AggregatedDataVariants::Type::EMPTY),
 		key_columns(keys_size), aggregate_columns(aggregates_size), rows(0), src_rows(0), src_bytes(0), size_of_all_results(0)
 	{
--- a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp
+++ b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp
@ -7,26 +7,14 @@ namespace DB
 {


-AggregatingBlockInputStream::AggregatingBlockInputStream(BlockInputStreamPtr input_,
-	const Names & key_names, const AggregateDescriptions & aggregates,
-	bool overflow_row_, bool final_, size_t max_rows_to_group_by_, OverflowMode group_by_overflow_mode_)
-	: final(final_)
-{
-	children.push_back(input_);
-
-	aggregator = new Aggregator(key_names, aggregates, overflow_row_, max_rows_to_group_by_, group_by_overflow_mode_);
-}
-
-
-
 Block AggregatingBlockInputStream::readImpl()
 {
 	if (!executed)
 	{
 		executed = true;
 		AggregatedDataVariants data_variants;
-		aggregator->execute(children.back(), data_variants);
-		blocks = aggregator->convertToBlocks(data_variants, final, 1);
+		aggregator.execute(children.back(), data_variants);
+		blocks = aggregator.convertToBlocks(data_variants, final, 1);
 		it = blocks.begin();
 	}

--- a/dbms/src/DataStreams/MergingAggregatedBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergingAggregatedBlockInputStream.cpp
@ -13,8 +13,8 @@ Block MergingAggregatedBlockInputStream::readImpl()
 	{
 		executed = true;
 		AggregatedDataVariants data_variants;
-		aggregator->mergeStream(children.back(), data_variants, max_threads);
-		blocks = aggregator->convertToBlocks(data_variants, final, max_threads);
+		aggregator.mergeStream(children.back(), data_variants, max_threads);
+		blocks = aggregator.convertToBlocks(data_variants, final, max_threads);
 		it = blocks.begin();
 	}

--- a/dbms/src/DataStreams/tests/aggregating_stream.cpp
+++ b/dbms/src/DataStreams/tests/aggregating_stream.cpp
@ -91,7 +91,8 @@ int main(int argc, char ** argv)
 		}

 		DB::BlockInputStreamPtr stream = new DB::OneBlockInputStream(block);
-		stream = new DB::AggregatingBlockInputStream(stream, key_column_numbers, aggregate_descriptions, false, true, 0, DB::OverflowMode::THROW);
+		stream = new DB::AggregatingBlockInputStream(stream, key_column_numbers, aggregate_descriptions, false, true,
+													 0, DB::OverflowMode::THROW, nullptr);

 		DB::WriteBufferFromOStream ob(std::cout);
 		DB::RowOutputStreamPtr row_out = new DB::TabSeparatedRowOutputStream(ob, sample);
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@ -127,6 +127,97 @@ void Aggregator::initialize(Block & block)
 }


+void Aggregator::compileIfPossible(AggregatedDataVariants::Type type)
+{
+	std::lock_guard<std::mutex> lock(mutex);
+
+	if (compiled_if_possible)
+		return;
+
+	compiled_if_possible = true;
+
+	std::string method_typename;
+
+#define M(NAME, IS_TWO_LEVEL) \
+	else if (type == AggregatedDataVariants::Type::NAME) \
+		method_typename = "decltype(AggregatedDataVariants::" ## NAME ## ")::element_type";
+
+	if (false) {}
+		APPLY_FOR_AGGREGATED_VARIANTS(M)
+#undef M
+	else if (type != AggregatedDataVariants::Type::without_key)		/// TODO Реализовать для without_key
+		throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
+
+	/// Список типов агрегатных функций.
+	std::stringstream aggregate_functions_typenames_str;
+	for (size_t i = 0; i < aggregates_size; ++i)
+	{
+		int status = 0;
+		char * type_name_ptr = abi::__cxa_demangle(typeid(*aggregate_functions[i]).name(), 0, 0, &status);
+		std::string type_name = type_name_ptr;
+		free(type_name_ptr);
+
+		if (status)
+			throw Exception("Cannot compile code: cannot demangle name " + String(typeid(*aggregate_functions[i]).name())
+				+ ", status: " + toString(status)/* TODO ErrorCodes*/);
+
+		aggregate_functions_typenames_str << ((i != 0) ? ", " : "") << type_name;
+	}
+
+	std::string aggregate_functions_typenames = aggregate_functions_typenames_str.str();
+
+	std::string key = "Aggregate: " + method_typename + ", " + aggregate_functions_typenames;
+
+	auto get_code = [method_typename, aggregate_functions_typenames]
+	{
+		/// Короткий кусок кода, представляющий собой явное инстанцирование шаблона.
+		std::stringstream code;
+		code <<
+			"#include <DB/Interpreters/SpecializedAggregator.h>\n"
+			"\n"
+			"namespace DB\n"
+			"{\n"
+			"\n"
+			"template void Aggregator::executeSpecialized<\n"
+				"\t" << method_typename << ", TypeList<" << aggregate_functions_typenames << ">>(\n"
+				"\t" << method_typename << " &, Arena *, size_t, ConstColumnPlainPtrs &,\n"
+				"\tAggregateColumns &, const Sizes &, StringRefs &, bool, AggregateDataPtr) const;\n"
+			"\n"
+			"static void wrapper(\n"
+				"\tconst Aggregator & aggregator,\n"
+				"\t" << method_typename << " & method,\n"
+				"\tArena * arena,\n"
+				"\tsize_t rows,\n"
+				"\tConstColumnPlainPtrs & key_columns,\n"
+				"\tAggregator::AggregateColumns & aggregate_columns,\n"
+				"\tconst Sizes & key_sizes,\n"
+				"\tStringRefs & keys,\n"
+				"\tbool no_more_keys,\n"
+				"\tAggregateDataPtr overflow_row)\n"
+			"{\n"
+				"\taggregator.executeSpecialized<\n"
+					"\t\t" << method_typename << ", TypeList<" << aggregate_functions_typenames << ">>(\n"
+					"\t\tmethod, arena, rows, key_columns, aggregate_columns, key_sizes, keys, no_more_keys, overflow_row);\n"
+			"}\n"
+			"\n"
+			"const void * getPtr() __attribute__((__visibility__(\"default\")));\n"
+			"const void * getPtr()\n"	/// Без этой обёртки непонятно, как достать нужный символ из скомпилированной библиотеки.
+			"{\n"
+				"\treturn reinterpret_cast<const void *>(&wrapper);\n"
+			"}\n"
+			"\n"
+			"}\n";
+
+		return code.str();
+	};
+
+	compiled_aggregator = compiler->getOrCount(key, min_count_to_compile, get_code);
+
+	if (compiled_aggregator)
+		compiled_method_ptr = compiled_aggregator->get<const void * (*) ()>("_ZN2DB6getPtrEv")();
+}
+
+
 AggregatedDataVariants::Type Aggregator::chooseAggregationMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes)
 {
 	bool keys_fit_128_bits = true;
@ -352,6 +443,9 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result,
 		result.keys_size = keys_size;
 		result.key_sizes = key_sizes;
 		LOG_TRACE(log, "Aggregation method: " << result.getMethodName());
+
+		if (compiler)
+			compileIfPossible(result.type);
 	}

 	if ((overflow_row || result.type == AggregatedDataVariants::Type::without_key) && !result.without_key)
@ -384,16 +478,36 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result,

 	AggregateDataPtr overflow_row_ptr = overflow_row ? result.without_key : nullptr;

-#define M(NAME, IS_TWO_LEVEL) \
-	else if (result.type == AggregatedDataVariants::Type::NAME) \
-		executeImpl(*result.NAME, result.aggregates_pool, rows, key_columns, aggregate_columns, \
-			result.key_sizes, key, no_more_keys, overflow_row_ptr);
+	if (compiled_method_ptr)	/// NOTE Можно динамически начинать применять компилированный агрегатор, когда он станет готов, во время запроса.
+	{
+	#define M(NAME, IS_TWO_LEVEL) \
+		else if (result.type == AggregatedDataVariants::Type::NAME) \
+			reinterpret_cast<void (*)( \
+				const Aggregator &, decltype(result.NAME)::element_type &, \
+				Arena *, size_t, ConstColumnPlainPtrs &, AggregateColumns &, \
+				const Sizes &, StringRefs &, bool, AggregateDataPtr)>(compiled_method_ptr) \
+			(*this, *result.NAME, result.aggregates_pool, rows, key_columns, aggregate_columns, \
+				result.key_sizes, key, no_more_keys, overflow_row_ptr);

-	if (false) {}
-	APPLY_FOR_AGGREGATED_VARIANTS(M)
-#undef M
-	else if (result.type != AggregatedDataVariants::Type::without_key)
-		throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
+		if (false) {}
+		APPLY_FOR_AGGREGATED_VARIANTS(M)
+	#undef M
+		else if (result.type != AggregatedDataVariants::Type::without_key)
+			throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
+	}
+	else
+	{
+	#define M(NAME, IS_TWO_LEVEL) \
+		else if (result.type == AggregatedDataVariants::Type::NAME) \
+			executeImpl(*result.NAME, result.aggregates_pool, rows, key_columns, aggregate_columns, \
+				result.key_sizes, key, no_more_keys, overflow_row_ptr);
+
+		if (false) {}
+		APPLY_FOR_AGGREGATED_VARIANTS(M)
+	#undef M
+		else if (result.type != AggregatedDataVariants::Type::without_key)
+			throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
+	}

 	size_t result_size = result.size();

--- a/dbms/src/Interpreters/Compiler.cpp
+++ b/dbms/src/Interpreters/Compiler.cpp
@ -0,0 +1,230 @@
+#include <stdio.h>
+
+#include <Poco/DirectoryIterator.h>
+#include <Yandex/Revision.h>
+
+#include <DB/Common/SipHash.h>
+
+#include <DB/IO/Operators.h>
+#include <DB/IO/WriteBufferFromString.h>
+#include <DB/IO/ReadBufferFromString.h>
+#include <DB/IO/ReadBufferFromFileDescriptor.h>
+#include <DB/IO/copyData.h>
+#include <DB/IO/WriteBufferFromFile.h>
+
+#include <DB/Interpreters/Compiler.h>
+
+
+namespace DB
+{
+
+
+Compiler::Compiler(const std::string & path_, size_t threads)
+	: path(path_), pool(threads)
+{
+	Poco::File(path).createDirectory();
+
+	Poco::DirectoryIterator dir_end;
+	for (Poco::DirectoryIterator dir_it(path); dir_end != dir_it; ++dir_it)
+	{
+		std::string name = dir_it.name();
+		if (name.length() > strlen(".so") && 0 == name.compare(name.size() - 3, 3, ".so"))
+		{
+			files.insert(name.substr(0, name.size() - 3));
+		}
+	}
+
+	LOG_INFO(log, "Having " << files.size() << " compiled files from previous start.");
+}
+
+Compiler::~Compiler()
+{
+	LOG_DEBUG(log, "Waiting for threads to finish.");
+	pool.wait();
+}
+
+
+static Compiler::HashedKey getHash(const std::string & key)
+{
+	SipHash hash;
+
+	auto revision = Revision::get();
+	hash.update(reinterpret_cast<const char *>(&revision), sizeof(revision));
+	hash.update(key.data(), key.size());
+
+	Compiler::HashedKey res;
+	hash.get128(res.first, res.second);
+	return res;
+}
+
+
+/// Без расширения .so.
+static std::string hashedKeyToFileName(Compiler::HashedKey hashed_key)
+{
+	std::string file_name;
+
+	{
+		WriteBufferFromString out(file_name);
+		out << hashed_key.first << '_' << hashed_key.second;
+	}
+
+	return file_name;
+}
+
+
+Compiler::SharedLibraryPtr Compiler::getOrCount(
+	const std::string & key,
+	UInt32 min_count_to_compile,
+	std::function<std::string()> get_code)
+{
+	HashedKey hashed_key = getHash(key);
+
+	std::lock_guard<std::mutex> lock(mutex);
+
+	UInt32 count = ++counts[hashed_key];
+
+	/// Есть готовая открытая библиотека? Или, если библиотека в процессе компиляции, там будет nullptr.
+	Libraries::iterator it = libraries.find(hashed_key);
+	if (libraries.end() != it)
+	{
+		if (!it->second)
+			LOG_INFO(log, "Library " << hashedKeyToFileName(hashed_key) << " is compiling.");
+
+		return it->second;
+	}
+
+	/// Есть файл с библиотекой, оставшийся от предыдущего запуска?
+	std::string file_name = hashedKeyToFileName(hashed_key);
+	if (files.count(file_name))
+		return libraries.emplace(std::piecewise_construct,
+			std::forward_as_tuple(hashed_key),
+			std::forward_as_tuple(new SharedLibrary(path + '/' + file_name + ".so"))).first->second;
+
+	/// Достигнуто ли min_count_to_compile?
+	if (count >= min_count_to_compile)
+	{
+		/// TODO Значение min_count_to_compile, равное нулю, обозначает необходимость синхронной компиляции.
+
+		/// Есть ли свободные потоки.
+		if (pool.active() < pool.size())
+		{
+			/// Обозначает, что библиотека в процессе компиляции.
+			libraries[hashed_key] = nullptr;
+
+			pool.schedule([=]
+			{
+				try
+				{
+					compile(hashed_key, file_name, get_code);
+				}
+				catch (...)
+				{
+					tryLogCurrentException("Compiler");
+				}
+			});
+		}
+		else
+			LOG_INFO(log, "All threads are busy.");
+	}
+
+	return nullptr;
+}
+
+
+struct Pipe : private boost::noncopyable
+{
+	FILE * f;
+
+	Pipe(const std::string & command)
+	{
+		errno = 0;
+		f = popen(command.c_str(), "r");
+
+		if (!f)
+			throwFromErrno("Cannot popen");
+	}
+
+	~Pipe()
+	{
+		try
+		{
+			errno = 0;
+			if (f && -1 == pclose(f))
+				throwFromErrno("Cannot pclose");
+		}
+		catch (...)
+		{
+			tryLogCurrentException("Pipe");
+		}
+	}
+};
+
+
+void Compiler::compile(HashedKey hashed_key, std::string file_name, std::function<std::string()> get_code)
+{
+	LOG_INFO(log, "Compiling code " << file_name);
+
+	std::string prefix = path + "/" + file_name;
+	std::string cpp_file_path = prefix + ".cpp";
+	std::string so_file_path = prefix + ".so";
+
+	{
+		WriteBufferFromFile out(cpp_file_path);
+		out << get_code();
+	}
+
+	std::stringstream command;
+
+	/// Слегка неудобно.
+	command <<
+		"/usr/share/clickhouse/bin/clang"
+		" -x c++ -std=gnu++11 -O3 -g -Wall -Werror -Wnon-virtual-dtor -march=native -D NDEBUG"
+		" -shared -fPIC -fvisibility=hidden -fno-implement-inlines"
+		" -isystem /usr/share/clickhouse/headers/usr/local/include/"
+		" -isystem /usr/share/clickhouse/headers/usr/include/"
+		" -isystem /usr/share/clickhouse/headers/usr/include/c++/4.8/"
+		" -isystem /usr/share/clickhouse/headers/usr/include/x86_64-linux-gnu/c++/4.8/"
+		" -isystem /usr/share/clickhouse/headers/usr/local/lib/clang/3.6.0/include/"
+		" -I /usr/share/clickhouse/headers/dbms/include/"
+		" -I /usr/share/clickhouse/headers/libs/libcityhash/"
+		" -I /usr/share/clickhouse/headers/libs/libcommon/include/"
+		" -I /usr/share/clickhouse/headers/libs/libdouble-conversion/"
+		" -I /usr/share/clickhouse/headers/libs/libmysqlxx/include/"
+		" -I /usr/share/clickhouse/headers/libs/libstatdaemons/include/"
+		" -I /usr/share/clickhouse/headers/libs/libstats/include/"
+		" -o " << so_file_path << " " << cpp_file_path
+		<< " 2>&1 || echo Exit code: $?";
+
+	std::string compile_result;
+
+	{
+		Pipe pipe(command.str());
+
+		int pipe_fd = fileno(pipe.f);
+		if (-1 == pipe_fd)
+			throwFromErrno("Cannot fileno");
+
+		{
+			ReadBufferFromFileDescriptor command_output(pipe_fd);
+			WriteBufferFromString res(compile_result);
+
+			copyData(command_output, res);
+		}
+	}
+
+	if (!compile_result.empty())
+		throw Exception("Cannot compile code:\n\n" + command.str() + "\n\n" + compile_result);
+
+	/// Если до этого была ошибка, то файл с кодом остаётся для возможности просмотра.
+	Poco::File(cpp_file_path).remove();
+
+	{
+		std::lock_guard<std::mutex> lock(mutex);
+		libraries[hashed_key].reset(new SharedLibrary(so_file_path));
+	}
+
+	LOG_INFO(log, "Compiled code " << file_name);
+}
+
+
+}
--- a/dbms/src/Interpreters/Context.cpp
+++ b/dbms/src/Interpreters/Context.cpp
@ -634,4 +634,12 @@ Cluster & Context::getCluster(const std::string & cluster_name)
 	else
 		throw Poco::Exception("Failed to find cluster with name = " + cluster_name);
 }
+
+
+Compiler & Context::getCompiler()
+{
+	return shared->compiler;
+}
+
+
 }
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@ -759,7 +759,9 @@ void InterpreterSelectQuery::executeAggregation(BlockInputStreams & streams, Exp
 		{
 			stream = maybeAsynchronous(
 				new ParallelAggregatingBlockInputStream(streams, key_names, aggregates, overflow_row, final,
-				settings.max_threads, settings.limits.max_rows_to_group_by, settings.limits.group_by_overflow_mode), settings.asynchronous);
+				settings.max_threads, settings.limits.max_rows_to_group_by, settings.limits.group_by_overflow_mode,
+				settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile),
+				settings.asynchronous);
 		}
 		else
 		{
@ -779,7 +781,9 @@ void InterpreterSelectQuery::executeAggregation(BlockInputStreams & streams, Exp
 	}
 	else
 		stream = maybeAsynchronous(new AggregatingBlockInputStream(stream, key_names, aggregates, overflow_row, final,
-			settings.limits.max_rows_to_group_by, settings.limits.group_by_overflow_mode), settings.asynchronous);
+			settings.limits.max_rows_to_group_by, settings.limits.group_by_overflow_mode,
+			settings.compile ? &context.getCompiler() : nullptr, settings.min_count_to_compile),
+			settings.asynchronous);
 }


--- a/dbms/src/Interpreters/tests/aggregate.cpp
+++ b/dbms/src/Interpreters/tests/aggregate.cpp
@ -23,7 +23,7 @@ int main(int argc, char ** argv)
 		size_t n = argc == 2 ? atoi(argv[1]) : 10;

 		DB::Block block;
-		
+
 		DB::ColumnWithNameAndType column_x;
 		column_x.name = "x";
 		column_x.type = new DB::DataTypeInt16;
@ -73,8 +73,8 @@ int main(int argc, char ** argv)
 		DB::DataTypes empty_list_of_types;
 		aggregate_descriptions[0].function = factory.get("count", empty_list_of_types);

-		DB::Aggregator aggregator(key_column_numbers, aggregate_descriptions, false);
-		
+		DB::Aggregator aggregator(key_column_numbers, aggregate_descriptions, false, 0, DB::OverflowMode::THROW, nullptr, 0);
+
 		{
 			Poco::Stopwatch stopwatch;
 			stopwatch.start();