From 6859f2085e1bc5ee2749885dba5fa0b5fd5a9edf Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Fri, 4 Dec 2015 23:33:38 +0300
Subject: [PATCH 01/40] dbms: fixed error with TOTALS [#METR-19259].

---
 .../Interpreters/InterpreterSelectQuery.cpp   | 10 +++
 .../00285_not_all_data_in_totals.reference    | 66 +++++++++++++++++++
 .../00285_not_all_data_in_totals.sql          |  3 +
 3 files changed, 79 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00285_not_all_data_in_totals.reference
 create mode 100644 dbms/tests/queries/0_stateless/00285_not_all_data_in_totals.sql

diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index d2efc2b8721..47cb0678e8a 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1142,8 +1142,18 @@ void InterpreterSelectQuery::executeLimit()
      * if there is no WITH TOTALS but there is a subquery in FROM, and at some level that subquery has WITH TOTALS,
      * then when a LIMIT is used, the data should be read to the end instead of cancelling the query early,
      * because if the query is cancelled, we will not receive the totals data from the remote server.
+     *
+     * One more case:
+     * if there is WITH TOTALS and no ORDER BY, the data must be read to the end,
+     * otherwise TOTALS will be calculated from incomplete data.
      */
     bool always_read_till_end = false;
+
+    if (query.group_by_with_totals && !query.order_expression_list)
+    {
+        always_read_till_end = true;
+    }
+
     if (!query.group_by_with_totals && query.table && typeid_cast<const ASTSelectQuery *>(query.table.get()))
     {
         const ASTSelectQuery * subquery = static_cast<const ASTSelectQuery *>(query.table.get());
diff --git a/dbms/tests/queries/0_stateless/00285_not_all_data_in_totals.reference b/dbms/tests/queries/0_stateless/00285_not_all_data_in_totals.reference
new file mode 100644
index 00000000000..961d8a34c09
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00285_not_all_data_in_totals.reference
@@ -0,0 +1,66 @@
+{
+    "meta":
+    [
+        {
+            "name": "ignore(x)",
+            "type": "UInt8"
+        },
+        {
+            "name": "count()",
+            "type": "UInt64"
+        }
+    ],
+
+    "data":
+    [
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"]
+    ],
+
+    "totals": [0,"2000"],
+
+    "rows": 10,
+
+    "rows_before_limit_at_least": 1000
+}
+{
+    "meta":
+    [
+        {
+            "name": "ignore(x)",
+            "type": "UInt8"
+        },
+        {
+            "name": "count()",
+            "type": "UInt64"
+        }
+    ],
+
+    "data":
+    [
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"],
+        [0, "2"]
+    ],
+
+    "totals": [0,"2000"],
+
+    "rows": 10,
+
+    "rows_before_limit_at_least": 1000
+}
diff --git a/dbms/tests/queries/0_stateless/00285_not_all_data_in_totals.sql b/dbms/tests/queries/0_stateless/00285_not_all_data_in_totals.sql
new file mode 100644
index 00000000000..26d80546c51
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00285_not_all_data_in_totals.sql
@@ -0,0 +1,3 @@
+SET group_by_two_level_threshold = 1;
+SELECT ignore(x), count() FROM (SELECT number AS x FROM system.numbers LIMIT 1000 UNION ALL SELECT number AS x FROM system.numbers LIMIT 1000) GROUP BY x WITH TOTALS LIMIT 10 FORMAT JSONCompact;
+SELECT ignore(x), count() FROM (SELECT number AS x FROM system.numbers LIMIT 1000 UNION ALL SELECT number AS x FROM system.numbers LIMIT 1000) GROUP BY x WITH TOTALS ORDER BY x LIMIT 10 FORMAT JSONCompact;
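
A note on patch 01: WITH TOTALS computes the totals row over every group, so when the main result is cut short by a LIMIT, the server still has to read the input to the end; cancelling early would leave "totals" computed from whatever part of the data happened to be processed. A minimal sketch of the symptom (a hypothetical query, not taken from the test above; the expected numbers follow from WITH TOTALS semantics):

    SELECT number % 3 AS k, count()
    FROM (SELECT number FROM system.numbers LIMIT 1000)
    GROUP BY k WITH TOTALS
    LIMIT 1
    FORMAT JSONCompact;
    -- "totals" should reflect all 1000 source rows, i.e. a count of "1000",
    -- even though LIMIT 1 outputs a single group row.

This is also why the test above sets group_by_two_level_threshold = 1: two-level aggregation delivers the result in many buckets, so an early cancellation is much more likely to drop some of them.
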
From e1deff76a9372fa949197e112c870d385e203855 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 5 Dec 2015 06:04:13 +0300
Subject: [PATCH 02/40] dbms: fixed error [#METR-19262].

---
 ...MergingAggregatedMemoryEfficientBlockInputStream.h |  2 ++
 ...rgingAggregatedMemoryEfficientBlockInputStream.cpp | 11 +++++++++++
 2 files changed, 13 insertions(+)

diff --git a/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h b/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
index 632eb87b9ca..1f8aa5e561c 100644
--- a/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
+++ b/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
@@ -30,6 +30,8 @@ public:
     MergingAggregatedMemoryEfficientBlockInputStream(
         BlockInputStreams inputs_, const Aggregator::Params & params, bool final_, size_t threads_);
 
+    ~MergingAggregatedMemoryEfficientBlockInputStream();
+
     String getName() const override { return "MergingAggregatedMemoryEfficient"; }
 
     String getID() const override;
diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
index 35f726862de..9376fd089d4 100644
--- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
@@ -95,6 +95,17 @@ Block MergingAggregatedMemoryEfficientBlockInputStream::readImpl()
 }
 
 
+MergingAggregatedMemoryEfficientBlockInputStream::~MergingAggregatedMemoryEfficientBlockInputStream()
+{
+    if (parallel_merge_data)
+    {
+        LOG_TRACE((&Logger::get("MergingAggregatedMemoryEfficientBlockInputStream")), "Waiting for threads to finish");
+        parallel_merge_data->result_queue.clear();
+        parallel_merge_data->pool.wait();
+    }
+}
+
+
 void MergingAggregatedMemoryEfficientBlockInputStream::mergeThread(MemoryTracker * memory_tracker)
 {
     setThreadName("MrgAggMemEffThr");
From fa76a1a452ce1618644fec8bf05c2e969ce71443 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 5 Dec 2015 07:20:37 +0300
Subject: [PATCH 03/40] dbms: MergingAggregatedMemoryEfficientBlockInputStream: reading data in parallel [#METR-17000].

---
 ...ggregatedMemoryEfficientBlockInputStream.h |  28 ++--
 .../ParallelAggregatingBlockInputStream.h     |   3 +-
 .../AggregatingBlockInputStream.cpp           |   2 +-
 ...regatedMemoryEfficientBlockInputStream.cpp | 137 +++++++++++++-----
 .../Interpreters/InterpreterSelectQuery.cpp   |   1 +
 5 files changed, 116 insertions(+), 55 deletions(-)

diff --git a/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h b/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
index 1f8aa5e561c..e5875c874c9 100644
--- a/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
+++ b/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
@@ -10,25 +10,17 @@
 namespace DB
 {
 
-/** Merges streams of aggregated blocks, keeping in memory only one block from each stream.
+/** Merges streams of aggregated blocks, keeping in memory only one or a few (up to merging_threads) blocks from each source.
  * This saves memory when two-level aggregation is used, where each stream produces up to 256 blocks with parts of the result.
  *
  * Aggregate functions in the blocks must not be finalized, so that their states can be merged.
- *
- * Remarks:
- *
- * On a fast network (10 Gbit) this can work noticeably slower, because blocks from different
- * remote servers are read sequentially, while the reading is CPU-bound.
- * This is not hard to fix.
- *
- * It is possible to keep not just one but several blocks from each source in memory, and to parallelize the merge.
- * That would consume a multiple of the amount of memory.
  */
 class MergingAggregatedMemoryEfficientBlockInputStream : public IProfilingBlockInputStream
 {
 public:
     MergingAggregatedMemoryEfficientBlockInputStream(
-        BlockInputStreams inputs_, const Aggregator::Params & params, bool final_, size_t threads_);
+        BlockInputStreams inputs_, const Aggregator::Params & params, bool final_,
+        size_t reading_threads_, size_t merging_threads_);
 
     ~MergingAggregatedMemoryEfficientBlockInputStream();
 
@@ -36,17 +28,21 @@ public:
 
     String getID() const override;
 
+    /// Sends the query (initiates computations) earlier than read does.
+    void readPrefix() override;
+
 protected:
     Block readImpl() override;
 
 private:
     Aggregator aggregator;
     bool final;
-    size_t threads;
+    size_t reading_threads;
+    size_t merging_threads;
 
     bool started = false;
-    bool has_two_level = false;
-    bool has_overflows = false;
+    volatile bool has_two_level = false;
+    volatile bool has_overflows = false;
     int current_bucket_num = -1;
 
     struct Input
@@ -64,9 +60,13 @@ private:
 
     using BlocksToMerge = Poco::SharedPtr<BlocksList>;
 
+    void start();
+
     /// Get blocks that can be merged. This allows merging them in parallel in separate threads.
     BlocksToMerge getNextBlocksToMerge();
 
+    std::unique_ptr<boost::threadpool::pool> reading_pool;
+
     /// For the parallel merge.
     struct OutputData
     {
diff --git a/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h b/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h
index c590007b4f6..23ebbfa5121 100644
--- a/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h
+++ b/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h
@@ -116,7 +116,8 @@ protected:
                 << (files.sum_size_compressed / 1048576.0) << " MiB compressed, "
                 << (files.sum_size_uncompressed / 1048576.0) << " MiB uncompressed.");
 
-            impl.reset(new MergingAggregatedMemoryEfficientBlockInputStream(input_streams, params, final, temporary_data_merge_threads));
+            impl.reset(new MergingAggregatedMemoryEfficientBlockInputStream(
+                input_streams, params, final, temporary_data_merge_threads, temporary_data_merge_threads));
         }
     }
 
diff --git a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp
index 28bdefa9c84..9e287d6778f 100644
--- a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp
+++ b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp
@@ -49,7 +49,7 @@ Block AggregatingBlockInputStream::readImpl()
             << (files.sum_size_compressed / 1048576.0) << " MiB compressed, "
             << (files.sum_size_uncompressed / 1048576.0) << " MiB uncompressed.");
 
-        impl.reset(new MergingAggregatedMemoryEfficientBlockInputStream(input_streams, params, final, 1));
+        impl.reset(new MergingAggregatedMemoryEfficientBlockInputStream(input_streams, params, final, 1, 1));
     }
 }
 
diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
index 35f726862de..78fe989c6c6 100644
--- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
@@ -8,12 +8,15 @@ namespace DB
 
 
 MergingAggregatedMemoryEfficientBlockInputStream::MergingAggregatedMemoryEfficientBlockInputStream(
-    BlockInputStreams inputs_, const Aggregator::Params & params, bool final_, size_t threads_)
-    : aggregator(params), final(final_), threads(threads_), inputs(inputs_.begin(), inputs_.end())
+    BlockInputStreams inputs_, const Aggregator::Params & params, bool final_, size_t reading_threads_, size_t merging_threads_)
+    : aggregator(params), final(final_),
+    reading_threads(std::min(reading_threads_, inputs_.size())), merging_threads(merging_threads_),
+    inputs(inputs_.begin(), inputs_.end())
 {
     children = inputs_;
 }
 
+
 String MergingAggregatedMemoryEfficientBlockInputStream::getID() const
 {
     std::stringstream res;
@@ -24,21 +27,55 @@ String MergingAggregatedMemoryEfficientBlockInputStream::getID() const
     return res.str();
 }
 
-Block MergingAggregatedMemoryEfficientBlockInputStream::readImpl()
+
+void MergingAggregatedMemoryEfficientBlockInputStream::readPrefix()
 {
-    if (threads == 1)
+    start();
+}
+
+
+void MergingAggregatedMemoryEfficientBlockInputStream::start()
+{
+    if (started)
+        return;
+
+    started = true;
+
+    /// If a child is a RemoteBlockInputStream, child->readPrefix() sends the query to the remote server, initiating computations.
+
+    if (reading_threads == 1)
     {
-        /// If a child is a RemoteBlockInputStream, this sends the query to all remote servers, initiating computations.
-        /** NOTE: If the connections are not yet established, they are established sequentially.
-         * And the query is sent sequentially. This is slow.
-         */
-        if (!started)
+        for (auto & child : children)
+            child->readPrefix();
+    }
+    else
+    {
+        reading_pool.reset(new boost::threadpool::pool(reading_threads));
+
+        size_t num_children = children.size();
+        std::vector<std::packaged_task<void()>> tasks(num_children);
+        for (size_t i = 0; i < num_children; ++i)
         {
-            started = true;
-            for (auto & child : children)
-                child->readPrefix();
+            auto & child = children[i];
+            auto & task = tasks[i];
+
+            task = std::packaged_task<void()>([&child] { child->readPrefix(); });
+            reading_pool->schedule([&task] { task(); });
         }
 
+        reading_pool->wait();
+        for (auto & task : tasks)
+            task.get_future().get();
+    }
+}
+
+
+Block MergingAggregatedMemoryEfficientBlockInputStream::readImpl()
+{
+    start();
+
+    if (merging_threads == 1)
+    {
         if (BlocksToMerge blocks_to_merge = getNextBlocksToMerge())
             return aggregator.mergeBlocks(*blocks_to_merge, final);
         return {};
@@ -51,32 +88,14 @@ Block MergingAggregatedMemoryEfficientBlockInputStream::readImpl()
 
         if (!parallel_merge_data)
         {
-            parallel_merge_data.reset(new ParallelMergeData(threads));
+            parallel_merge_data.reset(new ParallelMergeData(merging_threads));
             auto & pool = parallel_merge_data->pool;
 
-            /** If a child is a RemoteBlockInputStream, connections and query sending are also done in parallel.
-             */
-            started = true;
-            size_t num_children = children.size();
-            std::vector<std::packaged_task<void()>> tasks(num_children);
-            for (size_t i = 0; i < num_children; ++i)
-            {
-                auto & child = children[i];
-                auto & task = tasks[i];
-
-                task = std::packaged_task<void()>([&child] { child->readPrefix(); });
-                pool.schedule([&task] { task(); });
-            }
-
-            pool.wait();
-            for (auto & task : tasks)
-                task.get_future().get();
-
             /** Create threads that will fetch and merge the data.
              */
-            for (size_t i = 0; i < threads; ++i)
+            for (size_t i = 0; i < merging_threads; ++i)
                 pool.schedule(std::bind(&MergingAggregatedMemoryEfficientBlockInputStream::mergeThread, this, current_memory_tracker));
         }
 
@@ -97,6 +116,9 @@ Block MergingAggregatedMemoryEfficientBlockInputStream::readImpl()
 
 MergingAggregatedMemoryEfficientBlockInputStream::~MergingAggregatedMemoryEfficientBlockInputStream()
 {
+    if (reading_pool)
+        reading_pool->wait();
+
     if (parallel_merge_data)
     {
         LOG_TRACE((&Logger::get("MergingAggregatedMemoryEfficientBlockInputStream")), "Waiting for threads to finish");
@@ -115,7 +137,13 @@ void MergingAggregatedMemoryEfficientBlockInputStream::mergeThread(MemoryTracker
 {
     while (true)
     {
-        /// Fetching the next blocks is done sequentially, while the merge is done in parallel.
+        /** Fetching the next blocks is done in one thread pool, while the merge is done in another.
+         * This is a rather complex interaction.
+         * Each time:
+         * - reading_threads read one next block from each source;
+         * - a group of blocks to merge is assembled from these blocks;
+         * - one of the merging_threads merges this group of blocks;
+         */
 
         BlocksToMerge blocks_to_merge;
 
         {
@@ -173,14 +201,16 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate
 
     ++current_bucket_num;
 
-    for (auto & input : inputs)
+    /// Get from the source the next block with a bucket number no greater than current_bucket_num.
+
+    auto need_that_input = [this] (Input & input)
     {
-        if (input.is_exhausted)
-            continue;
-
-        if (input.block.info.bucket_num >= current_bucket_num)
-            continue;
+        return !input.is_exhausted
+            && input.block.info.bucket_num < current_bucket_num;
+    };
 
+    auto read_from_input = [this] (Input & input)
+    {
         /// If a block with overflow data arrives instead of the main data, remember it and repeat the read.
         while (true)
         {
@@ -221,6 +251,35 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate
 
             break;
         }
+    };
+
+    if (reading_threads == 1)
+    {
+        for (auto & input : inputs)
+            if (need_that_input(input))
+                read_from_input(input);
+    }
+    else
+    {
+        size_t num_inputs = inputs.size();
+        std::vector<std::packaged_task<void()>> tasks;
+        tasks.reserve(num_inputs);
+
+        for (size_t i = 0; i < num_inputs; ++i)
+        {
+            auto & input = inputs[i];
+            auto & task = tasks[i];
+
+            if (need_that_input(input))
+            {
+                tasks.emplace_back([&input, &read_from_input] { read_from_input(input); });
+                reading_pool->schedule([&task] { task(); });
+            }
+        }
+
+        reading_pool->wait();
+        for (auto & task : tasks)
+            task.get_future().get();
     }
 
     while (true)
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index 47cb0678e8a..556e149a9c4 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -930,6 +930,7 @@ void InterpreterSelectQuery::executeMergeAggregated(bool overflow_row, bool fina
     else
     {
         streams[0] = new MergingAggregatedMemoryEfficientBlockInputStream(streams, params, final,
+            settings.max_threads,
             settings.aggregation_memory_efficient_merge_threads
                 ? size_t(settings.aggregation_memory_efficient_merge_threads)
                 : original_max_threads);
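
A note on the idiom used in patch 03 (and again in patch 07): each unit of work is wrapped in a std::packaged_task before being handed to the pool, so that an exception thrown inside a worker thread is captured and rethrown in the calling thread by get_future().get(). A self-contained sketch of the idiom, with plain std::thread standing in for boost::threadpool (an assumption made here for brevity):

    #include <future>
    #include <iostream>
    #include <stdexcept>
    #include <thread>
    #include <vector>

    int main()
    {
        std::vector<std::packaged_task<void()>> tasks;
        for (int i = 0; i < 4; ++i)
            tasks.emplace_back([i]
            {
                if (i == 2)
                    throw std::runtime_error("readPrefix failed"); // simulated worker failure
            });

        std::vector<std::thread> threads;
        for (auto & task : tasks)
            threads.emplace_back([&task] { task(); }); // the pool's schedule() plays this role
        for (auto & thread : threads)
            thread.join();                             // analogous to reading_pool->wait()

        for (auto & task : tasks)
        {
            try { task.get_future().get(); }           // rethrows the worker's exception here
            catch (const std::exception & e) { std::cout << "caught: " << e.what() << '\n'; }
        }
    }

The tasks container must stay alive and must not be indexed before it is populated while workers hold references into it; patch 04 below fixes exactly such a mistake (tasks[i] is taken before the corresponding emplace_back).
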
From 2e08b4e81696a5be3009826e6400dcc72cbed782 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 5 Dec 2015 08:54:10 +0300
Subject: [PATCH 04/40] dbms: fixed error [#METR-17000].

---
 .../MergingAggregatedMemoryEfficientBlockInputStream.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
index 78fe989c6c6..daf9b478f2e 100644
--- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
@@ -265,14 +265,12 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate
         std::vector<std::packaged_task<void()>> tasks;
         tasks.reserve(num_inputs);
 
-        for (size_t i = 0; i < num_inputs; ++i)
+        for (auto & input : inputs)
         {
-            auto & input = inputs[i];
-            auto & task = tasks[i];
-
             if (need_that_input(input))
             {
                 tasks.emplace_back([&input, &read_from_input] { read_from_input(input); });
+                auto & task = tasks.back();
                 reading_pool->schedule([&task] { task(); });
             }
         }
From 7fa1a57165334205e4a35924d1db280b8fd84674 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 5 Dec 2015 10:01:18 +0300
Subject: [PATCH 05/40] dbms: fixed excessive memory allocation when splitting single-level block in Aggregator [#METR-17000].

---
 .../DB/Columns/ColumnAggregateFunction.h      |  6 +-
 dbms/include/DB/Columns/ColumnArray.h         |  8 +--
 dbms/include/DB/Columns/ColumnConst.h         |  2 +-
 dbms/include/DB/Columns/ColumnFixedString.h   | 11 +++-
 dbms/include/DB/Columns/ColumnString.h        |  4 +-
 dbms/include/DB/Columns/ColumnTuple.h         |  4 +-
 dbms/include/DB/Columns/ColumnVector.h        |  6 +-
 dbms/include/DB/Columns/ColumnsCommon.h       |  2 +-
 dbms/include/DB/Columns/IColumn.h             |  5 +-
 dbms/include/DB/Columns/IColumnDummy.h        |  2 +-
 .../DB/Functions/FunctionsHigherOrder.h       |  2 +-
 .../DistributedBlockOutputStream.h            |  5 +-
 .../MergeTree/MergeTreeBlockInputStream.h     |  2 +-
 .../DB/Storages/MergeTree/MergeTreeReadPool.h |  2 +-
 .../MergeTreeThreadBlockInputStream.h         |  2 +-
 dbms/src/Columns/ColumnArray.cpp              | 55 +++++++++++--------
 dbms/src/Columns/ColumnsCommon.cpp            | 33 ++++++-----
 dbms/src/Common/VirtualColumnUtils.cpp        |  2 +-
 .../CollapsingFinalBlockInputStream.cpp       |  2 +-
 .../DataStreams/DistinctBlockInputStream.cpp  |  2 +-
 .../DataStreams/FilterBlockInputStream.cpp    |  4 +-
 .../TotalsHavingBlockInputStream.cpp          |  2 +-
 dbms/src/Interpreters/Aggregator.cpp          |  5 +-
 dbms/src/Interpreters/Join.cpp                |  2 +-
 24 files changed, 104 insertions(+), 66 deletions(-)

diff --git a/dbms/include/DB/Columns/ColumnAggregateFunction.h b/dbms/include/DB/Columns/ColumnAggregateFunction.h
index ee2feddbe31..cce96eb80d8 100644
--- a/dbms/include/DB/Columns/ColumnAggregateFunction.h
+++ b/dbms/include/DB/Columns/ColumnAggregateFunction.h
@@ -211,7 +211,7 @@ public:
         memcpy(&data[old_size], &src_concrete.getData()[start], length * sizeof(data[0]));
     }
 
-    ColumnPtr filter(const Filter & filter) const override
+    ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override
     {
         size_t size = getData().size();
         if (size != filter.size())
@@ -225,7 +225,9 @@ public:
 
         auto & res_data = res_->getData();
 
-        res_data.reserve(size);
+        if (result_size_hint)
+            res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
+
         for (size_t i = 0; i < size; ++i)
             if (filter[i])
                 res_data.push_back(getData()[i]);
diff --git a/dbms/include/DB/Columns/ColumnArray.h b/dbms/include/DB/Columns/ColumnArray.h
index cea68ab78eb..14e683006b5 100644
--- a/dbms/include/DB/Columns/ColumnArray.h
+++ b/dbms/include/DB/Columns/ColumnArray.h
@@ -175,7 +175,7 @@ public:
         getOffsets().push_back(getOffsets().size() == 0 ? 0 : getOffsets().back());
     }
 
-    ColumnPtr filter(const Filter & filt) const override;
+    ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
 
     ColumnPtr permute(const Permutation & perm, size_t limit) const override;
@@ -310,10 +310,10 @@ private:
     /// Specializations of the filter function.
     template <typename T>
-    ColumnPtr filterNumber(const Filter & filt) const;
+    ColumnPtr filterNumber(const Filter & filt, ssize_t result_size_hint) const;
 
-    ColumnPtr filterString(const Filter & filt) const;
-    ColumnPtr filterGeneric(const Filter & filt) const;
+    ColumnPtr filterString(const Filter & filt, ssize_t result_size_hint) const;
+    ColumnPtr filterGeneric(const Filter & filt, ssize_t result_size_hint) const;
 };
 
diff --git a/dbms/include/DB/Columns/ColumnConst.h b/dbms/include/DB/Columns/ColumnConst.h
index c7173c5d46c..b6bfa226ada 100644
--- a/dbms/include/DB/Columns/ColumnConst.h
+++ b/dbms/include/DB/Columns/ColumnConst.h
@@ -105,7 +105,7 @@ public:
         throw Exception("Method deserializeAndInsertFromArena is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
     }
 
-    ColumnPtr filter(const Filter & filt) const override
+    ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
     {
         if (s != filt.size())
             throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
diff --git a/dbms/include/DB/Columns/ColumnFixedString.h b/dbms/include/DB/Columns/ColumnFixedString.h
index e025e784d90..693cbed9668 100644
--- a/dbms/include/DB/Columns/ColumnFixedString.h
+++ b/dbms/include/DB/Columns/ColumnFixedString.h
@@ -189,7 +189,7 @@ public:
         memcpy(&chars[old_size], &src_concrete.chars[start * n], length * n);
     }
 
-    ColumnPtr filter(const IColumn::Filter & filt) const override
+    ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override
     {
         size_t col_size = size();
         if (col_size != filt.size())
@@ -197,7 +197,9 @@ public:
 
         ColumnFixedString * res_ = new ColumnFixedString(n);
         ColumnPtr res = res_;
-        res_->chars.reserve(chars.size());
+
+        if (result_size_hint)
+            res_->chars.reserve(result_size_hint > 0 ? result_size_hint * n : chars.size());
 
         size_t offset = 0;
         for (size_t i = 0; i < col_size; ++i, offset += n)
@@ -276,6 +278,11 @@ public:
         max = String();
     }
 
+    void reserve(size_t size) override
+    {
+        chars.reserve(n * size);
+    };
+
     Chars_t & getChars() { return chars; }
     const Chars_t & getChars() const { return chars; }
 
diff --git a/dbms/include/DB/Columns/ColumnString.h b/dbms/include/DB/Columns/ColumnString.h
index 659ecd0ca4f..9c706d2bd40 100644
--- a/dbms/include/DB/Columns/ColumnString.h
+++ b/dbms/include/DB/Columns/ColumnString.h
@@ -178,7 +178,7 @@ public:
         }
     }
 
-    ColumnPtr filter(const Filter & filt) const override
+    ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
     {
         if (offsets.size() == 0)
             return new ColumnString;
@@ -189,7 +189,7 @@ public:
         Chars_t & res_chars = res->chars;
         Offsets_t & res_offsets = res->offsets;
 
-        filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt);
+        filterArraysImpl<UInt8>(chars, offsets, res_chars, res_offsets, filt, result_size_hint);
 
         return res_;
     }
diff --git a/dbms/include/DB/Columns/ColumnTuple.h b/dbms/include/DB/Columns/ColumnTuple.h
index ebc13236eb9..439e0250817 100644
--- a/dbms/include/DB/Columns/ColumnTuple.h
+++ b/dbms/include/DB/Columns/ColumnTuple.h
@@ -123,12 +123,12 @@ public:
             start, length);
     }
 
-    ColumnPtr filter(const Filter & filt) const override
+    ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
     {
         Block res_block = data.cloneEmpty();
 
         for (size_t i = 0; i < columns.size(); ++i)
-            res_block.unsafeGetByPosition(i).column = data.unsafeGetByPosition(i).column->filter(filt);
+            res_block.unsafeGetByPosition(i).column = data.unsafeGetByPosition(i).column->filter(filt, result_size_hint);
 
         return new ColumnTuple(res_block);
     }
diff --git a/dbms/include/DB/Columns/ColumnVector.h b/dbms/include/DB/Columns/ColumnVector.h
index 60e619d429b..2bce7aee7fb 100644
--- a/dbms/include/DB/Columns/ColumnVector.h
+++ b/dbms/include/DB/Columns/ColumnVector.h
@@ -270,7 +270,7 @@ public:
         memcpy(&data[old_size], &src_vec.data[start], length * sizeof(data[0]));
     }
 
-    ColumnPtr filter(const IColumn::Filter & filt) const override
+    ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override
     {
         size_t size = data.size();
         if (size != filt.size())
@@ -279,7 +279,9 @@ public:
         Self * res_ = new Self;
         ColumnPtr res = res_;
         typename Self::Container_t & res_data = res_->getData();
-        res_data.reserve(size);
+
+        if (result_size_hint)
+            res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
 
         /** A slightly more optimized version.
          * It relies on the assumption that chunks of consecutive values
diff --git a/dbms/include/DB/Columns/ColumnsCommon.h b/dbms/include/DB/Columns/ColumnsCommon.h
index bebb7195363..c77c9f966f9 100644
--- a/dbms/include/DB/Columns/ColumnsCommon.h
+++ b/dbms/include/DB/Columns/ColumnsCommon.h
@@ -17,6 +17,6 @@
 template <typename T>
 void filterArraysImpl(
     const PODArray<T> & src_elems, const IColumn::Offsets_t & src_offsets,
     PODArray<T> & res_elems, IColumn::Offsets_t & res_offsets,
-    const IColumn::Filter & filt);
+    const IColumn::Filter & filt, ssize_t result_size_hint);
 
 }
diff --git a/dbms/include/DB/Columns/IColumn.h b/dbms/include/DB/Columns/IColumn.h
index 7bb0d91a2b9..b64caf4d8f1 100644
--- a/dbms/include/DB/Columns/IColumn.h
+++ b/dbms/include/DB/Columns/IColumn.h
@@ -176,9 +176,12 @@ public:
     /** Keep only the values that match the filter.
      * Used for the WHERE / HAVING operation.
+     * If result_size_hint > 0, reserve that size for the result;
+     * if it is 0, do not make a reserve at all,
+     * otherwise make a reserve of the size of the source column.
      */
     typedef PODArray<UInt8> Filter;
-    virtual SharedPtr<IColumn> filter(const Filter & filt) const = 0;
+    virtual SharedPtr<IColumn> filter(const Filter & filt, ssize_t result_size_hint) const = 0;
 
     /** Rearrange the values using the given permutation.
      * Used for sorting.
diff --git a/dbms/include/DB/Columns/IColumnDummy.h b/dbms/include/DB/Columns/IColumnDummy.h
index 3e37c417e5e..95eb2f22b10 100644
--- a/dbms/include/DB/Columns/IColumnDummy.h
+++ b/dbms/include/DB/Columns/IColumnDummy.h
@@ -50,7 +50,7 @@ public:
         s += length;
     }
 
-    ColumnPtr filter(const Filter & filt) const override
+    ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override
     {
         return cloneDummy(countBytesInFilter(filt));
     }
diff --git a/dbms/include/DB/Functions/FunctionsHigherOrder.h b/dbms/include/DB/Functions/FunctionsHigherOrder.h
index 3a1857caef4..31ce67f0940 100644
--- a/dbms/include/DB/Functions/FunctionsHigherOrder.h
+++ b/dbms/include/DB/Functions/FunctionsHigherOrder.h
@@ -77,7 +77,7 @@ struct ArrayFilterImpl
         }
 
         const IColumn::Filter & filter = column_filter->getData();
-        ColumnPtr filtered = array->getData().filter(filter);
+        ColumnPtr filtered = array->getData().filter(filter, -1);
 
         const IColumn::Offsets_t & in_offsets = array->getOffsets();
         ColumnArray::ColumnOffsets_t * column_offsets = new ColumnArray::ColumnOffsets_t(in_offsets.size());
diff --git a/dbms/include/DB/Storages/Distributed/DistributedBlockOutputStream.h b/dbms/include/DB/Storages/Distributed/DistributedBlockOutputStream.h
index 38601adf9b4..d49456a6be0 100644
--- a/dbms/include/DB/Storages/Distributed/DistributedBlockOutputStream.h
+++ b/dbms/include/DB/Storages/Distributed/DistributedBlockOutputStream.h
@@ -124,12 +124,15 @@ private:
         auto filters = createFilters(block);
 
         const auto num_shards = storage.cluster.getShardsInfo().size();
+
+        ssize_t size_hint = ((block.rowsInFirstColumn() + num_shards - 1) / num_shards) * 1.1;    /// The factor 1.1 is chosen arbitrarily.
+
         for (size_t i = 0; i < num_shards; ++i)
         {
             auto target_block = block.cloneEmpty();
 
             for (size_t col = 0; col < num_cols; ++col)
-                target_block.getByPosition(col).column = columns[col]->filter(filters[i]);
+                target_block.getByPosition(col).column = columns[col]->filter(filters[i], size_hint);
 
             if (target_block.rowsInFirstColumn())
                 writeImpl(target_block, i);
diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h b/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h
index 75a1eee1992..e6fe9e349fb 100644
--- a/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h
+++ b/dbms/include/DB/Storages/MergeTree/MergeTreeBlockInputStream.h
@@ -324,7 +324,7 @@ protected:
                 ColumnWithTypeAndName & column = res.getByPosition(i);
                 if (column.name == prewhere_column && res.columns() > 1)
                     continue;
-                column.column = column.column->filter(column_name_set.count(column.name) ? post_filter : pre_filter);
+                column.column = column.column->filter(column_name_set.count(column.name) ? post_filter : pre_filter, -1);
                 rows = column.column->size();
             }
 
diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeReadPool.h b/dbms/include/DB/Storages/MergeTree/MergeTreeReadPool.h
index 3ca8fc1e100..f982d7a882c 100644
--- a/dbms/include/DB/Storages/MergeTree/MergeTreeReadPool.h
+++ b/dbms/include/DB/Storages/MergeTree/MergeTreeReadPool.h
@@ -150,7 +150,7 @@ public:
             per_part_remove_prewhere_column[part_idx], per_part_should_reorder[part_idx]);
     }
 
-public:
+private:
     std::vector<size_t> fillPerPartInfo(
         RangesInDataParts & parts, const ExpressionActionsPtr & prewhere_actions, const String & prewhere_column_name,
         const bool check_columns)
diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeThreadBlockInputStream.h b/dbms/include/DB/Storages/MergeTree/MergeTreeThreadBlockInputStream.h
index d340d47f08a..8e26219f4a4 100644
--- a/dbms/include/DB/Storages/MergeTree/MergeTreeThreadBlockInputStream.h
+++ b/dbms/include/DB/Storages/MergeTree/MergeTreeThreadBlockInputStream.h
@@ -255,7 +255,7 @@ private:
                 if (col.name == prewhere_column && res.columns() > 1)
                     continue;
                 col.column =
-                    col.column->filter(task->column_name_set.count(col.name) ? post_filter : pre_filter);
+                    col.column->filter(task->column_name_set.count(col.name) ? post_filter : pre_filter, -1);
                 rows = col.column->size();
             }
 
diff --git a/dbms/src/Columns/ColumnArray.cpp b/dbms/src/Columns/ColumnArray.cpp
index 09dd8618cce..e2cc3aff5d2 100644
--- a/dbms/src/Columns/ColumnArray.cpp
+++ b/dbms/src/Columns/ColumnArray.cpp
@@ -41,24 +41,24 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
 }
 
 
-ColumnPtr ColumnArray::filter(const Filter & filt) const
+ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) const
 {
-    if (typeid_cast<const ColumnUInt8 *>(data.get()))      return filterNumber<UInt8>(filt);
-    if (typeid_cast<const ColumnUInt16 *>(data.get()))     return filterNumber<UInt16>(filt);
-    if (typeid_cast<const ColumnUInt32 *>(data.get()))     return filterNumber<UInt32>(filt);
-    if (typeid_cast<const ColumnUInt64 *>(data.get()))     return filterNumber<UInt64>(filt);
-    if (typeid_cast<const ColumnInt8 *>(data.get()))       return filterNumber<Int8>(filt);
-    if (typeid_cast<const ColumnInt16 *>(data.get()))      return filterNumber<Int16>(filt);
-    if (typeid_cast<const ColumnInt32 *>(data.get()))      return filterNumber<Int32>(filt);
-    if (typeid_cast<const ColumnInt64 *>(data.get()))      return filterNumber<Int64>(filt);
-    if (typeid_cast<const ColumnFloat32 *>(data.get()))    return filterNumber<Float32>(filt);
-    if (typeid_cast<const ColumnFloat64 *>(data.get()))    return filterNumber<Float64>(filt);
-    if (typeid_cast<const ColumnString *>(data.get()))     return filterString(filt);
-    return filterGeneric(filt);
+    if (typeid_cast<const ColumnUInt8 *>(data.get()))      return filterNumber<UInt8>(filt, result_size_hint);
+    if (typeid_cast<const ColumnUInt16 *>(data.get()))     return filterNumber<UInt16>(filt, result_size_hint);
+    if (typeid_cast<const ColumnUInt32 *>(data.get()))     return filterNumber<UInt32>(filt, result_size_hint);
+    if (typeid_cast<const ColumnUInt64 *>(data.get()))     return filterNumber<UInt64>(filt, result_size_hint);
+    if (typeid_cast<const ColumnInt8 *>(data.get()))       return filterNumber<Int8>(filt, result_size_hint);
+    if (typeid_cast<const ColumnInt16 *>(data.get()))      return filterNumber<Int16>(filt, result_size_hint);
+    if (typeid_cast<const ColumnInt32 *>(data.get()))      return filterNumber<Int32>(filt, result_size_hint);
+    if (typeid_cast<const ColumnInt64 *>(data.get()))      return filterNumber<Int64>(filt, result_size_hint);
+    if (typeid_cast<const ColumnFloat32 *>(data.get()))    return filterNumber<Float32>(filt, result_size_hint);
+    if (typeid_cast<const ColumnFloat64 *>(data.get()))    return filterNumber<Float64>(filt, result_size_hint);
+    if (typeid_cast<const ColumnString *>(data.get()))     return filterString(filt, result_size_hint);
+    return filterGeneric(filt, result_size_hint);
 }
 
 template <typename T>
-ColumnPtr ColumnArray::filterNumber(const Filter & filt) const
+ColumnPtr
ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint) const
 {
     if (getOffsets().size() == 0)
         return new ColumnArray(data);
 
     PODArray<T> & res_elems = static_cast<ColumnVector<T> &>(res->getData()).getData();
     Offsets_t & res_offsets = res->getOffsets();
 
-    filterArraysImpl<T>(static_cast<const ColumnVector<T> &>(*data).getData(), getOffsets(), res_elems, res_offsets, filt);
+    filterArraysImpl<T>(static_cast<const ColumnVector<T> &>(*data).getData(), getOffsets(), res_elems, res_offsets, filt, result_size_hint);
     return res_;
 }
 
-ColumnPtr ColumnArray::filterString(const Filter & filt) const
+ColumnPtr ColumnArray::filterString(const Filter & filt, ssize_t result_size_hint) const
 {
     size_t col_size = getOffsets().size();
     if (col_size != filt.size())
 
     Offsets_t & res_string_offsets = typeid_cast<ColumnString &>(res->getData()).getOffsets();
     Offsets_t & res_offsets = res->getOffsets();
 
-    res_chars.reserve(src_chars.size());
-    res_string_offsets.reserve(src_string_offsets.size());
-    res_offsets.reserve(col_size);
+    if (result_size_hint < 0)    /// Other cases are not handled.
+    {
+        res_chars.reserve(src_chars.size());
+        res_string_offsets.reserve(src_string_offsets.size());
+        res_offsets.reserve(col_size);
+    }
 
     Offset_t prev_src_offset = 0;
     Offset_t prev_src_string_offset = 0;
@@ -139,7 +142,7 @@ ColumnPtr ColumnArray::filterString(const Filter & filt) const
     return res_;
 }
 
-ColumnPtr ColumnArray::filterGeneric(const Filter & filt) const
+ColumnPtr ColumnArray::filterGeneric(const Filter & filt, ssize_t result_size_hint) const
 {
     size_t size = getOffsets().size();
     if (size != filt.size())
 
     ColumnArray * res_ = new ColumnArray(data);
     ColumnPtr res = res_;
-    res_->data = data->filter(nested_filt);
+
+    ssize_t nested_result_size_hint = 0;
+    if (result_size_hint < 0)
+        nested_result_size_hint = result_size_hint;
+    else if (result_size_hint && result_size_hint < 1000000000 && data->size() < 1000000000)    /// Avoid overflow.
+        nested_result_size_hint = result_size_hint * data->size() / size;
+
+    res_->data = data->filter(nested_filt, nested_result_size_hint);
 
     Offsets_t & res_offsets = res_->getOffsets();
-    res_offsets.reserve(size);
+    if (result_size_hint)
+        res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size);
 
     size_t current_offset = 0;
     for (size_t i = 0; i < size; ++i)
diff --git a/dbms/src/Columns/ColumnsCommon.cpp b/dbms/src/Columns/ColumnsCommon.cpp
index cd3b15adcb7..f9e4137a1c2 100644
--- a/dbms/src/Columns/ColumnsCommon.cpp
+++ b/dbms/src/Columns/ColumnsCommon.cpp
@@ -47,14 +47,21 @@
 template <typename T>
 void filterArraysImpl(
     const PODArray<T> & src_elems, const IColumn::Offsets_t & src_offsets,
     PODArray<T> & res_elems, IColumn::Offsets_t & res_offsets,
-    const IColumn::Filter & filt)
+    const IColumn::Filter & filt, ssize_t result_size_hint)
 {
     const size_t size = src_offsets.size();
     if (size != filt.size())
         throw Exception("Size of filter doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
 
-    res_elems.reserve(src_elems.size());
-    res_offsets.reserve(size);
+    if (result_size_hint)
+    {
+        res_offsets.reserve(result_size_hint > 0 ? result_size_hint : size);
+
+        if (result_size_hint < 0)
+            res_elems.reserve(src_elems.size());
+        else if (result_size_hint < 1000000000 && src_elems.size() < 1000000000)    /// Avoid overflow.
+            res_elems.reserve(result_size_hint * src_elems.size() / size);
+    }
 
     IColumn::Offset_t current_src_offset = 0;
 
@@ -150,24 +157,24 @@ void filterArraysImpl(
 
 /// Explicit instantiations - so that the implementation of the function above does not have to be placed in a header file.
 
 template void filterArraysImpl<UInt8>(
-    const PODArray<UInt8> &, const IColumn::Offsets_t &, PODArray<UInt8> &, IColumn::Offsets_t &, const IColumn::Filter &);
+    const PODArray<UInt8> &, const IColumn::Offsets_t &, PODArray<UInt8> &, IColumn::Offsets_t &, const IColumn::Filter &, ssize_t);
 template void filterArraysImpl<UInt16>(
-    const PODArray<UInt16> &, const IColumn::Offsets_t &, PODArray<UInt16> &, IColumn::Offsets_t &, const IColumn::Filter &);
+    const PODArray<UInt16> &, const IColumn::Offsets_t &, PODArray<UInt16> &, IColumn::Offsets_t &, const IColumn::Filter &, ssize_t);
 template void filterArraysImpl<UInt32>(
-    const PODArray<UInt32> &, const IColumn::Offsets_t &, PODArray<UInt32> &, IColumn::Offsets_t &, const IColumn::Filter &);
+    const PODArray<UInt32> &, const IColumn::Offsets_t &, PODArray<UInt32> &, IColumn::Offsets_t &, const IColumn::Filter &, ssize_t);
 template void filterArraysImpl<UInt64>(
-    const PODArray<UInt64> &, const IColumn::Offsets_t &, PODArray<UInt64> &, IColumn::Offsets_t &, const IColumn::Filter &);
+    const PODArray<UInt64> &, const IColumn::Offsets_t &, PODArray<UInt64> &, IColumn::Offsets_t &, const IColumn::Filter &, ssize_t);
 template void filterArraysImpl<Int8>(
-    const PODArray<Int8> &, const IColumn::Offsets_t &, PODArray<Int8> &, IColumn::Offsets_t &, const IColumn::Filter &);
+    const PODArray<Int8> &, const IColumn::Offsets_t &, PODArray<Int8> &, IColumn::Offsets_t &, const IColumn::Filter &, ssize_t);
 template void filterArraysImpl<Int16>(
-    const PODArray<Int16> &, const IColumn::Offsets_t &, PODArray<Int16> &, IColumn::Offsets_t &, const IColumn::Filter &);
+    const PODArray<Int16> &, const IColumn::Offsets_t &, PODArray<Int16> &, IColumn::Offsets_t &, const IColumn::Filter &, ssize_t);
 template void filterArraysImpl<Int32>(
-    const PODArray<Int32> &, const IColumn::Offsets_t &, PODArray<Int32> &, IColumn::Offsets_t &, const IColumn::Filter &);
+    const PODArray<Int32> &, const IColumn::Offsets_t &, PODArray<Int32> &, IColumn::Offsets_t &, const IColumn::Filter &, ssize_t);
 template void filterArraysImpl<Int64>(
-    const PODArray<Int64> &, const IColumn::Offsets_t &, PODArray<Int64> &, IColumn::Offsets_t &, const IColumn::Filter &);
+    const PODArray<Int64> &, const IColumn::Offsets_t &, PODArray<Int64> &, IColumn::Offsets_t &, const IColumn::Filter &, ssize_t);
 template void filterArraysImpl<Float32>(
-    const PODArray<Float32> &, const IColumn::Offsets_t &, PODArray<Float32> &, IColumn::Offsets_t &, const IColumn::Filter &);
+    const PODArray<Float32> &, const IColumn::Offsets_t &, PODArray<Float32> &, IColumn::Offsets_t &, const IColumn::Filter &, ssize_t);
 template void filterArraysImpl<Float64>(
-    const PODArray<Float64> &, const IColumn::Offsets_t &, PODArray<Float64> &, IColumn::Offsets_t &, const IColumn::Filter &);
+    const PODArray<Float64> &, const IColumn::Offsets_t &, PODArray<Float64> &, IColumn::Offsets_t &, const IColumn::Filter &, ssize_t);
 
 }
diff --git a/dbms/src/Common/VirtualColumnUtils.cpp b/dbms/src/Common/VirtualColumnUtils.cpp
index 243dbabd2a0..de94e18a56b 100644
--- a/dbms/src/Common/VirtualColumnUtils.cpp
+++ b/dbms/src/Common/VirtualColumnUtils.cpp
@@ -169,7 +169,7 @@ bool filterBlockWithQuery(ASTPtr query, Block & block, const Context & context)
     for (size_t i = 0; i < block.columns(); ++i)
     {
         ColumnPtr & column = block.getByPosition(i).column;
-        column = column->filter(filter);
+        column = column->filter(filter, -1);
     }
 
     return true;
diff --git a/dbms/src/DataStreams/CollapsingFinalBlockInputStream.cpp b/dbms/src/DataStreams/CollapsingFinalBlockInputStream.cpp
index 5a975e7932c..b4382214f50 100644
---
 a/dbms/src/DataStreams/CollapsingFinalBlockInputStream.cpp
+++ b/dbms/src/DataStreams/CollapsingFinalBlockInputStream.cpp
@@ -153,7 +153,7 @@ Block CollapsingFinalBlockInputStream::readImpl()
         Block block = merging_block->block;
 
         for (size_t i = 0; i < block.columns(); ++i)
-            block.getByPosition(i).column = block.getByPosition(i).column->filter(merging_block->filter);
+            block.getByPosition(i).column = block.getByPosition(i).column->filter(merging_block->filter, -1);
 
         output_blocks.pop_back();
         delete merging_block;
diff --git a/dbms/src/DataStreams/DistinctBlockInputStream.cpp b/dbms/src/DataStreams/DistinctBlockInputStream.cpp
index 5ca94d07084..7647f831462 100644
--- a/dbms/src/DataStreams/DistinctBlockInputStream.cpp
+++ b/dbms/src/DataStreams/DistinctBlockInputStream.cpp
@@ -107,7 +107,7 @@ Block DistinctBlockInputStream::readImpl()
         size_t all_columns = block.columns();
         for (size_t i = 0; i < all_columns; ++i)
-            block.getByPosition(i).column = block.getByPosition(i).column->filter(filter);
+            block.getByPosition(i).column = block.getByPosition(i).column->filter(filter, -1);
 
         return block;
     }
diff --git a/dbms/src/DataStreams/FilterBlockInputStream.cpp b/dbms/src/DataStreams/FilterBlockInputStream.cpp
index c42faaf5cb9..2a0283a49b9 100644
--- a/dbms/src/DataStreams/FilterBlockInputStream.cpp
+++ b/dbms/src/DataStreams/FilterBlockInputStream.cpp
@@ -77,7 +77,7 @@ Block FilterBlockInputStream::readImpl()
         if (first_non_constant_column != static_cast<size_t>(filter_column))
         {
             ColumnWithTypeAndName & current_column = res.getByPosition(first_non_constant_column);
-            current_column.column = current_column.column->filter(filter);
+            current_column.column = current_column.column->filter(filter, -1);
             filtered_rows = current_column.column->size();
         }
         else
@@ -116,7 +116,7 @@ Block FilterBlockInputStream::readImpl()
             if (current_column.column->isConst())
                 current_column.column = current_column.column->cut(0, filtered_rows);
             else
-                current_column.column = current_column.column->filter(filter);
+                current_column.column = current_column.column->filter(filter, -1);
         }
 
         return res;
diff --git a/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp b/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp
index 1967d117bb1..fd52c3aced4 100644
--- a/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp
+++ b/dbms/src/DataStreams/TotalsHavingBlockInputStream.cpp
@@ -107,7 +107,7 @@ Block TotalsHavingBlockInputStream::readImpl()
         for (size_t i = 0; i < columns; ++i)
         {
             ColumnWithTypeAndName & current_column = finalized.getByPosition(i);
-            current_column.column = current_column.column->filter(filter);
+            current_column.column = current_column.column->filter(filter, -1);
             if (current_column.column->empty())
             {
                 finalized.clear();
diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index 5194dfb0513..3a49378eee1 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -2206,6 +2206,9 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl(
             filter[i] = 1;
     }
 
+    ssize_t size_hint = ((source.rowsInFirstColumn() + method.data.NUM_BUCKETS - 1)
+        / method.data.NUM_BUCKETS) * 1.1;    /// The factor 1.1 is chosen arbitrarily.
+
     for (size_t bucket = 0, size = destinations.size(); bucket < size; ++bucket)
     {
         const auto & filter = filters[bucket];
@@ -2219,7 +2222,7 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl(
         for (size_t j = 0; j < columns; ++j)
         {
             const ColumnWithTypeAndName & src_col = source.unsafeGetByPosition(j);
-            dst.insert({src_col.column->filter(filter), src_col.type, src_col.name});
+            dst.insert({src_col.column->filter(filter, size_hint), src_col.type, src_col.name});
 
             /** The columns of type ColumnAggregateFunction inserted into the block will own the aggregate function states
              * by holding a SharedPtr to the source column. See ColumnAggregateFunction.h
diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp
index 59356d193ae..ced3adac399 100644
--- a/dbms/src/Interpreters/Join.cpp
+++ b/dbms/src/Interpreters/Join.cpp
@@ -740,7 +740,7 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const
     /// For ANY INNER|RIGHT JOIN, filter all columns except the new ones.
     if (filter)
         for (size_t i = 0; i < existing_columns; ++i)
-            block.getByPosition(i).column = block.getByPosition(i).column->filter(*filter);
+            block.getByPosition(i).column = block.getByPosition(i).column->filter(*filter, -1);
 
     /// For ALL ... JOIN, replicate all columns except the new ones.
     if (offsets_to_replicate)
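
A note on patch 05: the result_size_hint parameter threaded through every IColumn::filter() overload follows the contract documented in IColumn.h above - a positive hint reserves exactly that many result values, zero skips the reserve entirely, and a negative hint reserves the full size of the source column. A toy model of that contract over std::vector (illustrative names, not the real API):

    #include <sys/types.h>  // ssize_t
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<int> filter(const std::vector<int> & data,
                            const std::vector<std::uint8_t> & filt,
                            ssize_t result_size_hint)
    {
        std::vector<int> res;
        if (result_size_hint)  // 0 means "do not reserve"
            res.reserve(result_size_hint > 0
                ? static_cast<std::size_t>(result_size_hint)
                : data.size());

        for (std::size_t i = 0; i < data.size(); ++i)
            if (filt[i])
                res.push_back(data[i]);
        return res;
    }

Callers that split a block into many pieces (sharding in DistributedBlockOutputStream, bucket splitting in convertBlockToTwoLevelImpl) pass roughly rows / num_pieces * 1.1, so each piece reserves only its expected share instead of the full source size; callers that keep most rows pass -1.
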
From b76731a578d8c7eec1b5ab52cc8151dd1cde9df8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 5 Dec 2015 10:03:17 +0300
Subject: [PATCH 06/40] dbms: improved startup times for lagged replica [#METR-18563].

---
 dbms/src/Storages/StorageReplicatedMergeTree.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index d83070dbf2f..762b84ba5af 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -770,12 +770,19 @@ void StorageReplicatedMergeTree::loadQueue()
     Strings children = zookeeper->getChildren(replica_path + "/queue");
     std::sort(children.begin(), children.end());
+
+    std::vector<std::pair<String, zkutil::ZooKeeper::GetFuture>> futures;
+    futures.reserve(children.size());
+
     for (const String & child : children)
+        futures.emplace_back(child, zookeeper->asyncGet(replica_path + "/queue/" + child));
+
+    for (auto & future : futures)
     {
-        zkutil::Stat stat;
-        String s = zookeeper->get(replica_path + "/queue/" + child, &stat);
-        LogEntryPtr entry = LogEntry::parse(s, stat);
-        entry->znode_name = child;
+        zkutil::ZooKeeper::ValueAndStat res = future.second.get();
+        LogEntryPtr entry = LogEntry::parse(res.value, res.stat);
+
+        entry->znode_name = future.first;
         entry->addResultToVirtualParts(*this);
         queue.push_back(entry);
     }
From ed0c42253dbbb58c1a85133dcd558a971241ef61 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 6 Dec 2015 08:43:36 +0300
Subject: [PATCH 07/40] dbms: MergingAggregatedMemoryEfficientBlockInputStream: correctly set memory tracker [#METR-17000].

---
 ...rgingAggregatedMemoryEfficientBlockInputStream.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
index daf9b478f2e..7407e03908e 100644
--- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
@@ -59,7 +59,14 @@ void MergingAggregatedMemoryEfficientBlockInputStream::start()
             auto & child = children[i];
             auto & task = tasks[i];
 
-            task = std::packaged_task<void()>([&child] { child->readPrefix(); });
+            auto memory_tracker = current_memory_tracker;
+            task = std::packaged_task<void()>([&child, memory_tracker]
+            {
+                /// The memory_tracker and the thread name are set here. After that, they are no longer required for the tasks in reading_pool.
+                current_memory_tracker = memory_tracker;
+                setThreadName("MergeAggReadThr");
+                child->readPrefix();
+            });
             reading_pool->schedule([&task] { task(); });
         }
 
@@ -130,7 +137,7 @@ MergingAggregatedMemoryEfficientBlockInputStream::~MergingAggregatedMemoryEffici
 
 void MergingAggregatedMemoryEfficientBlockInputStream::mergeThread(MemoryTracker * memory_tracker)
 {
-    setThreadName("MrgAggMemEffThr");
+    setThreadName("MergeAggMergThr");
     current_memory_tracker = memory_tracker;
 
     try
From 2b1614825e0472cc31fce7d6c6180ba455cbfe5e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 6 Dec 2015 10:32:47 +0300
Subject: [PATCH 08/40] dbms: improvement [#METR-17000].

---
 dbms/include/DB/Interpreters/Aggregator.h |  4 ++
 dbms/src/Interpreters/Aggregator.cpp      | 55 +++++------------------
 2 files changed, 16 insertions(+), 43 deletions(-)

diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h
index da5f4bafd31..3d9729ba994 100644
--- a/dbms/include/DB/Interpreters/Aggregator.h
+++ b/dbms/include/DB/Interpreters/Aggregator.h
@@ -714,6 +714,10 @@ struct AggregatedDataVariants : private boost::noncopyable
         M(key8)  \
         M(key16) \
 
+    #define APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) \
+        APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \
+        APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \
+
     bool isConvertibleToTwoLevel() const
     {
         switch (type)
diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index 3a49378eee1..ed9ebcee97a 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -1677,51 +1677,20 @@ AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_va
         mergeWithoutKeyDataImpl(non_empty_data);
 
     std::unique_ptr<boost::threadpool::pool> thread_pool;
-    if (max_threads > 1 && rows > 100000    /// TODO Make the threshold configurable.
-        && res->isTwoLevel())
+    if (max_threads > 1 && res->isTwoLevel())
         thread_pool.reset(new boost::threadpool::pool(max_threads));
 
-    /// TODO Simplify.
-    if (res->type == AggregatedDataVariants::Type::key8)
-        mergeSingleLevelDataImpl<decltype(res->key8)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::key16)
-        mergeSingleLevelDataImpl<decltype(res->key16)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::key32)
-        mergeSingleLevelDataImpl<decltype(res->key32)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::key64)
-        mergeSingleLevelDataImpl<decltype(res->key64)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::key_string)
-        mergeSingleLevelDataImpl<decltype(res->key_string)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::key_fixed_string)
-        mergeSingleLevelDataImpl<decltype(res->key_fixed_string)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::keys128)
-        mergeSingleLevelDataImpl<decltype(res->keys128)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::keys256)
-        mergeSingleLevelDataImpl<decltype(res->keys256)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::hashed)
-        mergeSingleLevelDataImpl<decltype(res->hashed)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::concat)
-        mergeSingleLevelDataImpl<decltype(res->concat)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::serialized)
-        mergeSingleLevelDataImpl<decltype(res->serialized)::element_type>(non_empty_data);
-    else if (res->type == AggregatedDataVariants::Type::key32_two_level)
-        mergeTwoLevelDataImpl<decltype(res->key32_two_level)::element_type>(non_empty_data, thread_pool.get());
-    else if (res->type == AggregatedDataVariants::Type::key64_two_level)
-        mergeTwoLevelDataImpl<decltype(res->key64_two_level)::element_type>(non_empty_data, thread_pool.get());
-    else if (res->type == AggregatedDataVariants::Type::key_string_two_level)
-        mergeTwoLevelDataImpl<decltype(res->key_string_two_level)::element_type>(non_empty_data, thread_pool.get());
-    else if (res->type == AggregatedDataVariants::Type::key_fixed_string_two_level)
-        mergeTwoLevelDataImpl<decltype(res->key_fixed_string_two_level)::element_type>(non_empty_data, thread_pool.get());
-    else if (res->type == AggregatedDataVariants::Type::keys128_two_level)
-        mergeTwoLevelDataImpl<decltype(res->keys128_two_level)::element_type>(non_empty_data, thread_pool.get());
-    else if (res->type == AggregatedDataVariants::Type::keys256_two_level)
-        mergeTwoLevelDataImpl<decltype(res->keys256_two_level)::element_type>(non_empty_data, thread_pool.get());
-    else if (res->type == AggregatedDataVariants::Type::hashed_two_level)
-        mergeTwoLevelDataImpl<decltype(res->hashed_two_level)::element_type>(non_empty_data, thread_pool.get());
-    else if (res->type == AggregatedDataVariants::Type::concat_two_level)
-        mergeTwoLevelDataImpl<decltype(res->concat_two_level)::element_type>(non_empty_data, thread_pool.get());
-    else if (res->type == AggregatedDataVariants::Type::serialized_two_level)
-        mergeTwoLevelDataImpl<decltype(res->serialized_two_level)::element_type>(non_empty_data, thread_pool.get());
+    if (false) {}
+#define M(NAME) \
+    else if (res->type == AggregatedDataVariants::Type::NAME) \
+        mergeSingleLevelDataImpl<decltype(res->NAME)::element_type>(non_empty_data);
+    APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
+#undef M
+#define M(NAME) \
+    else if (res->type == AggregatedDataVariants::Type::NAME) \
+        mergeTwoLevelDataImpl<decltype(res->NAME)::element_type>(non_empty_data, thread_pool.get());
+    APPLY_FOR_VARIANTS_TWO_LEVEL(M)
+#undef M
     else if (res->type != AggregatedDataVariants::Type::without_key)
         throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
 
From 0ef797d9f303b89c602ac5f793e5dfd2311077cd Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 6 Dec 2015 17:27:09 +0300
Subject: [PATCH 09/40] dbms: fixed error [#METR-17000].

---
 dbms/include/DB/Common/HashTable/HashTable.h | 38 ++++++++---
 dbms/include/DB/Interpreters/Aggregator.h    |  7 ++
 dbms/src/Interpreters/Aggregator.cpp         | 72 +++++++++-----------
 3 files changed, 69 insertions(+), 48 deletions(-)

diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h
index 98ac102dfb8..d295a967876 100644
--- a/dbms/include/DB/Common/HashTable/HashTable.h
+++ b/dbms/include/DB/Common/HashTable/HashTable.h
@@ -302,7 +302,11 @@ protected:
     void free()
     {
-        Allocator::free(buf, getBufferSizeInBytes());
+        if (buf)
+        {
+            Allocator::free(buf, getBufferSizeInBytes());
+            buf = nullptr;
+        }
     }
 
@@ -397,6 +401,14 @@ protected:
     }
 
 
+    void destroyElements()
+    {
+        if (!__has_trivial_destructor(Cell))
+            for (iterator it = begin(); it != end(); ++it)
+                it.ptr->~Cell();
+    }
+
+
 public:
     typedef Key key_type;
     typedef typename Cell::value_type value_type;
@@ -421,10 +433,7 @@ public:
     ~HashTable()
     {
-        if (!__has_trivial_destructor(Cell))
-            for (iterator it = begin(); it != end(); ++it)
-                it.ptr->~Cell();
-
+        destroyElements();
         free();
     }
 
@@ -789,6 +798,7 @@ public:
     {
         Cell::State::read(rb);
 
+        destroyElements();
         this->clearHasZero();
         m_size = 0;
 
@@ -812,6 +822,7 @@ public:
     {
         Cell::State::readText(rb);
 
+        destroyElements();
        this->clearHasZero();
         m_size = 0;
 
@@ -845,12 +856,23 @@ public:
     void clear()
     {
-        if (!__has_trivial_destructor(Cell))
-            for (iterator it = begin(); it != end(); ++it)
-                it.ptr->~Cell();
+        destroyElements();
+        this->clearHasZero();
+        m_size = 0;
 
         memset(buf, 0, grower.bufSize() * sizeof(*buf));
+    }
+
+    void clearAndShrink()
+    {
+        destroyElements();
+        this->clearHasZero();
         m_size = 0;
+
+        free();
+        Grower new_grower = grower;
+        new_grower.set(0);
+        alloc(new_grower);
     }
 
     size_t getBufferSizeInBytes() const
diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h
index 3d9729ba994..0be1bb65ff4 100644
--- a/dbms/include/DB/Interpreters/Aggregator.h
+++ b/dbms/include/DB/Interpreters/Aggregator.h
@@ -1130,6 +1130,13 @@ protected:
         size_t rows,
         Filler && filler) const;
 
+    template <typename Method>
+    Block convertOneBucketToBlock(
+        AggregatedDataVariants & data_variants,
+        Method & method,
+        bool final,
+        size_t bucket) const;
+
     BlocksList prepareBlocksAndFillWithoutKey(AggregatedDataVariants & data_variants, bool final, bool is_overflows) const;
     BlocksList prepareBlocksAndFillSingleLevel(AggregatedDataVariants & data_variants, bool final) const;
     BlocksList prepareBlocksAndFillTwoLevel(AggregatedDataVariants & data_variants, bool final, boost::threadpool::pool * thread_pool) const;
diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index ed9ebcee97a..fc5032cb511 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -824,6 +824,35 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, si
 }
 
 
+template <typename Method>
+Block Aggregator::convertOneBucketToBlock(
+    AggregatedDataVariants & data_variants,
+    Method & method,
+    bool final,
+    size_t bucket) const
+{
+    Block block = prepareBlockAndFill(data_variants, final, method.data.impls[bucket].size(),
+        [bucket, &method, this] (
+            ColumnPlainPtrs & key_columns,
+            AggregateColumnsData & aggregate_columns,
+            ColumnPlainPtrs & final_aggregate_columns,
+            const Sizes & key_sizes,
+            bool final)
+        {
+            convertToBlockImpl(method, method.data.impls[bucket],
+                key_columns, aggregate_columns, final_aggregate_columns, key_sizes, final);
+        });
+
+    /** So that, in case of an exception, the aggregator does not destroy the states of aggregate functions
+     * whose ownership has already been transferred to the block;
+     * and also to release the memory earlier.
+     */
+    method.data.impls[bucket].clearAndShrink();
+
+    block.info.bucket_num = bucket;
+    return block;
+}
+
+
 template <typename Method>
 void Aggregator::writeToTemporaryFileImpl(
     AggregatedDataVariants & data_variants,
@@ -839,19 +868,7 @@ void Aggregator::writeToTemporaryFileImpl(
         if (method.data.impls[bucket].empty())
             continue;
 
-        Block block = prepareBlockAndFill(data_variants, false, method.data.impls[bucket].size(),
-            [bucket, &method, this] (
-                ColumnPlainPtrs & key_columns,
-                AggregateColumnsData & aggregate_columns,
-                ColumnPlainPtrs & final_aggregate_columns,
-                const Sizes & key_sizes,
-                bool final)
-            {
-                convertToBlockImpl(method, method.data.impls[bucket],
-                    key_columns, aggregate_columns, final_aggregate_columns, key_sizes, final);
-            });
-
-        block.info.bucket_num = bucket;
+        Block block = convertOneBucketToBlock(data_variants, method, false, bucket);
         out.write(block);
 
         size_t block_size_rows = block.rowsInFirstColumn();
@@ -1164,35 +1181,10 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl(
     bool final,
     boost::threadpool::pool * thread_pool) const
 {
-    auto filler = [&method, this](
-        ColumnPlainPtrs & key_columns,
-        AggregateColumnsData & aggregate_columns,
-        ColumnPlainPtrs & final_aggregate_columns,
-        const Sizes & key_sizes,
-        bool final,
-        size_t bucket)
-    {
-        convertToBlockImpl(method, method.data.impls[bucket],
-            key_columns, aggregate_columns, final_aggregate_columns, key_sizes, final);
-    };
-
     auto converter = [&](size_t bucket, MemoryTracker * memory_tracker)
     {
         current_memory_tracker = memory_tracker;
-
-        Block block = prepareBlockAndFill(data_variants, final, method.data.impls[bucket].size(),
-            [bucket, &filler] (
-                ColumnPlainPtrs & key_columns,
-                AggregateColumnsData & aggregate_columns,
-                ColumnPlainPtrs & final_aggregate_columns,
-                const Sizes & key_sizes,
-                bool final)
-            {
-                filler(key_columns, aggregate_columns, final_aggregate_columns, key_sizes, final, bucket);
-            });
-
-        block.info.bucket_num = bucket;
-        return block;
+        return convertOneBucketToBlock(data_variants, method, final, bucket);
     };
 
     /// packaged_task is used so that exceptions are automatically propagated to the main thread.
@@ -1615,7 +1607,7 @@ void NO_INLINE Aggregator::mergeTwoLevelDataImpl(
 AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_variants, size_t max_threads)
 {
     if (data_variants.empty())
-        throw Exception("Empty data passed to Aggregator::merge().", ErrorCodes::EMPTY_DATA_PASSED);
+        throw Exception("Empty data passed to Aggregator::merge.", ErrorCodes::EMPTY_DATA_PASSED);
 
     LOG_TRACE(log, "Merging aggregated data");
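
A note on patch 09: clearAndShrink() exists so that a bucket's hash-table buffer can be released as soon as its contents have been converted to a Block in convertOneBucketToBlock(), rather than keeping up to 256 empty-but-allocated buckets alive until the whole result is converted. With a standard container, the closest equivalent is the swap-with-empty idiom (a sketch under that assumption, not ClickHouse code):

    #include <iostream>
    #include <unordered_map>

    template <typename Map>
    void clear_and_shrink(Map & m)
    {
        Map().swap(m);  // destroys the elements and releases the bucket array
    }

    int main()
    {
        std::unordered_map<int, int> m;
        for (int i = 0; i < 100000; ++i)
            m[i] = i;

        m.clear();  // elements gone, but the bucket array stays allocated
        std::cout << m.bucket_count() << '\n';

        clear_and_shrink(m);
        std::cout << m.bucket_count() << '\n';  // back to a small default; memory returned
    }
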
--- .../DataStreams/ParallelAggregatingBlockInputStream.h | 11 +++++++++-- dbms/include/DB/DataStreams/ParallelInputsProcessor.h | 2 +- dbms/src/DataStreams/AggregatingBlockInputStream.cpp | 11 +++++++---- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h b/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h index 23ebbfa5121..de1db09a28f 100644 --- a/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h +++ b/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h @@ -209,10 +209,14 @@ private: void onFinishThread(size_t thread_num) { - if (parent.aggregator.hasTemporaryFiles()) + if (!parent.isCancelled() && parent.aggregator.hasTemporaryFiles()) { /// Сбросим имеющиеся в оперативке данные тоже на диск. Так проще их потом объединять. auto & data = *parent.many_data[thread_num]; + + if (data.isConvertibleToTwoLevel()) + data.convertToTwoLevel(); + size_t rows = data.sizeWithoutOverflowRow(); if (rows) parent.aggregator.writeToTemporaryFile(data, rows); @@ -221,12 +225,15 @@ private: void onFinish() { - if (parent.aggregator.hasTemporaryFiles()) + if (!parent.isCancelled() && parent.aggregator.hasTemporaryFiles()) { /// Может так получиться, что какие-то данные ещё не сброшены на диск, /// потому что во время вызова onFinishThread ещё никакие данные не были сброшены на диск, а потом какие-то - были. for (auto & data : parent.many_data) { + if (data->isConvertibleToTwoLevel()) + data->convertToTwoLevel(); + size_t rows = data->sizeWithoutOverflowRow(); if (rows) parent.aggregator.writeToTemporaryFile(*data, rows); diff --git a/dbms/include/DB/DataStreams/ParallelInputsProcessor.h b/dbms/include/DB/DataStreams/ParallelInputsProcessor.h index 773d7080e3e..ae205e8708c 100644 --- a/dbms/include/DB/DataStreams/ParallelInputsProcessor.h +++ b/dbms/include/DB/DataStreams/ParallelInputsProcessor.h @@ -211,7 +211,7 @@ private: } } - handler.onFinish(); + handler.onFinish(); /// TODO Если в onFinish или onFinishThread эксепшен, то вызывается std::terminate. } } diff --git a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp index 9e287d6778f..618f2488499 100644 --- a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp @@ -32,10 +32,13 @@ Block AggregatingBlockInputStream::readImpl() ProfileEvents::increment(ProfileEvents::ExternalAggregationMerge); - /// Сбросим имеющиеся в оперативке данные тоже на диск. Так проще. - size_t rows = data_variants.sizeWithoutOverflowRow(); - if (rows) - aggregator.writeToTemporaryFile(data_variants, rows); + if (!isCancelled()) + { + /// Сбросим имеющиеся в оперативке данные тоже на диск. Так проще. + size_t rows = data_variants.sizeWithoutOverflowRow(); + if (rows) + aggregator.writeToTemporaryFile(data_variants, rows); + } const auto & files = aggregator.getTemporaryFiles(); BlockInputStreams input_streams; From 7dc5661521348cab48c7d8b961d63e4279069823 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Dec 2015 19:22:01 +0300 Subject: [PATCH 11/40] dbms: more memory efficient merging aggregated states [#METR-17000]. 
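The new MergingAndConvertingBlockInputStream added below must emit blocks strictly in ascending bucket_num order, even though its worker threads finish buckets out of order. A self-contained sketch of that ordering technique (illustrative only; the real class keys a std::map by bucket number, wakes the reader through a condition variable, and carries exceptions through the same mutex):

    #include <condition_variable>
    #include <map>
    #include <mutex>

    struct OrderedResults
    {
        std::mutex mutex;
        std::condition_variable condvar;
        std::map<int, int> ready;    /// bucket_num -> merged result

        void put(int bucket, int value)    /// called by worker threads, in any order
        {
            {
                std::lock_guard<std::mutex> lock(mutex);
                ready[bucket] = value;
            }
            condvar.notify_all();
        }

        int waitFor(int bucket)    /// the consumer asks for buckets 0, 1, 2, ... in order
        {
            std::unique_lock<std::mutex> lock(mutex);
            condvar.wait(lock, [&] { return ready.count(bucket); });
            return ready[bucket];
        }
    };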
---
 .../ParallelAggregatingBlockInputStream.h |   7 +-
 dbms/include/DB/Interpreters/Aggregator.h |   9 +
 dbms/src/Interpreters/Aggregator.cpp      | 261 ++++++++++++++++++
 3 files changed, 271 insertions(+), 6 deletions(-)

diff --git a/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h b/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h
index de1db09a28f..f53a9ba3914 100644
--- a/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h
+++ b/dbms/include/DB/DataStreams/ParallelAggregatingBlockInputStream.h
@@ -88,13 +88,8 @@ protected:
         if (!aggregator.hasTemporaryFiles())
         {
             /** If all the partially aggregated data is in RAM, merge it in parallel, also in RAM.
-             * NOTE If more than half of the allowed memory has been used up, the merge should be done more economically.
              */
-            AggregatedDataVariantsPtr data_variants = aggregator.merge(many_data, max_threads);
-
-            if (data_variants)
-                impl.reset(new BlocksListBlockInputStream(
-                    aggregator.convertToBlocks(*data_variants, final, max_threads)));
+            impl = aggregator.mergeAndConvertToBlocks(many_data, final, max_threads);
         }
         else
         {
diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h
index 0be1bb65ff4..8698779f66d 100644
--- a/dbms/include/DB/Interpreters/Aggregator.h
+++ b/dbms/include/DB/Interpreters/Aggregator.h
@@ -858,6 +858,10 @@ public:
      */
     AggregatedDataVariantsPtr merge(ManyAggregatedDataVariants & data_variants, size_t max_threads);
 
+    /** Merge several aggregation data structures and return the result as a stream of blocks.
+     */
+    std::unique_ptr<IBlockInputStream> mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, size_t max_threads);
+
     /** Merge a stream of partially aggregated blocks into a single data structure.
      * (Re-aggregate several blocks that represent the results of independent aggregations from remote servers.)
      */
@@ -904,6 +908,7 @@ public:
 
 protected:
     friend struct AggregatedDataVariants;
+    friend class MergingAndConvertingBlockInputStream;
 
     Params params;
 
@@ -1171,6 +1176,10 @@ protected:
         Block & block,
         AggregatedDataVariants & result) const;
 
+    template <typename Method>
+    void mergeBucketImpl(
+        ManyAggregatedDataVariants & data, Int32 bucket) const;
+
     template <typename Method>
     void convertBlockToTwoLevelImpl(
         Method & method,
diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index fc5032cb511..dc677a77c70 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -5,6 +5,7 @@
 #include
 #include
+#include
 #include
 #include
 
@@ -12,6 +13,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
@@ -1699,6 +1701,265 @@ AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_va
 }
 
 
+template <typename Method>
+void NO_INLINE Aggregator::mergeBucketImpl(
+    ManyAggregatedDataVariants & data, Int32 bucket) const
+{
+    /// We merge all aggregation results into the first one.
+    AggregatedDataVariantsPtr & res = data[0];
+    for (size_t i = 1, size = data.size(); i < size; ++i)
+    {
+        AggregatedDataVariants & current = *data[i];
+
+        mergeDataImpl<Method>(
+            getDataVariant<Method>(*res).data.impls[bucket],
+            getDataVariant<Method>(current).data.impls[bucket]);
+    }
+}
+
+
+/** Merges the aggregation states together, turns them into blocks, and outputs them as a stream.
+ * If the aggregation states are two-level, it outputs the blocks strictly in order of bucket_num.
+ * (This is important for distributed processing.)
+ * It can process different buckets in parallel, using up to `threads` threads.
+ *
+ * TODO Remove the ordinary Aggregator::merge function and everything related to it, if it turns out to be unneeded.
+ */
+class MergingAndConvertingBlockInputStream : public IProfilingBlockInputStream
+{
+public:
+    /** The input is a set of non-empty sets of partially aggregated data,
+     * which are all either single-level or all two-level.
+     */
+    MergingAndConvertingBlockInputStream(Aggregator & aggregator_, ManyAggregatedDataVariants & data_, bool final_, size_t threads_)
+        : aggregator(aggregator_), data(data_), final(final_), threads(threads_) {}
+
+    String getName() const override { return "MergingAndConverting"; }
+
+    String getID() const override
+    {
+        std::stringstream res;
+        res << this;
+        return res.str();
+    }
+
+    ~MergingAndConvertingBlockInputStream()
+    {
+        if (parallel_merge_data)
+        {
+            LOG_TRACE(&Logger::get(__PRETTY_FUNCTION__), "Waiting for threads to finish");
+            parallel_merge_data->pool.wait();
+        }
+    }
+
+protected:
+    Block readImpl() override
+    {
+        if (data.empty())
+            return {};
+
+        if (current_bucket_num >= NUM_BUCKETS)
+            return {};
+
+        AggregatedDataVariantsPtr & first = data[0];
+
+        if (current_bucket_num == -1)
+        {
+            ++current_bucket_num;
+
+            if (first->type == AggregatedDataVariants::Type::without_key || aggregator.params.overflow_row)
+            {
+                aggregator.mergeWithoutKeyDataImpl(data);
+                return aggregator.prepareBlocksAndFillWithoutKey(
+                    *first, final, first->type != AggregatedDataVariants::Type::without_key).front();
+            }
+        }
+
+        if (!first->isTwoLevel())
+        {
+            if (current_bucket_num > 0)
+                return {};
+
+            ++current_bucket_num;
+
+        #define M(NAME) \
+            else if (first->type == AggregatedDataVariants::Type::NAME) \
+                aggregator.mergeSingleLevelDataImpl<decltype(first->NAME)::element_type>(data);
+            if (false) {}
+            APPLY_FOR_VARIANTS_SINGLE_LEVEL(M)
+        #undef M
+            else if (first->type != AggregatedDataVariants::Type::without_key)
+                throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
+
+            return aggregator.prepareBlocksAndFillSingleLevel(*first, final).front();
+        }
+        else
+        {
+            if (!parallel_merge_data)
+            {
+                parallel_merge_data.reset(new ParallelMergeData(threads));
+                for (size_t i = 0; i < threads; ++i)
+                    scheduleThreadForNextBucket();
+            }
+
+            Block res;
+
+            while (true)
+            {
+                std::unique_lock<std::mutex> lock(parallel_merge_data->mutex);
+
+                if (parallel_merge_data->exception)
+                    std::rethrow_exception(parallel_merge_data->exception);
+
+                auto it = parallel_merge_data->ready_blocks.find(current_bucket_num);
+                if (it != parallel_merge_data->ready_blocks.end())
+                {
+                    ++current_bucket_num;
+                    scheduleThreadForNextBucket();
+
+                    if (it->second)
+                    {
+                        res.swap(it->second);
+                        break;
+                    }
+                    else if (current_bucket_num >= NUM_BUCKETS)
+                        break;
+                }
+
+                parallel_merge_data->condvar.wait(lock);
+            }
+
+            return res;
+        }
+    }
+
+private:
+    Aggregator & aggregator;
+    ManyAggregatedDataVariants data;
+    bool final;
+    size_t threads;
+
+    Int32 current_bucket_num = -1;
+    Int32 max_scheduled_bucket_num = -1;
+    static constexpr Int32 NUM_BUCKETS = 256;
+
+    struct ParallelMergeData
+    {
+        boost::threadpool::pool pool;
+        std::map<Int32, Block> ready_blocks;
+        std::exception_ptr exception;
+        std::mutex mutex;
+        std::condition_variable condvar;
+
+        ParallelMergeData(size_t threads) : pool(threads) {}
+    };
+
+    std::unique_ptr<ParallelMergeData> parallel_merge_data;
+
+    void scheduleThreadForNextBucket()
+    {
+        ++max_scheduled_bucket_num;
+        if (max_scheduled_bucket_num >= NUM_BUCKETS)
+            return;
+
+        parallel_merge_data->pool.schedule(std::bind(&MergingAndConvertingBlockInputStream::thread, this,
+            max_scheduled_bucket_num, current_memory_tracker));
+    }
+
+    void thread(Int32 bucket_num, MemoryTracker * memory_tracker)
+    {
+        current_memory_tracker = memory_tracker;
+        setThreadName("MergingAggregtd");
+
+        try
+        {
+            /// TODO Perhaps support no_more_keys.
+
+            auto & merged_data = *data[0];
+            auto method = merged_data.type;
+            Block block;
+
+            if (false) {}
+        #define M(NAME) \
+            else if (method == AggregatedDataVariants::Type::NAME) \
+            { \
+                aggregator.mergeBucketImpl<decltype(merged_data.NAME)::element_type>(data, bucket_num); \
+                block = aggregator.convertOneBucketToBlock(merged_data, *merged_data.NAME, final, bucket_num); \
+            }
+
+            APPLY_FOR_VARIANTS_TWO_LEVEL(M)
+        #undef M
+
+            std::lock_guard<std::mutex> lock(parallel_merge_data->mutex);
+            parallel_merge_data->ready_blocks[bucket_num] = std::move(block);
+        }
+        catch (...)
+        {
+            std::lock_guard<std::mutex> lock(parallel_merge_data->mutex);
+            parallel_merge_data->exception = std::current_exception();
+        }
+
+        parallel_merge_data->condvar.notify_all();
+    }
+};
+
+
+std::unique_ptr<IBlockInputStream> Aggregator::mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, size_t max_threads)
+{
+    if (data_variants.empty())
+        throw Exception("Empty data passed to Aggregator::mergeAndConvertToBlocks.", ErrorCodes::EMPTY_DATA_PASSED);
+
+    LOG_TRACE(log, "Merging aggregated data");
+
+    Stopwatch watch;
+
+    ManyAggregatedDataVariants non_empty_data;
+    non_empty_data.reserve(data_variants.size());
+    for (auto & data : data_variants)
+        if (!data->empty())
+            non_empty_data.push_back(data);
+
+    if (non_empty_data.empty())
+        return std::unique_ptr<IBlockInputStream>(new NullBlockInputStream);
+
+    if (non_empty_data.size() > 1)
+    {
+        /// Sort the states in decreasing order of size, so that the merge is more efficient (since all states are merged into the first one).
+        std::sort(non_empty_data.begin(), non_empty_data.end(),
+            [](const AggregatedDataVariantsPtr & lhs, const AggregatedDataVariantsPtr & rhs)
+            {
+                return lhs->sizeWithoutOverflowRow() > rhs->sizeWithoutOverflowRow();
+            });
+    }
+
+    /// If at least one of the variants is two-level, then convert all the variants to two-level, if there are any that are not.
+    /// Note - perhaps it would be more optimal not to convert single-level variants before the merge, but to merge them separately, at the end.
+
+    bool has_at_least_one_two_level = false;
+    for (const auto & variant : non_empty_data)
+    {
+        if (variant->isTwoLevel())
+        {
+            has_at_least_one_two_level = true;
+            break;
+        }
+    }
+
+    if (has_at_least_one_two_level)
+        for (auto & variant : non_empty_data)
+            if (!variant->isTwoLevel())
+                variant->convertToTwoLevel();
+
+    AggregatedDataVariantsPtr & first = non_empty_data[0];
+
+    for (size_t i = 1, size = non_empty_data.size(); i < size; ++i)
+        if (first->type != non_empty_data[i]->type)
+            throw Exception("Cannot merge different aggregated data variants.", ErrorCodes::CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS);
+
+    return std::unique_ptr<IBlockInputStream>(new MergingAndConvertingBlockInputStream(*this, non_empty_data, final, max_threads));
+}
+
+
 template <typename Method>
 void NO_INLINE Aggregator::mergeStreamsImplCase(
     Block & block,

From 513eb33b5b1a71378dfa60224e0ab4215ba0297a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 6 Dec 2015 19:42:17 +0300
Subject: [PATCH 12/40] dbms: fixed error [#METR-17000].
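For single-level data of the `without_key` variant, the bucket -1 step of MergingAndConvertingBlockInputStream::readImpl already returns the entire result; a subsequent call must then terminate instead of falling through to the single-level dispatch, where no APPLY_FOR_VARIANTS_SINGLE_LEVEL branch matches and the "Unknown aggregated data variant" exception fires. The added guard, as a condensed sketch of the control flow below:

    if (!first->isTwoLevel())
    {
        if (current_bucket_num > 0)
            return {};

        if (first->type == AggregatedDataVariants::Type::without_key)
            return {};

        /// ... dispatch over APPLY_FOR_VARIANTS_SINGLE_LEVEL follows ...
    }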
--- dbms/src/Interpreters/Aggregator.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index dc677a77c70..00878da2f04 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1780,6 +1780,9 @@ protected: if (current_bucket_num > 0) return {}; + if (first->type == AggregatedDataVariants::Type::without_key) + return {}; + ++current_bucket_num; #define M(NAME) \ @@ -1788,7 +1791,7 @@ protected: if (false) {} APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) #undef M - else if (first->type != AggregatedDataVariants::Type::without_key) + else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); return aggregator.prepareBlocksAndFillSingleLevel(*first, final).front(); From 9ecd4a0fa24a0635337a5d9bc8efd49a69eece30 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Dec 2015 21:55:09 +0300 Subject: [PATCH 13/40] dbms: additions [#METR-17000]. --- dbms/src/Interpreters/Aggregator.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 00878da2f04..e7eb96333ef 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -847,6 +847,7 @@ Block Aggregator::convertOneBucketToBlock( /** Для того, чтобы в случае исключения, агрегатор не уничтожал состояния агрегатных функций, владение которыми уже передано в block; * А также для того, чтобы пораньше освободить память. + * TODO Правильно действовать в случае final. */ method.data.impls[bucket].clearAndShrink(); @@ -1914,8 +1915,6 @@ std::unique_ptr Aggregator::mergeAndConvertToBlocks(ManyAggre LOG_TRACE(log, "Merging aggregated data"); - Stopwatch watch; - ManyAggregatedDataVariants non_empty_data; non_empty_data.reserve(data_variants.size()); for (auto & data : data_variants) From 0d7135fcf6a758f6143e16ec922ff872191fc10e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 6 Dec 2015 22:42:28 +0300 Subject: [PATCH 14/40] dbms: fixed errors [#METR-17000]. --- dbms/include/DB/Interpreters/Aggregator.h | 8 ++- dbms/src/Interpreters/Aggregator.cpp | 60 +++++++++++++++-------- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 8698779f66d..84aa515c060 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -1190,9 +1190,13 @@ protected: const Block & source, std::vector & destinations) const; - template + template void destroyImpl( - Method & method) const; + Method & method, + Table & data) const; + + void destroyWithoutKey( + AggregatedDataVariants & result) const; /** Проверяет ограничения на максимальное количество ключей для агрегации. diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index e7eb96333ef..915bff295e3 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -845,12 +845,6 @@ Block Aggregator::convertOneBucketToBlock( key_columns, aggregate_columns, final_aggregate_columns, key_sizes, final); }); - /** Для того, чтобы в случае исключения, агрегатор не уничтожал состояния агрегатных функций, владение которыми уже передано в block; - * А также для того, чтобы пораньше освободить память. - * TODO Правильно действовать в случае final. 
- */ - method.data.impls[bucket].clearAndShrink(); - block.info.bucket_num = bucket; return block; } @@ -974,6 +968,9 @@ void Aggregator::convertToBlockImpl( convertToBlockImplFinal(method, data, key_columns, final_aggregate_columns, key_sizes); else convertToBlockImplNotFinal(method, data, key_columns, aggregate_columns, key_sizes); + + /// Для того, чтобы пораньше освободить память. + data.clearAndShrink(); } @@ -994,6 +991,8 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( Method::getAggregateData(it->second) + offsets_of_aggregate_states[i], *final_aggregate_columns[i]); } + + destroyImpl(method, data); /// NOTE Можно сделать лучше. } template @@ -1129,6 +1128,11 @@ BlocksList Aggregator::prepareBlocksAndFillWithoutKey(AggregatedDataVariants & d if (is_overflows) block.info.is_overflows = true; + if (final) + destroyWithoutKey(data_variants); + else + data_variants.without_key = nullptr; + BlocksList blocks; blocks.emplace_back(std::move(block)); return blocks; @@ -1145,13 +1149,13 @@ BlocksList Aggregator::prepareBlocksAndFillSingleLevel(AggregatedDataVariants & const Sizes & key_sizes, bool final) { - #define M(NAME, IS_TWO_LEVEL) \ + #define M(NAME) \ else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ convertToBlockImpl(*data_variants.NAME, data_variants.NAME->data, \ key_columns, aggregate_columns, final_aggregate_columns, data_variants.key_sizes, final); if (false) {} - APPLY_FOR_AGGREGATED_VARIANTS(M) + APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) #undef M else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); @@ -1513,6 +1517,7 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( /// current не будет уничтожать состояния агрегатных функций в деструкторе current.aggregator = nullptr; + getDataVariant(current).data.clearAndShrink(); } } @@ -1544,6 +1549,8 @@ void NO_INLINE Aggregator::mergeTwoLevelDataImpl( mergeDataImpl( getDataVariant(*res).data.impls[bucket], getDataVariant(current).data.impls[bucket]); + + getDataVariant(current).data.impls[bucket].clearAndShrink(); } else { @@ -2522,13 +2529,14 @@ std::vector Aggregator::convertBlockToTwoLevel(const Block & block) } -template +template void NO_INLINE Aggregator::destroyImpl( - Method & method) const + Method & method, + Table & data) const { - for (typename Method::const_iterator it = method.data.begin(); it != method.data.end(); ++it) + for (auto elem : data) { - char * data = Method::getAggregateData(it->second); + char * data = Method::getAggregateData(elem.second); /** Если исключение (обычно нехватка памяти, кидается MemoryTracker-ом) возникло * после вставки ключа в хэш-таблицу, но до создания всех состояний агрегатных функций, @@ -2540,6 +2548,23 @@ void NO_INLINE Aggregator::destroyImpl( for (size_t i = 0; i < params.aggregates_size; ++i) if (!aggregate_functions[i]->isState()) aggregate_functions[i]->destroy(data + offsets_of_aggregate_states[i]); + + data = nullptr; + } +} + + +void Aggregator::destroyWithoutKey(AggregatedDataVariants & result) const +{ + AggregatedDataWithoutKey & res_data = result.without_key; + + if (nullptr != res_data) + { + for (size_t i = 0; i < params.aggregates_size; ++i) + if (!aggregate_functions[i]->isState()) + aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]); + + res_data = nullptr; } } @@ -2553,18 +2578,11 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) /// В какой структуре данных агрегированы данные? 
     if (result.type == AggregatedDataVariants::Type::without_key || params.overflow_row)
-    {
-        AggregatedDataWithoutKey & res_data = result.without_key;
-
-        if (nullptr != res_data)
-            for (size_t i = 0; i < params.aggregates_size; ++i)
-                if (!aggregate_functions[i]->isState())
-                    aggregate_functions[i]->destroy(res_data + offsets_of_aggregate_states[i]);
-    }
+        destroyWithoutKey(result);
 
 #define M(NAME, IS_TWO_LEVEL) \
     else if (result.type == AggregatedDataVariants::Type::NAME) \
-        destroyImpl(*result.NAME);
+        destroyImpl(*result.NAME, result.NAME->data);
 
     if (false) {}
     APPLY_FOR_AGGREGATED_VARIANTS(M)

From d019af9acecf1185e67ac07222ff49acdb2e961f Mon Sep 17 00:00:00 2001
From: Andrey Mironov
Date: Mon, 7 Dec 2015 15:46:13 +0300
Subject: [PATCH 15/40] dbms: add test for float formatting which has proven problematic [#METR-19166]

---
 .../0_stateless/00286_format_long_negative_float.reference | 1 +
 .../queries/0_stateless/00286_format_long_negative_float.sql | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 dbms/tests/queries/0_stateless/00286_format_long_negative_float.reference
 create mode 100644 dbms/tests/queries/0_stateless/00286_format_long_negative_float.sql

diff --git a/dbms/tests/queries/0_stateless/00286_format_long_negative_float.reference b/dbms/tests/queries/0_stateless/00286_format_long_negative_float.reference
new file mode 100644
index 00000000000..298d90bec85
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00286_format_long_negative_float.reference
@@ -0,0 +1 @@
+-0.0000019073486299999997
diff --git a/dbms/tests/queries/0_stateless/00286_format_long_negative_float.sql b/dbms/tests/queries/0_stateless/00286_format_long_negative_float.sql
new file mode 100644
index 00000000000..e4d7a7085d2
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00286_format_long_negative_float.sql
@@ -0,0 +1 @@
+select reinterpretAsFloat64(unhex('875635ffffffbfbe'))

From 479f1fc346a1931b7c50b39a01beb1def1320749 Mon Sep 17 00:00:00 2001
From: Andrey Mironov
Date: Mon, 7 Dec 2015 19:06:18 +0300
Subject: [PATCH 16/40] dbms: fix typo and absent support of Float64 for MySQL dictionaries [#METR-18946]

---
 dbms/include/DB/Dictionaries/MySQLBlockInputStream.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dbms/include/DB/Dictionaries/MySQLBlockInputStream.h b/dbms/include/DB/Dictionaries/MySQLBlockInputStream.h
index c66f0a19497..5021345ff55 100644
--- a/dbms/include/DB/Dictionaries/MySQLBlockInputStream.h
+++ b/dbms/include/DB/Dictionaries/MySQLBlockInputStream.h
@@ -77,7 +77,7 @@ public:
                 types.push_back(value_type_t::Int64);
             else if (typeid_cast<const DataTypeFloat32 *>(type))
                 types.push_back(value_type_t::Float32);
-            else if (typeid_cast<const DataTypeFloat32 *>(type))
+            else if (typeid_cast<const DataTypeFloat64 *>(type))
                 types.push_back(value_type_t::Float64);
             else if (typeid_cast<const DataTypeString *>(type))
                 types.push_back(value_type_t::String);

From cf2c86956e129eb63bfae612269c67129139f049 Mon Sep 17 00:00:00 2001
From: Andrey Mironov
Date: Mon, 7 Dec 2015 19:07:02 +0300
Subject: [PATCH 17/40] dbms: SmallObjectPool: remove minimum size check [#METR-18946]

---
 dbms/include/DB/Common/SmallObjectPool.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/dbms/include/DB/Common/SmallObjectPool.h b/dbms/include/DB/Common/SmallObjectPool.h
index 61c07393252..16faf5f879d 100644
--- a/dbms/include/DB/Common/SmallObjectPool.h
+++ b/dbms/include/DB/Common/SmallObjectPool.h
@@ -14,10 +14,13 @@ namespace DB
 {
 
 
+/** Can allocate memory objects of fixed size with deletion support.
+ * For `object_size` less than `min_allocation_size` still allocates `min_allocation_size` bytes. */ class SmallObjectPool { private: struct Block { Block * next; }; + static constexpr auto min_allocation_size = sizeof(Block); const std::size_t object_size; Arena pool; @@ -25,16 +28,11 @@ private: public: SmallObjectPool( - const std::size_t object_size, const std::size_t initial_size = 4096, const std::size_t growth_factor = 2, + const std::size_t object_size_, const std::size_t initial_size = 4096, const std::size_t growth_factor = 2, const std::size_t linear_growth_threshold = 128 * 1024 * 1024) - : object_size{object_size}, pool{initial_size, growth_factor, linear_growth_threshold} + : object_size{std::max(object_size_, min_allocation_size)}, + pool{initial_size, growth_factor, linear_growth_threshold} { - if (object_size < sizeof(Block)) - throw Exception{ - "Can't make allocations smaller than sizeof(Block) = " + std::to_string(sizeof(Block)), - ErrorCodes::LOGICAL_ERROR - }; - if (pool.size() < object_size) return; From 78b560c846c4fb68b83097889afc79bdf4b7ec56 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 7 Dec 2015 22:30:50 +0300 Subject: [PATCH 18/40] dbms: fixed error [#METR-19283]. --- dbms/src/Storages/MergeTree/PKCondition.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dbms/src/Storages/MergeTree/PKCondition.cpp b/dbms/src/Storages/MergeTree/PKCondition.cpp index 0211b053c42..aae687a90d1 100644 --- a/dbms/src/Storages/MergeTree/PKCondition.cpp +++ b/dbms/src/Storages/MergeTree/PKCondition.cpp @@ -457,6 +457,13 @@ bool PKCondition::mayBeTrueInRange(const Field * left_pk, const Field * right_pk applyFunction(func, current_type, key_range_transformed.left, new_type, key_range_transformed.left); if (!key_range_transformed.right.isNull()) applyFunction(func, current_type, key_range_transformed.right, new_type, key_range_transformed.right); + + if (!new_type) + { + evaluation_is_not_possible = true; + break; + } + current_type.swap(new_type); if (!monotonicity.is_positive) From 35db0d65b9d7f1c4421c0a139f0c1ebc6eb67b17 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 7 Dec 2015 23:08:00 +0300 Subject: [PATCH 19/40] dbms: fixed error [#METR-19288]. --- .../MergingAggregatedMemoryEfficientBlockInputStream.cpp | 9 +++++++-- dbms/src/Interpreters/Aggregator.cpp | 8 ++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index 7407e03908e..d6b42497774 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -62,7 +62,6 @@ void MergingAggregatedMemoryEfficientBlockInputStream::start() auto memory_tracker = current_memory_tracker; task = std::packaged_task([&child, memory_tracker] { - /// memory_tracker и имя потока устанавливается здесь. Далее для всех задач в reading_pool это уже не требуется. 
current_memory_tracker = memory_tracker; setThreadName("MergeAggReadThr"); child->readPrefix(); @@ -276,7 +275,13 @@ MergingAggregatedMemoryEfficientBlockInputStream::BlocksToMerge MergingAggregate { if (need_that_input(input)) { - tasks.emplace_back([&input, &read_from_input] { read_from_input(input); }); + auto memory_tracker = current_memory_tracker; + tasks.emplace_back([&input, &read_from_input, memory_tracker] + { + current_memory_tracker = memory_tracker; + setThreadName("MergeAggReadThr"); + read_from_input(input); + }); auto & task = tasks.back(); reading_pool->schedule([&task] { task(); }); } diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index 915bff295e3..c504f224e17 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -90,7 +90,8 @@ void Aggregator::initialize(const Block & block) initialized = true; - memory_usage_before_aggregation = current_memory_tracker->get(); + if (current_memory_tracker) + memory_usage_before_aggregation = current_memory_tracker->get(); aggregate_functions.resize(params.aggregates_size); for (size_t i = 0; i < params.aggregates_size; ++i) @@ -732,7 +733,10 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result, } size_t result_size = result.sizeWithoutOverflowRow(); - auto current_memory_usage = current_memory_tracker->get(); + Int64 current_memory_usage = 0; + if (current_memory_tracker) + current_memory_usage = current_memory_tracker->get(); + auto result_size_bytes = current_memory_usage - memory_usage_before_aggregation; /// Здесь учитываются все результаты в сумме, из разных потоков. bool worth_convert_to_two_level From dc7372ab71f20cd062e3b58e92835a1c5395d329 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 8 Dec 2015 04:17:57 +0300 Subject: [PATCH 20/40] dbms: fixed error [#METR-19271]. --- dbms/include/DB/Columns/ColumnConst.h | 29 +++++++++++++++++-- .../00287_column_const_with_nan.reference | 1 + .../00287_column_const_with_nan.sql | 1 + 3 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00287_column_const_with_nan.reference create mode 100644 dbms/tests/queries/0_stateless/00287_column_const_with_nan.sql diff --git a/dbms/include/DB/Columns/ColumnConst.h b/dbms/include/DB/Columns/ColumnConst.h index b6bfa226ada..125d1f2e644 100644 --- a/dbms/include/DB/Columns/ColumnConst.h +++ b/dbms/include/DB/Columns/ColumnConst.h @@ -26,6 +26,29 @@ public: }; +namespace ColumnConstDetails +{ + template + inline bool equals(const T & x, const T & y) + { + return x == y; + } + + /// Проверяет побитовую идентичность элементов, даже если они являются NaN-ами. + template <> + inline bool equals(const Float32 & x, const Float32 & y) + { + return 0 == memcmp(&x, &y, sizeof(x)); + } + + template <> + inline bool equals(const Float64 & x, const Float64 & y) + { + return 0 == memcmp(&x, &y, sizeof(x)); + } +} + + /** Столбец-константа может содержать внутри себя само значение, * или, в случае массивов, SharedPtr от значения-массива, * чтобы избежать проблем производительности при копировании очень больших массивов. 
@@ -65,7 +88,7 @@ public: void insertRangeFrom(const IColumn & src, size_t start, size_t length) override { - if (getDataFromHolder() != static_cast(src).getDataFromHolder()) + if (!ColumnConstDetails::equals(getDataFromHolder(), static_cast(src).getDataFromHolder())) throw Exception("Cannot insert different element into constant column " + getName(), ErrorCodes::CANNOT_INSERT_ELEMENT_INTO_CONSTANT_COLUMN); @@ -74,7 +97,7 @@ public: void insert(const Field & x) override { - if (x.get() != FieldType(getDataFromHolder())) + if (!ColumnConstDetails::equals(x.get(), FieldType(getDataFromHolder()))) throw Exception("Cannot insert different element into constant column " + getName(), ErrorCodes::CANNOT_INSERT_ELEMENT_INTO_CONSTANT_COLUMN); ++s; @@ -87,7 +110,7 @@ public: void insertFrom(const IColumn & src, size_t n) override { - if (getDataFromHolder() != static_cast(src).getDataFromHolder()) + if (!ColumnConstDetails::equals(getDataFromHolder(), static_cast(src).getDataFromHolder())) throw Exception("Cannot insert different element into constant column " + getName(), ErrorCodes::CANNOT_INSERT_ELEMENT_INTO_CONSTANT_COLUMN); ++s; diff --git a/dbms/tests/queries/0_stateless/00287_column_const_with_nan.reference b/dbms/tests/queries/0_stateless/00287_column_const_with_nan.reference new file mode 100644 index 00000000000..946573052ce --- /dev/null +++ b/dbms/tests/queries/0_stateless/00287_column_const_with_nan.reference @@ -0,0 +1 @@ +nan 1 diff --git a/dbms/tests/queries/0_stateless/00287_column_const_with_nan.sql b/dbms/tests/queries/0_stateless/00287_column_const_with_nan.sql new file mode 100644 index 00000000000..67931511ac2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00287_column_const_with_nan.sql @@ -0,0 +1 @@ +SELECT * FROM (SELECT nan, number FROM system.numbers) WHERE number % 100 = 1 LIMIT 1; From 18c3aa441aa5900eb5cbf473c986b0022b3641a2 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 8 Dec 2015 04:43:39 +0300 Subject: [PATCH 21/40] dbms: removing old temporary files on startup [#METR-17000]. --- dbms/src/Server/Server.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/dbms/src/Server/Server.cpp b/dbms/src/Server/Server.cpp index 134b5e8e6d8..5c623b4fe05 100644 --- a/dbms/src/Server/Server.cpp +++ b/dbms/src/Server/Server.cpp @@ -584,10 +584,22 @@ int Server::main(const std::vector & args) global_context->setPath(path); /// Директория для временных файлов при обработке тяжёлых запросов. - std::string tmp_path = config().getString("tmp_path", path + "tmp/"); - global_context->setTemporaryPath(tmp_path); - Poco::File(tmp_path).createDirectories(); - /// TODO Очистка временных файлов. Проверка, что директория с временными файлами не совпадает и не содержит в себе основной path. + { + std::string tmp_path = config().getString("tmp_path", path + "tmp/"); + global_context->setTemporaryPath(tmp_path); + Poco::File(tmp_path).createDirectories(); + + /// Очистка временных файлов. + Poco::DirectoryIterator dir_end; + for (Poco::DirectoryIterator it(tmp_path); it != dir_end; ++it) + { + if (it->isFile() && 0 == it.name().compare(0, 3, "tmp")) + { + LOG_DEBUG(log, "Removing old temporary file " << it->path()); + it->remove(); + } + } + } bool has_zookeeper = false; if (config().has("zookeeper")) From 50d463d08b2da5e430321d1dcdb469d2f8ef5686 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 8 Dec 2015 05:01:46 +0300 Subject: [PATCH 22/40] dbms: fixed error [#METR-17000]. 
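convertToBlocks and mergeAndConvertToBlocks become const here, presumably so that a single Aggregator can be used from the merging pipeline as a logically read-only object. The one mutable step left in mergeBlocks() is the lazy initialization of the `sample` block, which is why it now happens under the Aggregator's mutex. The pattern, as a sketch (`mutex` and `sample` are Aggregator members):

    {
        std::lock_guard<std::mutex> lock(mutex);
        if (!sample)
            sample = blocks.front().cloneEmpty();
    }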
--- dbms/include/DB/Interpreters/Aggregator.h | 9 +++------ dbms/src/Interpreters/Aggregator.cpp | 17 ++++++++++------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 84aa515c060..fe9d37f7df4 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -849,18 +849,15 @@ public: * которые могут быть затем объединены с другими состояниями (для распределённой обработки запроса). * Если final = true, то в качестве столбцов-агрегатов создаются столбцы с готовыми значениями. */ - BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads); + BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const; /** Объединить несколько структур данных агрегации в одну. (В первый непустой элемент массива.) - * После объединения, все стркутуры агрегации (а не только те, в которую они будут слиты) должны жить, - * пока не будет вызвана функция convertToBlocks. - * Это нужно, так как в слитом результате могут остаться указатели на память в пуле, которым владеют другие структуры агрегации. */ - AggregatedDataVariantsPtr merge(ManyAggregatedDataVariants & data_variants, size_t max_threads); + AggregatedDataVariantsPtr merge(ManyAggregatedDataVariants & data_variants, size_t max_threads) const; /** Объединить несколько структур данных агрегации и выдать результат в виде потока блоков. */ - std::unique_ptr mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, size_t max_threads); + std::unique_ptr mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, size_t max_threads) const; /** Объединить поток частично агрегированных блоков в одну структуру данных. * (Доагрегировать несколько блоков, которые представляют собой результат независимых агрегаций с удалённых серверов.) diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index c504f224e17..c3dcb6a78d3 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1269,7 +1269,7 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl( } -BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) +BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const { if (isCancelled()) return BlocksList(); @@ -1618,7 +1618,7 @@ void NO_INLINE Aggregator::mergeTwoLevelDataImpl( } -AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_variants, size_t max_threads) +AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_variants, size_t max_threads) const { if (data_variants.empty()) throw Exception("Empty data passed to Aggregator::merge.", ErrorCodes::EMPTY_DATA_PASSED); @@ -1743,7 +1743,7 @@ public: /** На вход подаётся набор непустых множеств частично агрегированных данных, * которые все либо являются одноуровневыми, либо являются двухуровневыми. 
*/ - MergingAndConvertingBlockInputStream(Aggregator & aggregator_, ManyAggregatedDataVariants & data_, bool final_, size_t threads_) + MergingAndConvertingBlockInputStream(const Aggregator & aggregator_, ManyAggregatedDataVariants & data_, bool final_, size_t threads_) : aggregator(aggregator_), data(data_), final(final_), threads(threads_) {} String getName() const override { return "MergingAndConverting"; } @@ -1849,7 +1849,7 @@ protected: } private: - Aggregator & aggregator; + const Aggregator & aggregator; ManyAggregatedDataVariants data; bool final; size_t threads; @@ -1919,7 +1919,7 @@ private: }; -std::unique_ptr Aggregator::mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, size_t max_threads) +std::unique_ptr Aggregator::mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, size_t max_threads) const { if (data_variants.empty()) throw Exception("Empty data passed to Aggregator::mergeAndConvertToBlocks.", ErrorCodes::EMPTY_DATA_PASSED); @@ -2306,8 +2306,11 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) Block empty_block; initialize(empty_block); - if (!sample) - sample = blocks.front().cloneEmpty(); + { + std::lock_guard lock(mutex); + if (!sample) + sample = blocks.front().cloneEmpty(); + } /// Каким способом выполнять агрегацию? for (size_t i = 0; i < params.keys_size; ++i) From a57f9967700fc1cfc48a8867bf17cec39fc5eabf Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 8 Dec 2015 12:16:09 +0300 Subject: [PATCH 23/40] dbms: fix transform() accepting non constant second argument --- dbms/include/DB/Functions/FunctionsTransform.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/include/DB/Functions/FunctionsTransform.h b/dbms/include/DB/Functions/FunctionsTransform.h index 62650874989..353cb61a7e9 100644 --- a/dbms/include/DB/Functions/FunctionsTransform.h +++ b/dbms/include/DB/Functions/FunctionsTransform.h @@ -129,7 +129,7 @@ public: const ColumnConstArray * array_from = typeid_cast(&*block.getByPosition(arguments[1]).column); const ColumnConstArray * array_to = typeid_cast(&*block.getByPosition(arguments[2]).column); - if (!array_from && !array_to) + if (!array_from || !array_to) throw Exception("Second and third arguments of function " + getName() + " must be constant arrays.", ErrorCodes::ILLEGAL_COLUMN); prepare(array_from->getData(), array_to->getData(), block, arguments); From c8e3d9053636707ffb9d02dedbda455032af850b Mon Sep 17 00:00:00 2001 From: Andrey Mironov Date: Tue, 8 Dec 2015 12:16:33 +0300 Subject: [PATCH 24/40] dbms: SmallObjectPool: fix odr-use in debug build --- dbms/include/DB/Common/SmallObjectPool.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/include/DB/Common/SmallObjectPool.h b/dbms/include/DB/Common/SmallObjectPool.h index 16faf5f879d..7948ae42626 100644 --- a/dbms/include/DB/Common/SmallObjectPool.h +++ b/dbms/include/DB/Common/SmallObjectPool.h @@ -15,12 +15,12 @@ namespace DB /** Can allocate memory objects of fixed size with deletion support. - * For `object_size` less than `min_allocation_size` still allocates `min_allocation_size` bytes. */ + * For small `object_size`s allocated no less than getMinAllocationSize() bytes. 
*/ class SmallObjectPool { private: struct Block { Block * next; }; - static constexpr auto min_allocation_size = sizeof(Block); + static constexpr auto getMinAllocationSize() { return sizeof(Block); } const std::size_t object_size; Arena pool; @@ -30,7 +30,7 @@ public: SmallObjectPool( const std::size_t object_size_, const std::size_t initial_size = 4096, const std::size_t growth_factor = 2, const std::size_t linear_growth_threshold = 128 * 1024 * 1024) - : object_size{std::max(object_size_, min_allocation_size)}, + : object_size{std::max(object_size_, getMinAllocationSize())}, pool{initial_size, growth_factor, linear_growth_threshold} { if (pool.size() < object_size) From f6973a32c4959634a15f15eff9caf25d4304bdbd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 8 Dec 2015 23:04:11 +0300 Subject: [PATCH 25/40] dbms: added support for empty StripeLog tables [#METR-19298]. --- dbms/src/Storages/StorageStripeLog.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dbms/src/Storages/StorageStripeLog.cpp b/dbms/src/Storages/StorageStripeLog.cpp index 074a5d611d6..acb2f062c0b 100644 --- a/dbms/src/Storages/StorageStripeLog.cpp +++ b/dbms/src/Storages/StorageStripeLog.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -232,6 +233,9 @@ BlockInputStreams StorageStripeLog::read( NameSet column_names_set(column_names.begin(), column_names.end()); + if (!Poco::File(full_path() + "index.mrk").exists()) + return { new NullBlockInputStream }; + CompressedReadBufferFromFile index_in(full_path() + "index.mrk", 0, 0, INDEX_BUFFER_SIZE); std::shared_ptr index{std::make_shared(index_in, column_names_set)}; From 3b9466ee0831fcb71c4e572f15a4546a0bd52c52 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 8 Dec 2015 23:05:49 +0300 Subject: [PATCH 26/40] dbms: added test [#METR-19298]. --- .../queries/0_stateless/00288_empty_stripelog.reference | 2 ++ dbms/tests/queries/0_stateless/00288_empty_stripelog.sql | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/00288_empty_stripelog.reference create mode 100644 dbms/tests/queries/0_stateless/00288_empty_stripelog.sql diff --git a/dbms/tests/queries/0_stateless/00288_empty_stripelog.reference b/dbms/tests/queries/0_stateless/00288_empty_stripelog.reference new file mode 100644 index 00000000000..1191247b6d9 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00288_empty_stripelog.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/dbms/tests/queries/0_stateless/00288_empty_stripelog.sql b/dbms/tests/queries/0_stateless/00288_empty_stripelog.sql new file mode 100644 index 00000000000..fddbbedaac2 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00288_empty_stripelog.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS test.stripelog; +CREATE TABLE test.stripelog (x UInt8) ENGINE = StripeLog; + +SELECT * FROM test.stripelog ORDER BY x; +INSERT INTO test.stripelog VALUES (1), (2); +SELECT * FROM test.stripelog ORDER BY x; + +DROP TABLE test.stripelog; From ccd51123bf4d4ca186da2fc5c8b01a9cf87ee274 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Dec 2015 00:29:38 +0300 Subject: [PATCH 27/40] dbms: fixed error [#METR-19316]. 
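The destructor has to shut down merge threads that may be blocked on the bounded result_queue. The order matters: first mark the stream as exhausted under get_next_blocks_mutex so that no thread picks up another batch, then drain the queue so that a thread blocked inside push() can wake up, and only then wait on the pool. The sequence, as a sketch:

    {
        std::lock_guard<std::mutex> lock(parallel_merge_data->get_next_blocks_mutex);
        parallel_merge_data->exhausted = true;
    }
    parallel_merge_data->result_queue.clear();    /// unblocks a producer stuck in push()
    parallel_merge_data->pool.wait();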
--- .../MergingAggregatedMemoryEfficientBlockInputStream.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index d6b42497774..5c0fb5ec08f 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -128,6 +128,12 @@ MergingAggregatedMemoryEfficientBlockInputStream::~MergingAggregatedMemoryEffici if (parallel_merge_data) { LOG_TRACE((&Logger::get("MergingAggregatedMemoryEfficientBlockInputStream")), "Waiting for threads to finish"); + + { + std::lock_guard lock(parallel_merge_data->get_next_blocks_mutex); + parallel_merge_data->exhausted = true; + } + parallel_merge_data->result_queue.clear(); parallel_merge_data->pool.wait(); } From d852ef480bcb77c31031e4e22e7d0dd10494c79e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Dec 2015 04:29:01 +0300 Subject: [PATCH 28/40] dbms: external aggregation: fixed error [#METR-19316]. --- ...rgingAggregatedMemoryEfficientBlockInputStream.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp index 5c0fb5ec08f..f3b5013bb7f 100644 --- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp +++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp @@ -173,7 +173,16 @@ void MergingAggregatedMemoryEfficientBlockInputStream::mergeThread(MemoryTracker } } - parallel_merge_data->result_queue.push(aggregator.mergeBlocks(*blocks_to_merge, final)); + Block res = aggregator.mergeBlocks(*blocks_to_merge, final); + + { + std::lock_guard lock(parallel_merge_data->get_next_blocks_mutex); + + if (parallel_merge_data->exhausted) + break; + + parallel_merge_data->result_queue.push(OutputData(std::move(res))); + } } } catch (...) From 69942f38d47622f8592a92f572f5e9056435ebf5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Dec 2015 05:27:45 +0300 Subject: [PATCH 29/40] dbms: fixed error with ParserAlterQuery [#METR-13097]. --- dbms/src/Parsers/ParserAlterQuery.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index cf933805184..49cf3fc57e2 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -90,7 +90,8 @@ bool ParserAlterQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_pa return false; ws.ignore(pos, end); - parser_col_decl.parse(pos, end, params.col_decl, max_parsed_pos, expected); + if (!parser_col_decl.parse(pos, end, params.col_decl, max_parsed_pos, expected)) + return false; ws.ignore(pos, end); if (s_after.ignore(pos, end, max_parsed_pos, expected)) From 7560351942b94a1ce7aab860a8f82707a9649083 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Dec 2015 05:55:35 +0300 Subject: [PATCH 30/40] dbms: removed old code [#METR-17000]. 
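With mergeAndConvertToBlocks in place, the non-parallel AggregatingBlockInputStream can reuse the same streaming merge for its single aggregation state, so the old Aggregator::merge / convertToBlocks pair (and the two-level merge helpers that only it used) can be deleted. The new call site, roughly:

    AggregatedDataVariantsPtr data_variants = new AggregatedDataVariants;
    aggregator.execute(children.back(), *data_variants);

    ManyAggregatedDataVariants many_data { data_variants };
    impl = aggregator.mergeAndConvertToBlocks(many_data, final, 1);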
--- dbms/include/DB/Interpreters/Aggregator.h | 15 -- .../AggregatingBlockInputStream.cpp | 12 +- dbms/src/Interpreters/Aggregator.cpp | 214 ------------------ 3 files changed, 6 insertions(+), 235 deletions(-) diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index fe9d37f7df4..907faa78f84 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -851,10 +851,6 @@ public: */ BlocksList convertToBlocks(AggregatedDataVariants & data_variants, bool final, size_t max_threads) const; - /** Объединить несколько структур данных агрегации в одну. (В первый непустой элемент массива.) - */ - AggregatedDataVariantsPtr merge(ManyAggregatedDataVariants & data_variants, size_t max_threads) const; - /** Объединить несколько структур данных агрегации и выдать результат в виде потока блоков. */ std::unique_ptr mergeAndConvertToBlocks(ManyAggregatedDataVariants & data_variants, bool final, size_t max_threads) const; @@ -1081,12 +1077,6 @@ protected: Table & table_dst, Table & table_src) const; - /// Слить все ключи, оставшиеся после предыдущего метода, в overflows. - template - void mergeDataRemainingKeysToOverflowsImpl( - AggregatedDataWithoutKey & overflows, - Table & table_src) const; - void mergeWithoutKeyDataImpl( ManyAggregatedDataVariants & non_empty_data) const; @@ -1094,11 +1084,6 @@ protected: void mergeSingleLevelDataImpl( ManyAggregatedDataVariants & non_empty_data) const; - template - void mergeTwoLevelDataImpl( - ManyAggregatedDataVariants & many_data, - boost::threadpool::pool * thread_pool) const; - template void convertToBlockImpl( Method & method, diff --git a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp index 618f2488499..bfd146d8e5e 100644 --- a/dbms/src/DataStreams/AggregatingBlockInputStream.cpp +++ b/dbms/src/DataStreams/AggregatingBlockInputStream.cpp @@ -12,17 +12,17 @@ Block AggregatingBlockInputStream::readImpl() if (!executed) { executed = true; - AggregatedDataVariants data_variants; + AggregatedDataVariantsPtr data_variants = new AggregatedDataVariants; Aggregator::CancellationHook hook = [&]() { return this->isCancelled(); }; aggregator.setCancellationHook(hook); - aggregator.execute(children.back(), data_variants); + aggregator.execute(children.back(), *data_variants); if (!aggregator.hasTemporaryFiles()) { - impl.reset(new BlocksListBlockInputStream( - aggregator.convertToBlocks(data_variants, final, 1))); + ManyAggregatedDataVariants many_data { data_variants }; + impl = aggregator.mergeAndConvertToBlocks(many_data, final, 1); } else { @@ -35,9 +35,9 @@ Block AggregatingBlockInputStream::readImpl() if (!isCancelled()) { /// Сбросим имеющиеся в оперативке данные тоже на диск. Так проще. 
- size_t rows = data_variants.sizeWithoutOverflowRow(); + size_t rows = data_variants->sizeWithoutOverflowRow(); if (rows) - aggregator.writeToTemporaryFile(data_variants, rows); + aggregator.writeToTemporaryFile(*data_variants, rows); } const auto & files = aggregator.getTemporaryFiles(); diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index c3dcb6a78d3..4da3dd7905c 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -1442,31 +1442,6 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl( } } -template -void NO_INLINE Aggregator::mergeDataRemainingKeysToOverflowsImpl( - AggregatedDataWithoutKey & overflows, - Table & table_src) const -{ - for (auto it = table_src.begin(); it != table_src.end(); ++it) - { - if (Method::getAggregateData(it->second) == nullptr) - continue; - - AggregateDataPtr res_data = overflows; - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->merge( - res_data + offsets_of_aggregate_states[i], - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); - - for (size_t i = 0; i < params.aggregates_size; ++i) - aggregate_functions[i]->destroy( - Method::getAggregateData(it->second) + offsets_of_aggregate_states[i]); - - Method::getAggregateData(it->second) = nullptr; - } -} - void NO_INLINE Aggregator::mergeWithoutKeyDataImpl( ManyAggregatedDataVariants & non_empty_data) const @@ -1526,193 +1501,6 @@ void NO_INLINE Aggregator::mergeSingleLevelDataImpl( } -template -void NO_INLINE Aggregator::mergeTwoLevelDataImpl( - ManyAggregatedDataVariants & non_empty_data, - boost::threadpool::pool * thread_pool) const -{ - AggregatedDataVariantsPtr & res = non_empty_data[0]; - - /// В данном случае, no_more_keys будет выставлено, только если в первом (самом большом) состоянии достаточно много строк. - bool no_more_keys = false; - if (!checkLimits(res->sizeWithoutOverflowRow(), no_more_keys)) - return; - - /// Слияние распараллеливается по корзинам - первому уровню TwoLevelHashMap. - auto merge_bucket = [&non_empty_data, &res, no_more_keys, this](size_t bucket, MemoryTracker * memory_tracker) - { - current_memory_tracker = memory_tracker; - - /// Все результаты агрегации соединяем с первым. - for (size_t i = 1, size = non_empty_data.size(); i < size; ++i) - { - AggregatedDataVariants & current = *non_empty_data[i]; - - if (!no_more_keys) - { - mergeDataImpl( - getDataVariant(*res).data.impls[bucket], - getDataVariant(current).data.impls[bucket]); - - getDataVariant(current).data.impls[bucket].clearAndShrink(); - } - else - { - mergeDataOnlyExistingKeysImpl( - getDataVariant(*res).data.impls[bucket], - getDataVariant(current).data.impls[bucket]); - } - } - }; - - /// packaged_task используются, чтобы исключения автоматически прокидывались в основной поток. - - std::vector> tasks(Method::Data::NUM_BUCKETS); - - try - { - for (size_t bucket = 0; bucket < Method::Data::NUM_BUCKETS; ++bucket) - { - tasks[bucket] = std::packaged_task(std::bind(merge_bucket, bucket, current_memory_tracker)); - - if (thread_pool) - thread_pool->schedule([bucket, &tasks] { tasks[bucket](); }); - else - tasks[bucket](); - } - } - catch (...) - { - /// Если этого не делать, то в случае исключения, tasks уничтожится раньше завершения потоков, и будет плохо. 
- if (thread_pool) - thread_pool->wait(); - - throw; - } - - if (thread_pool) - thread_pool->wait(); - - for (auto & task : tasks) - if (task.valid()) - task.get_future().get(); - - if (no_more_keys && params.overflow_row) - { - for (size_t bucket = 0; bucket < Method::Data::NUM_BUCKETS; ++bucket) - { - for (size_t i = 1, size = non_empty_data.size(); i < size; ++i) - { - AggregatedDataVariants & current = *non_empty_data[i]; - - mergeDataRemainingKeysToOverflowsImpl( - res->without_key, - getDataVariant(current).data.impls[bucket]); - } - } - } - - /// aggregator не будет уничтожать состояния агрегатных функций в деструкторе - for (size_t i = 1, size = non_empty_data.size(); i < size; ++i) - non_empty_data[i]->aggregator = nullptr; -} - - -AggregatedDataVariantsPtr Aggregator::merge(ManyAggregatedDataVariants & data_variants, size_t max_threads) const -{ - if (data_variants.empty()) - throw Exception("Empty data passed to Aggregator::merge.", ErrorCodes::EMPTY_DATA_PASSED); - - LOG_TRACE(log, "Merging aggregated data"); - - Stopwatch watch; - - ManyAggregatedDataVariants non_empty_data; - non_empty_data.reserve(data_variants.size()); - for (auto & data : data_variants) - if (!data->empty()) - non_empty_data.push_back(data); - - if (non_empty_data.empty()) - return data_variants[0]; - - if (non_empty_data.size() == 1) - return non_empty_data[0]; - - /// Отсортируем состояния по убыванию размера, чтобы мердж был более эффективным (так как все состояния мерджатся в первое). - std::sort(non_empty_data.begin(), non_empty_data.end(), - [](const AggregatedDataVariantsPtr & lhs, const AggregatedDataVariantsPtr & rhs) - { - return lhs->sizeWithoutOverflowRow() > rhs->sizeWithoutOverflowRow(); - }); - - /// Если хотя бы один из вариантов двухуровневый, то переконвертируем все варианты в двухуровневые, если есть не такие. - /// Замечание - возможно, было бы более оптимально не конвертировать одноуровневые варианты перед мерджем, а мерджить их отдельно, в конце. - - bool has_at_least_one_two_level = false; - for (const auto & variant : non_empty_data) - { - if (variant->isTwoLevel()) - { - has_at_least_one_two_level = true; - break; - } - } - - if (has_at_least_one_two_level) - for (auto & variant : non_empty_data) - if (!variant->isTwoLevel()) - variant->convertToTwoLevel(); - - AggregatedDataVariantsPtr & res = non_empty_data[0]; - - size_t rows = res->size(); - for (size_t i = 1, size = non_empty_data.size(); i < size; ++i) - { - rows += non_empty_data[i]->size(); - AggregatedDataVariants & current = *non_empty_data[i]; - - if (res->type != current.type) - throw Exception("Cannot merge different aggregated data variants.", ErrorCodes::CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS); - - res->aggregates_pools.insert(res->aggregates_pools.end(), current.aggregates_pools.begin(), current.aggregates_pools.end()); - } - - /// В какой структуре данных агрегированы данные? 
- if (res->type == AggregatedDataVariants::Type::without_key || params.overflow_row) - mergeWithoutKeyDataImpl(non_empty_data); - - std::unique_ptr thread_pool; - if (max_threads > 1 && res->isTwoLevel()) - thread_pool.reset(new boost::threadpool::pool(max_threads)); - - if (false) {} -#define M(NAME) \ - else if (res->type == AggregatedDataVariants::Type::NAME) \ - mergeSingleLevelDataImplNAME)::element_type>(non_empty_data); - APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) -#undef M -#define M(NAME) \ - else if (res->type == AggregatedDataVariants::Type::NAME) \ - mergeTwoLevelDataImplNAME)::element_type>(non_empty_data, thread_pool.get()); - APPLY_FOR_VARIANTS_TWO_LEVEL(M) -#undef M - else if (res->type != AggregatedDataVariants::Type::without_key) - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); - - double elapsed_seconds = watch.elapsedSeconds(); - size_t res_rows = res->size(); - - LOG_TRACE(log, std::fixed << std::setprecision(3) - << "Merged aggregated data. " - << "From " << rows << " to " << res_rows << " rows (efficiency: " << static_cast(rows) / res_rows << ")" - << " in " << elapsed_seconds << " sec." - << " (" << rows / elapsed_seconds << " rows/sec.)"); - - return res; -} - - template void NO_INLINE Aggregator::mergeBucketImpl( ManyAggregatedDataVariants & data, Int32 bucket) const @@ -1734,8 +1522,6 @@ void NO_INLINE Aggregator::mergeBucketImpl( * Если состояния агрегации двухуровневые, то выдаёт блоки строго по порядку bucket_num. * (Это важно при распределённой обработке.) * При этом, может обрабатывать разные bucket-ы параллельно, используя до threads потоков. - * - * TODO Удалить обычную функцию Aggregator::merge и связанные с ней, в случае невостребованности. */ class MergingAndConvertingBlockInputStream : public IProfilingBlockInputStream { From a43272c9b81cddaa44be30de1be1af1a1e3fe93c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Dec 2015 05:56:18 +0300 Subject: [PATCH 31/40] dbms: fixed error in memory-efficient merging of aggregated states [#METR-17000]. --- dbms/include/DB/Interpreters/Aggregator.h | 7 ++++- dbms/src/Interpreters/Aggregator.cpp | 32 +++++++++++------------ 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h index 907faa78f84..cf7c8f3310a 100644 --- a/dbms/include/DB/Interpreters/Aggregator.h +++ b/dbms/include/DB/Interpreters/Aggregator.h @@ -968,10 +968,15 @@ protected: TemporaryFiles temporary_files; /** Если заданы только имена столбцов (key_names, а также aggregates[i].column_name), то вычислить номера столбцов. - * Сформировать блок - пример результата. + * Сформировать блок - пример результата. Он используется в методах convertToBlocks, mergeAndConvertToBlocks. */ void initialize(const Block & block); + /** Установить блок - пример результата, + * только если он ещё не был установлен. + */ + void setSampleBlock(const Block & block); + /** Выбрать способ агрегации на основе количества и типов ключей. 
 */
 AggregatedDataVariants::Type chooseAggregationMethod(const ConstColumnPlainPtrs & key_columns, Sizes & key_sizes);
 
diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp
index 4da3dd7905c..6c66b5a658e 100644
--- a/dbms/src/Interpreters/Aggregator.cpp
+++ b/dbms/src/Interpreters/Aggregator.cpp
@@ -155,6 +155,15 @@ void Aggregator::initialize(const Block & block)
 }
 
 
+void Aggregator::setSampleBlock(const Block & block)
+{
+ std::lock_guard<std::mutex> lock(mutex);
+
+ if (!sample)
+ sample = block.cloneEmpty();
+}
+
+
 void Aggregator::compileIfPossible(AggregatedDataVariants::Type type)
 {
 std::lock_guard<std::mutex> lock(mutex);
@@ -1895,8 +1904,7 @@ void Aggregator::mergeStream(BlockInputStreamPtr stream, AggregatedDataVariants
 
 AggregateColumnsData aggregate_columns(params.aggregates_size);
 
- Block empty_block;
- initialize(empty_block);
+ initialize({});
 
 if (isCancelled())
 return;
@@ -1929,8 +1937,7 @@ void Aggregator::mergeStream(BlockInputStreamPtr stream, AggregatedDataVariants
 if (bucket_to_blocks.empty())
 return;
 
- if (!sample)
- sample = bucket_to_blocks.begin()->second.front().cloneEmpty();
+ setSampleBlock(bucket_to_blocks.begin()->second.front());
 
 /// How should the aggregation be performed?
 for (size_t i = 0; i < params.keys_size; ++i)
@@ -2089,14 +2096,8 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final)
 
 AggregateColumnsData aggregate_columns(params.aggregates_size);
 
- Block empty_block;
- initialize(empty_block);
-
- {
- std::lock_guard<std::mutex> lock(mutex);
- if (!sample)
- sample = blocks.front().cloneEmpty();
- }
+ initialize({});
+ setSampleBlock(blocks.front());
 
 /// How should the aggregation be performed?
 for (size_t i = 0; i < params.keys_size; ++i)
@@ -2261,11 +2262,8 @@ std::vector<Block> Aggregator::convertBlockToTwoLevel(const Block & block)
 if (!block)
 return {};
 
- Block empty_block;
- initialize(empty_block);
-
- if (!sample)
- sample = block.cloneEmpty();
+ initialize({});
+ setSampleBlock(block);
 
 AggregatedDataVariants data;
 
From d1fc6175f33fb445521827f4add69a418a176cf2 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 9 Dec 2015 06:30:54 +0300
Subject: [PATCH 32/40] dbms: added revision to query_log table [#METR-19319].

---
 dbms/src/Interpreters/QueryLog.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dbms/src/Interpreters/QueryLog.cpp b/dbms/src/Interpreters/QueryLog.cpp
index e491e5138ae..28851ef93a6 100644
--- a/dbms/src/Interpreters/QueryLog.cpp
+++ b/dbms/src/Interpreters/QueryLog.cpp
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 
 namespace DB
@@ -203,6 +204,7 @@ Block QueryLog::createBlock()
 {new ColumnFixedString(16), new DataTypeFixedString(16), "ip_address"},
 {new ColumnString, new DataTypeString, "user"},
 {new ColumnString, new DataTypeString, "query_id"},
+ {new ColumnUInt32, new DataTypeUInt32, "revision"},
 };
 }
 
@@ -262,6 +264,8 @@ void QueryLog::flush()
 block.unsafeGetByPosition(i++).column.get()->insertData(elem.user.data(), elem.user.size());
 block.unsafeGetByPosition(i++).column.get()->insertData(elem.query_id.data(), elem.query_id.size());
+
+ block.unsafeGetByPosition(i++).column.get()->insert(static_cast<UInt64>(Revision::get()));
 }
 
 BlockOutputStreamPtr stream = table->write({}, {});
From e348105481391e0d96758e25c543e116995ee967 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 9 Dec 2015 07:06:44 +0300
Subject: [PATCH 33/40] dbms: get rid of bad idea [#METR-19056].
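
For context on the "bad idea": cancel()/uncancel() previously kept a counter so that overlapping
cancellation scopes would compose; this patch replaces it with a plain bool. A minimal standalone
sketch of the difference (hypothetical CounterCancel/BoolCancel types, not code from this
repository):

#include <atomic>
#include <cassert>

struct CounterCancel
{
    std::atomic<int> cancelled {0};
    void cancel() { ++cancelled; }
    void uncancel() { --cancelled; }
    bool isCancelled() const { return cancelled > 0; }
};

struct BoolCancel
{
    std::atomic<bool> cancelled {false};
    void cancel() { cancelled = true; }
    void uncancel() { cancelled = false; }
    bool isCancelled() const { return cancelled; }
};

int main()
{
    CounterCancel c;
    c.cancel();               /// Caller A cancels.
    c.cancel();               /// Caller B cancels while A is still active.
    c.uncancel();             /// Caller B is done.
    assert(c.isCancelled());  /// A's cancellation still holds.

    BoolCancel b;
    b.cancel();               /// Caller A cancels.
    b.cancel();               /// Caller B cancels.
    b.uncancel();             /// Caller B is done...
    assert(!b.isCancelled()); /// ...and A's cancellation is silently dropped.
}

The patch accepts the second behaviour: cancellation scopes are assumed not to overlap, so the
simpler flag is enough.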
---
 dbms/include/DB/Storages/MergeTree/MergeTreeDataMerger.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/dbms/include/DB/Storages/MergeTree/MergeTreeDataMerger.h b/dbms/include/DB/Storages/MergeTree/MergeTreeDataMerger.h
index e44da4f3c0a..9e6efe6fa02 100644
--- a/dbms/include/DB/Storages/MergeTree/MergeTreeDataMerger.h
+++ b/dbms/include/DB/Storages/MergeTree/MergeTreeDataMerger.h
@@ -53,11 +53,10 @@ public:
 
 /** Cancels all merges. All currently running mergeParts calls will soon throw an exception.
 * All new calls will throw exceptions until uncancel() is called.
- * Counts the number of such calls, to support several overlapping cancellations.
 */
- void cancel() { ++cancelled; }
- void uncancel() { --cancelled; }
- bool isCancelled() const { return cancelled > 0; }
+ void cancel() { cancelled = true; }
+ void uncancel() { cancelled = false; }
+ bool isCancelled() const { return cancelled; }
 
 private:
 MergeTreeData & data;
@@ -67,7 +66,7 @@ private:
 /// When we last logged that disk space has run out (so as not to log it too often).
 time_t disk_space_warning_time = 0;
 
- std::atomic<int> cancelled {0};
+ std::atomic<bool> cancelled {false};
 };
 
From 03f091c01c852fddf2b6d0d120394be5830490df Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 9 Dec 2015 07:28:01 +0300
Subject: [PATCH 34/40] dbms: fixed error [#METR-19316].

---
 .../MergingAggregatedMemoryEfficientBlockInputStream.h | 3 ++-
 .../MergingAggregatedMemoryEfficientBlockInputStream.cpp | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h b/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
index e5875c874c9..1d989db265f 100644
--- a/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
+++ b/dbms/include/DB/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h
@@ -83,7 +83,8 @@ private:
 boost::threadpool::pool pool;
 std::mutex get_next_blocks_mutex;
 ConcurrentBoundedQueue<OutputData> result_queue;
- bool exhausted = false;
+ bool exhausted = false; /// There is no more data.
+ bool finish = false; /// We need to finish earlier than the data runs out.
 std::atomic<size_t> active_threads;
 
 ParallelMergeData(size_t max_threads) : pool(max_threads), result_queue(max_threads), active_threads(max_threads) {}
diff --git a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
index f3b5013bb7f..44e6ad4ccd0 100644
--- a/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
+++ b/dbms/src/DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.cpp
@@ -131,7 +131,7 @@ MergingAggregatedMemoryEfficientBlockInputStream::~MergingAggregatedMemoryEffici
 {
 std::lock_guard<std::mutex> lock(parallel_merge_data->get_next_blocks_mutex);
- parallel_merge_data->exhausted = true;
+ parallel_merge_data->finish = true;
 }
 
 parallel_merge_data->result_queue.clear();
@@ -161,7 +161,7 @@ void MergingAggregatedMemoryEfficientBlockInputStream::mergeThread(MemoryTracker
 {
 std::lock_guard<std::mutex> lock(parallel_merge_data->get_next_blocks_mutex);
 
- if (parallel_merge_data->exhausted)
+ if (parallel_merge_data->exhausted || parallel_merge_data->finish)
 break;
 
 blocks_to_merge = getNextBlocksToMerge();
@@ -178,7 +178,7 @@ void MergingAggregatedMemoryEfficientBlockInputStream::mergeThread(MemoryTracker
 {
 std::lock_guard<std::mutex> lock(parallel_merge_data->get_next_blocks_mutex);
 
- if (parallel_merge_data->exhausted)
+ if (parallel_merge_data->finish)
 break;
 
 parallel_merge_data->result_queue.push(OutputData(std::move(res)));
From e0653fda4ebb905afde952a47fdd70246ee61056 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 9 Dec 2015 07:41:46 +0300
Subject: [PATCH 35/40] dbms: little better [#METR-19172].

---
 dbms/src/Storages/StorageReplicatedMergeTree.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
index 762b84ba5af..6785968ef49 100644
--- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp
@@ -3054,10 +3054,10 @@ void StorageReplicatedMergeTree::drop()
 if (is_readonly)
 throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY);
 
- auto zookeeper = getZooKeeper();
-
 shutdown();
 
+ auto zookeeper = getZooKeeper();
+
 LOG_INFO(log, "Removing replica " << replica_path);
 replica_is_active_node = nullptr;
 zookeeper->tryRemoveRecursive(replica_path);
From 6260ac34a36623cc75a3d728f8849f9d8ba3aa70 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 9 Dec 2015 08:05:42 +0300
Subject: [PATCH 36/40] dbms: fixed high CPU usage on shutdown [#METR-19056].

---
 dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp
index f82a694b811..33f481ef328 100644
--- a/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp
+++ b/dbms/src/Storages/MergeTree/BackgroundProcessingPool.cpp
@@ -118,6 +118,9 @@ void BackgroundProcessingPool::threadFunction()
 /// O(n), where n is the number of tasks - in essence, the number of tables. Usually there are few of them.
 for (const auto & handle : tasks)
 {
+ if (handle->removed)
+ continue;
+
 time_t next_time_to_execute = handle->next_time_to_execute;
 
 if (next_time_to_execute < min_time)
@@ -144,9 +147,6 @@
 continue;
 }
 
- if (task->removed)
- continue;
-
 /// No better task was found, and this task did nothing last time, so it has been assigned some time to sleep.
 time_t current_time = time(0);
 if (min_time > current_time)
From 6eb47b0db47d0817d8589914db59dfd534a78ab7 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 9 Dec 2015 09:04:00 +0300
Subject: [PATCH 37/40] dbms: fixed error with StorageBuffer [#METR-19249].

---
 dbms/src/Storages/StorageBuffer.cpp | 17 ++++----
 .../0_stateless/00289_buffer_test.reference | 2 +
 .../queries/0_stateless/00289_buffer_test.sh | 41 +++++++++++++++++++
 3 files changed, 53 insertions(+), 7 deletions(-)
 create mode 100644 dbms/tests/queries/0_stateless/00289_buffer_test.reference
 create mode 100755 dbms/tests/queries/0_stateless/00289_buffer_test.sh

diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp
index b9ac246c68b..158325c973e 100644
--- a/dbms/src/Storages/StorageBuffer.cpp
+++ b/dbms/src/Storages/StorageBuffer.cpp
@@ -141,6 +141,9 @@ BlockInputStreams StorageBuffer::read(
 
 static void appendBlock(const Block & from, Block & to)
 {
+ if (!to)
+ throw Exception("Cannot append to empty block", ErrorCodes::LOGICAL_ERROR);
+
 size_t rows = from.rows();
 for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
 {
@@ -243,13 +246,13 @@ private:
 buffer.first_write_time = time(0);
 buffer.data = sorted_block.cloneEmpty();
 }
-
- /** If the limits would be exceeded after inserting into the buffer, we flush the buffer.
- * This also protects against unbounded memory consumption, since if it is impossible to write to the table,
- * an exception will be thrown and the new data will not be added to the buffer.
- */
- if (storage.checkThresholds(buffer, time(0), sorted_block.rowsInFirstColumn(), sorted_block.bytes()))
+ else if (storage.checkThresholds(buffer, time(0), sorted_block.rowsInFirstColumn(), sorted_block.bytes()))
 {
+ /** If the limits would be exceeded after inserting into the buffer, we flush the buffer.
+ * This also protects against unbounded memory consumption, since if it is impossible to write to the table,
+ * an exception will be thrown and the new data will not be added to the buffer.
+ */
+
 lock.unlock();
 storage.flushBuffer(buffer, false);
 lock.lock();
@@ -321,7 +324,7 @@ void StorageBuffer::flushAllBuffers(const bool check_thresholds)
 
 void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds)
 {
- Block block_to_write;
+ Block block_to_write = buffer.data.cloneEmpty();
 time_t current_time = check_thresholds ? time(0) : 0;
 
 /** Quite a few problems arise because we want to lock the buffer only for a short time.
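
A detail that is easy to miss in the hunk above: flushBuffer() itself briefly takes buffer.mutex
to swap the accumulated block out, so insertIntoBuffer() must release its lock before calling it
(the mutex is not recursive) and re-acquire it before appending. A simplified sketch of that
pattern, with assumed types (a row counter stands in for the accumulated Block; hypothetical
names, not the actual StorageBuffer code):

#include <cstddef>
#include <mutex>

struct Buffer
{
    std::mutex mutex;
    size_t rows = 0;        /// Stand-in for the accumulated Block.
};

bool wouldExceedThresholds(const Buffer & buffer, size_t additional_rows)
{
    return buffer.rows + additional_rows > 10000;   /// Stand-in for checkThresholds().
}

/// Like flushBuffer: lock only long enough to take the data out,
/// then write it to the destination without holding the lock.
void flushToDestination(Buffer & buffer)
{
    size_t rows_to_write = 0;
    {
        std::lock_guard<std::mutex> lock(buffer.mutex);
        rows_to_write = buffer.rows;
        buffer.rows = 0;
    }
    /// Here the rows would be written to the destination table, possibly throwing;
    /// either way, the lock is no longer held and other inserts can proceed.
    (void) rows_to_write;
}

/// Like insertIntoBuffer: flush BEFORE appending if the insert would exceed the
/// thresholds, releasing the lock so that flushToDestination can take it itself.
void insert(Buffer & buffer, size_t new_rows)
{
    std::unique_lock<std::mutex> lock(buffer.mutex);

    if (wouldExceedThresholds(buffer, new_rows))
    {
        lock.unlock();
        flushToDestination(buffer);
        lock.lock();
    }

    buffer.rows += new_rows;    /// Stand-in for appendBlock(sorted_block, buffer.data).
}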
diff --git a/dbms/tests/queries/0_stateless/00289_buffer_test.reference b/dbms/tests/queries/0_stateless/00289_buffer_test.reference new file mode 100644 index 00000000000..dc546d0f000 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00289_buffer_test.reference @@ -0,0 +1,2 @@ +20000 1 20000 200010000 20000 +20000 1 20000 200010000 20000 diff --git a/dbms/tests/queries/0_stateless/00289_buffer_test.sh b/dbms/tests/queries/0_stateless/00289_buffer_test.sh new file mode 100755 index 00000000000..240b3295914 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00289_buffer_test.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +clickhouse-client -n --query=" + DROP TABLE IF EXISTS test.dst; + DROP TABLE IF EXISTS test.buffer; + + CREATE TABLE test.dst (x UInt64, d Date DEFAULT today()) ENGINE = MergeTree(d, x, 8192); + CREATE TABLE test.buffer (x UInt64, d Date DEFAULT today()) ENGINE = Buffer(test, dst, 16, 1, 100, 10000, 10, 1000, 100000); + "; + +seq 1 1000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 1001 2000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 2001 3000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 3001 4000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 4001 5000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 5001 6000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 6001 7000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 7001 8000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 8001 9000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 9001 10000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 10001 11000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 11001 12000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 12001 13000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 13001 14000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 14001 15000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 15001 16000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 16001 17000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 17001 18000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 18001 19000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & +seq 19001 20000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n & + +wait + +clickhouse-client --query="SELECT count(), min(x), max(x), sum(x), uniqExact(x) FROM test.buffer;"; +clickhouse-client --query="OPTIMIZE TABLE test.buffer;"; +clickhouse-client --query="SELECT count(), min(x), max(x), sum(x), uniqExact(x) FROM test.dst;"; + +clickhouse-client -n --query=" + DROP TABLE test.dst; + DROP TABLE test.buffer; + "; From 9e33ab55193043bf109774822539de271f3cfc8e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 9 Dec 2015 09:10:13 +0300 
Subject: [PATCH 38/40] dbms: fixed error with StorageBuffer [#METR-19249].

---
 dbms/src/Storages/StorageBuffer.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp
index 158325c973e..5e8450d3234 100644
--- a/dbms/src/Storages/StorageBuffer.cpp
+++ b/dbms/src/Storages/StorageBuffer.cpp
@@ -243,7 +243,6 @@ private:
 
 if (!buffer.data)
 {
- buffer.first_write_time = time(0);
 buffer.data = sorted_block.cloneEmpty();
 }
 else if (storage.checkThresholds(buffer, time(0), sorted_block.rowsInFirstColumn(), sorted_block.bytes()))
@@ -258,6 +257,9 @@ private:
 lock.lock();
 }
 
+ if (!buffer.first_write_time)
+ buffer.first_write_time = time(0);
+
 appendBlock(sorted_block, buffer.data);
 }
 };
From e47383477010eb50a19b078ad8a637caf1ff09e7 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 9 Dec 2015 09:16:24 +0300
Subject: [PATCH 39/40] dbms: StorageBuffer: write correct info about block
 flushed [#METR-19249].

---
 dbms/src/Storages/StorageBuffer.cpp | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp
index 5e8450d3234..95a79b52f28 100644
--- a/dbms/src/Storages/StorageBuffer.cpp
+++ b/dbms/src/Storages/StorageBuffer.cpp
@@ -306,14 +306,9 @@ bool StorageBuffer::checkThresholds(Buffer & buffer, time_t current_time, size_t
 size_t rows = buffer.data.rowsInFirstColumn() + additional_rows;
 size_t bytes = buffer.data.bytes() + additional_bytes;
 
- bool res =
+ return
 (time_passed > min_thresholds.time && rows > min_thresholds.rows && bytes > min_thresholds.bytes)
 || (time_passed > max_thresholds.time || rows > max_thresholds.rows || bytes > max_thresholds.bytes);
-
- if (res)
- LOG_TRACE(log, "Flushing buffer with " << rows << " rows, " << bytes << " bytes, age " << time_passed << " seconds.");
-
- return res;
 }
 
 
@@ -329,6 +324,10 @@
 Block block_to_write = buffer.data.cloneEmpty();
 time_t current_time = check_thresholds ? time(0) : 0;
 
+ size_t rows = 0;
+ size_t bytes = 0;
+ time_t time_passed = 0;
+
 /** Quite a few problems arise because we want to lock the buffer only for a short time.
 * Under the lock, we take the block out of the buffer and replace it with a new empty one.
 * Then we try to write the block we obtained to the destination table.
@@ -338,6 +337,11 @@
 {
 std::lock_guard<std::mutex> lock(buffer.mutex);
 
+ rows = buffer.data.rowsInFirstColumn();
+ bytes = buffer.data.bytes();
+ if (buffer.first_write_time)
+ time_passed = current_time - buffer.first_write_time;
+
 if (check_thresholds)
 {
 if (!checkThresholds(buffer, current_time))
@@ -345,7 +349,7 @@
 }
 else
 {
- if (buffer.data.rowsInFirstColumn() == 0)
+ if (rows == 0)
 return;
 }
 
@@ -353,6 +357,8 @@
 buffer.first_write_time = 0;
 }
 
+ LOG_TRACE(log, "Flushing buffer with " << rows << " rows, " << bytes << " bytes, age " << time_passed << " seconds.");
+
 if (no_destination)
 return;
 
From db8d82f13e03e7fe33aecd6d08423d09054e1199 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 9 Dec 2015 09:55:49 +0300
Subject: [PATCH 40/40] dbms: Buffer: better [#METR-19249].
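
The refactoring below splits the threshold check in two: checkThresholds() computes
rows/bytes/time_passed from the buffer, and checkThresholdsImpl() is the pure predicate, so
flushBuffer() can reuse the values it already captured under the buffer lock in the previous
patch. The predicate itself, restated as a self-contained sketch (Thresholds fields assumed
from the expressions visible in the diffs): flush once ALL of the min thresholds are exceeded,
or as soon as ANY of the max thresholds is.

#include <cstddef>
#include <ctime>

struct Thresholds
{
    time_t time;    /// Seconds since the first write into the buffer.
    size_t rows;
    size_t bytes;
};

bool checkThresholdsImpl(const Thresholds & min, const Thresholds & max,
                         size_t rows, size_t bytes, time_t time_passed)
{
    return (time_passed > min.time && rows > min.rows && bytes > min.bytes)
        || (time_passed > max.time || rows > max.rows || bytes > max.bytes);
}

For example, with min = {1 s, 100 rows, 10000 bytes} and max = {10 s, 1000 rows, 100000 bytes}
(the values the test below switches to, reading the Buffer engine arguments as num_layers, then
the min/max pairs for time, rows, and bytes), a buffer holding 150 rows and 20000 bytes after
2 seconds is flushed, while one holding 150 rows and 500 bytes is not, until it crosses one of
the max limits.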
---
 dbms/include/DB/Storages/StorageBuffer.h | 3 ++-
 dbms/src/Storages/StorageBuffer.cpp | 18 +++++++++++++-----
 .../queries/0_stateless/00289_buffer_test.sh | 2 +-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/dbms/include/DB/Storages/StorageBuffer.h b/dbms/include/DB/Storages/StorageBuffer.h
index 0ec8d04b79b..a601dd7302f 100644
--- a/dbms/include/DB/Storages/StorageBuffer.h
+++ b/dbms/include/DB/Storages/StorageBuffer.h
@@ -128,7 +128,8 @@ private:
 void flushAllBuffers(bool check_thresholds = true);
 /// Flush the buffer. If check_thresholds is set, flush only if the thresholds are exceeded.
 void flushBuffer(Buffer & buffer, bool check_thresholds);
- bool checkThresholds(Buffer & buffer, time_t current_time, size_t additional_rows = 0, size_t additional_bytes = 0);
+ bool checkThresholds(const Buffer & buffer, time_t current_time, size_t additional_rows = 0, size_t additional_bytes = 0) const;
+ bool checkThresholdsImpl(size_t rows, size_t bytes, time_t time_passed) const;
 
 /// The table argument is passed because it is sometimes calculated in advance. It must match the destination.
 void writeBlockToDestination(const Block & block, StoragePtr table);
diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp
index 95a79b52f28..65455caf7b0 100644
--- a/dbms/src/Storages/StorageBuffer.cpp
+++ b/dbms/src/Storages/StorageBuffer.cpp
@@ -238,6 +238,8 @@ private:
 
 void insertIntoBuffer(const Block & block, StorageBuffer::Buffer & buffer, std::unique_lock<std::mutex> && lock)
 {
+ time_t current_time = time(0);
+
 /// Sort the columns in the block. This makes it easier to concatenate the blocks later.
 Block sorted_block = block.sortColumns();
 
@@ -245,7 +247,7 @@ private:
 {
 buffer.data = sorted_block.cloneEmpty();
 }
- else if (storage.checkThresholds(buffer, time(0), sorted_block.rowsInFirstColumn(), sorted_block.bytes()))
+ else if (storage.checkThresholds(buffer, current_time, sorted_block.rowsInFirstColumn(), sorted_block.bytes()))
 {
 /** If the limits would be exceeded after inserting into the buffer, we flush the buffer.
 * This also protects against unbounded memory consumption, since if it is impossible to write to the table,
@@ -258,7 +260,7 @@ private:
 }
 
 if (!buffer.first_write_time)
- buffer.first_write_time = time(0);
+ buffer.first_write_time = current_time;
 
 appendBlock(sorted_block, buffer.data);
 }
@@ -297,7 +299,7 @@ bool StorageBuffer::optimize(const Settings & settings)
 }
 
 
-bool StorageBuffer::checkThresholds(Buffer & buffer, time_t current_time, size_t additional_rows, size_t additional_bytes)
+bool StorageBuffer::checkThresholds(const Buffer & buffer, time_t current_time, size_t additional_rows, size_t additional_bytes) const
 {
 time_t time_passed = 0;
 if (buffer.first_write_time)
@@ -306,6 +308,12 @@ bool StorageBuffer::checkThresholds(Buffer & buffer, time_t current_time, size_t
 size_t rows = buffer.data.rowsInFirstColumn() + additional_rows;
 size_t bytes = buffer.data.bytes() + additional_bytes;
 
+ return checkThresholdsImpl(rows, bytes, time_passed);
+}
+
+
+bool StorageBuffer::checkThresholdsImpl(size_t rows, size_t bytes, time_t time_passed) const
+{
 return
 (time_passed > min_thresholds.time && rows > min_thresholds.rows && bytes > min_thresholds.bytes)
 || (time_passed > max_thresholds.time || rows > max_thresholds.rows || bytes > max_thresholds.bytes);
@@ -322,7 +330,7 @@
 void StorageBuffer::flushBuffer(Buffer & buffer, bool check_thresholds)
 {
 Block block_to_write = buffer.data.cloneEmpty();
- time_t current_time = check_thresholds ? time(0) : 0;
+ time_t current_time = time(0);
 
 size_t rows = 0;
 size_t bytes = 0;
@@ -344,7 +352,7 @@
 
 if (check_thresholds)
 {
- if (!checkThresholds(buffer, current_time))
+ if (!checkThresholdsImpl(rows, bytes, time_passed))
 return;
 }
 else
diff --git a/dbms/tests/queries/0_stateless/00289_buffer_test.sh b/dbms/tests/queries/0_stateless/00289_buffer_test.sh
index 240b3295914..5a7aaab07f3 100755
--- a/dbms/tests/queries/0_stateless/00289_buffer_test.sh
+++ b/dbms/tests/queries/0_stateless/00289_buffer_test.sh
@@ -5,7 +5,7 @@ clickhouse-client -n --query="
 DROP TABLE IF EXISTS test.buffer;
 
 CREATE TABLE test.dst (x UInt64, d Date DEFAULT today()) ENGINE = MergeTree(d, x, 8192);
- CREATE TABLE test.buffer (x UInt64, d Date DEFAULT today()) ENGINE = Buffer(test, dst, 16, 1, 100, 10000, 10, 1000, 100000);
+ CREATE TABLE test.buffer (x UInt64, d Date DEFAULT today()) ENGINE = Buffer(test, dst, 16, 1, 10, 100, 1000, 10000, 100000);
 ";
 
seq 1 1000 | sed -r -e 's/^(.+)$/INSERT INTO test.buffer (x) VALUES (\1);/' | clickhouse-client -n &