From 0117c194c50e6ac399d3eab18eec370f5908d99f Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Apr 2020 13:33:59 +0300 Subject: [PATCH 1/3] Fix GroupingAggregatedTransform for single-level aggregation. Add test. --- docker/test/stateless/Dockerfile | 1 + ...gingAggregatedMemoryEfficientTransform.cpp | 9 ++++++-- src/Storages/StorageDistributed.cpp | 2 +- tests/config/clusters.xml | 20 ++++++++++++++++ ...tion_memory_efficient_mix_levels.reference | 10 ++++++++ ...ggregation_memory_efficient_mix_levels.sql | 23 +++++++++++++++++++ 6 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 tests/config/clusters.xml create mode 100644 tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.reference create mode 100644 tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 2d2025de58b..977c580ef43 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -68,6 +68,7 @@ CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/disks.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/secure_ports.xml /etc/clickhouse-server/config.d/; \ + ln -s /usr/share/clickhouse-test/config/clusters.xml /etc/clickhouse-server/config.d/; \ ln -s /usr/share/clickhouse-test/config/server.key /etc/clickhouse-server/; \ ln -s /usr/share/clickhouse-test/config/server.crt /etc/clickhouse-server/; \ ln -s /usr/share/clickhouse-test/config/dhparam.pem /etc/clickhouse-server/; \ diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index 4c0323fcf6a..cabe74b36e9 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -275,15 +275,20 @@ void GroupingAggregatedTransform::work() { if (!single_level_chunks.empty()) { - auto & header = getOutputs().front().getHeader(); + auto & header = getInputs().front().getHeader(); auto block = header.cloneWithColumns(single_level_chunks.back().detachColumns()); single_level_chunks.pop_back(); auto blocks = params->aggregator.convertBlockToTwoLevel(block); for (auto & cur_block : blocks) { + if (!cur_block) + continue; + Int32 bucket = cur_block.info.bucket_num; - chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows())); + auto chunk_info = std::make_shared(); + chunk_info->bucket_num = bucket; + chunks_map[bucket].emplace_back(Chunk(cur_block.getColumns(), cur_block.rows(), std::move(chunk_info))); } } } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index b4375dd5b0a..adf444c3565 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -474,7 +474,7 @@ void StorageDistributed::alter(const AlterCommands & params, const Context & con void StorageDistributed::startup() { if (remote_database.empty() && !remote_table_function_ptr) - LOG_WARNING(log, "Name of remote database is empty. Default database will be used implicitly."); + LOG_INFO(log, "Name of remote database is empty. Default database will be used implicitly."); if (!volume) return; diff --git a/tests/config/clusters.xml b/tests/config/clusters.xml new file mode 100644 index 00000000000..c0babf0ff89 --- /dev/null +++ b/tests/config/clusters.xml @@ -0,0 +1,20 @@ + + + + + + shard_0 + localhost + 9000 + + + + + shard_1 + localhost + 9000 + + + + + \ No newline at end of file diff --git a/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.reference b/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.reference new file mode 100644 index 00000000000..ac13b3f193e --- /dev/null +++ b/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.reference @@ -0,0 +1,10 @@ +0 2 +1 1 +2 1 +3 1 +4 1 +5 1 +6 1 +7 1 +8 1 +9 1 diff --git a/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql b/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql new file mode 100644 index 00000000000..6e4feda346f --- /dev/null +++ b/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql @@ -0,0 +1,23 @@ +create database if not exists shard_0; +create database if not exists shard_1; + +drop table if exists shard_0.shard_01231_distributed_aggregation_memory_efficient; +drop table if exists shard_1.shard_01231_distributed_aggregation_memory_efficient; +drop table if exists ma_dist; + +create table shard_0.shard_01231_distributed_aggregation_memory_efficient (x UInt64) engine = MergeTree order by x; +create table shard_1.shard_01231_distributed_aggregation_memory_efficient (x UInt64) engine = MergeTree order by x; + +insert into shard_0.shard_01231_distributed_aggregation_memory_efficient select * from numbers(1); +insert into shard_1.shard_01231_distributed_aggregation_memory_efficient select * from numbers(10); + +create table ma_dist (x UInt64) ENGINE = Distributed(test_cluster_two_shards_different_databases, '', 'shard_01231_distributed_aggregation_memory_efficient'); + +set distributed_aggregation_memory_efficient = 1; +set group_by_two_level_threshold = 2; +set max_bytes_before_external_group_by = 16; + +select x, count() from ma_dist group by x order by x; + +drop table if exists shard_0.shard_01231_distributed_aggregation_memory_efficient; +drop table if exists shard_1.shard_01231_distributed_aggregation_memory_efficient; From 8986cf688b719e2e7dcd3fd60cdc5b2afff793c2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Apr 2020 16:28:46 +0300 Subject: [PATCH 2/3] Added comment. --- .../Transforms/MergingAggregatedMemoryEfficientTransform.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index cabe74b36e9..12d289deaed 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -275,7 +275,7 @@ void GroupingAggregatedTransform::work() { if (!single_level_chunks.empty()) { - auto & header = getInputs().front().getHeader(); + auto & header = getInputs().front().getHeader(); /// Take header from input port. Output header is empty. auto block = header.cloneWithColumns(single_level_chunks.back().detachColumns()); single_level_chunks.pop_back(); auto blocks = params->aggregator.convertBlockToTwoLevel(block); From 4fd5ef8bad188a9bd0f4b7bc5d686a0c8761cc39 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 6 Apr 2020 21:05:45 +0300 Subject: [PATCH 3/3] Review fixes. --- src/Storages/StorageDistributed.cpp | 2 +- ...1231_distributed_aggregation_memory_efficient_mix_levels.sql | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index adf444c3565..b4375dd5b0a 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -474,7 +474,7 @@ void StorageDistributed::alter(const AlterCommands & params, const Context & con void StorageDistributed::startup() { if (remote_database.empty() && !remote_table_function_ptr) - LOG_INFO(log, "Name of remote database is empty. Default database will be used implicitly."); + LOG_WARNING(log, "Name of remote database is empty. Default database will be used implicitly."); if (!volume) return; diff --git a/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql b/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql index 6e4feda346f..31f09b35bf3 100644 --- a/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql +++ b/tests/queries/0_stateless/01231_distributed_aggregation_memory_efficient_mix_levels.sql @@ -1,3 +1,5 @@ +set send_logs_level = 'error'; + create database if not exists shard_0; create database if not exists shard_1;