From a7db4255926d984572370193cf36096950758b2c Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 5 Aug 2019 22:15:15 +0800 Subject: [PATCH 1/3] Optimize Count() Cond. Follow up https://github.com/yandex/ClickHouse/pull/6028 --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 34 +++++++++++++------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 13f9458b835..e60bdc94a58 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -982,6 +982,18 @@ void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptio aggregates = aggregate_descriptions; } +struct ColumnExt +{ + size_t compressed_size; + size_t type_size; + size_t uncompressed_size; + String name; + bool operator<(const ColumnExt & that) const + { + return std::tie(compressed_size, type_size, uncompressed_size) < std::tie(that.compressed_size, that.type_size, that.uncompressed_size); + } +}; + void ExpressionAnalyzer::collectUsedColumns() { /** Calculate which columns are required to execute the expression. @@ -1040,23 +1052,23 @@ void ExpressionAnalyzer::collectUsedColumns() /// You need to read at least one column to find the number of rows. if (select_query && required.empty()) { - /// We will find a column with minimum compressed size. Because it is the column that is cheapest to read. - size_t min_data_compressed = 0; - String min_column_name; + /// We will find a column with minimum <compressed_size, type_size, uncompressed_size>. + /// Because it is the column that is cheapest to read. 
+ std::vector<ColumnExt> columns; if (storage) { auto column_sizes = storage->getColumnSizes(); - for (auto & [column_name, column_size] : column_sizes) + for (auto & source_column : source_columns) { - if (min_data_compressed == 0 || min_data_compressed > column_size.data_compressed) - { - min_data_compressed = column_size.data_compressed; - min_column_name = column_name; - } + auto c = column_sizes.find(source_column.name); + if (c == column_sizes.end()) + continue; + size_t type_size = source_column.type->haveMaximumSizeOfValue() ? source_column.type->getMaximumSizeOfValueInMemory() : 100; + columns.emplace_back(ColumnExt{c->second.data_compressed, type_size, c->second.data_uncompressed, source_column.name}); } } - if (min_data_compressed > 0) - required.insert(min_column_name); + if (columns.size()) + required.insert(std::min_element(columns.begin(), columns.end())->name); else /// If we have no information about columns sizes, choose a column of minimum size of its data type. required.insert(ExpressionActions::getSmallestColumn(source_columns)); From 9950c9442b32cd5da0b5f83ee93d582c7e31de2a Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 6 Aug 2019 09:34:49 +0800 Subject: [PATCH 2/3] Update --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 28 ++++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index e60bdc94a58..fdc8226a42a 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -982,18 +982,6 @@ void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptio aggregates = aggregate_descriptions; } -struct ColumnExt -{ - size_t compressed_size; - size_t type_size; - size_t uncompressed_size; - String name; - bool operator<(const ColumnExt & that) const - { - return std::tie(compressed_size, type_size, uncompressed_size) < std::tie(that.compressed_size, that.type_size, 
that.uncompressed_size); - } -}; - void ExpressionAnalyzer::collectUsedColumns() { /** Calculate which columns are required to execute the expression. @@ -1054,7 +1042,19 @@ void ExpressionAnalyzer::collectUsedColumns() { /// We will find a column with minimum <compressed_size, type_size, uncompressed_size>. /// Because it is the column that is cheapest to read. - std::vector<ColumnExt> columns; + struct ColumnSizeTuple + { + size_t compressed_size; + size_t type_size; + size_t uncompressed_size; + String name; + bool operator<(const ColumnSizeTuple & that) const + { + return std::tie(compressed_size, type_size, uncompressed_size) + < std::tie(that.compressed_size, that.type_size, that.uncompressed_size); + } + }; + std::vector<ColumnSizeTuple> columns; if (storage) { auto column_sizes = storage->getColumnSizes(); @@ -1064,7 +1064,7 @@ void ExpressionAnalyzer::collectUsedColumns() if (c == column_sizes.end()) continue; size_t type_size = source_column.type->haveMaximumSizeOfValue() ? source_column.type->getMaximumSizeOfValueInMemory() : 100; - columns.emplace_back(ColumnExt{c->second.data_compressed, type_size, c->second.data_uncompressed, source_column.name}); + columns.emplace_back(ColumnSizeTuple{c->second.data_compressed, type_size, c->second.data_uncompressed, source_column.name}); } } if (columns.size()) From ecb25067a5c50f95b00ff7418b00d6457ccc045f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 6 Aug 2019 13:32:07 +0300 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f10e8da9d7b..f9c461e63e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ * Fixed the possibility of hanging queries when server is overloaded. [#6301](https://github.com/yandex/ClickHouse/pull/6301) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Fix FPE in yandexConsistentHash function. This fixes [#6304](https://github.com/yandex/ClickHouse/issues/6304). 
[#6126](https://github.com/yandex/ClickHouse/pull/6126) ([alexey-milovidov](https://github.com/alexey-milovidov)) * Fixed bug in conversion of `LowCardinality` types in `AggregateFunctionFactory`. This fixes [#6257](https://github.com/yandex/ClickHouse/issues/6257). [#6281](https://github.com/yandex/ClickHouse/pull/6281) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* Fix parsing of `bool` settings from `true` and `false` strings. [#6278](https://github.com/yandex/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin)) +* Fix parsing of `bool` settings from `true` and `false` strings in configuration files. [#6278](https://github.com/yandex/ClickHouse/pull/6278) ([alesapin](https://github.com/alesapin)) * Fix rare bug with incompatible stream headers in queries to `Distributed` table over `MergeTree` table when part of `WHERE` moves to `PREWHERE`. [#6236](https://github.com/yandex/ClickHouse/pull/6236) ([alesapin](https://github.com/alesapin)) * Fixed overflow in integer division of signed type to unsigned type. This fixes [#6214](https://github.com/yandex/ClickHouse/issues/6214). [#6233](https://github.com/yandex/ClickHouse/pull/6233) ([alexey-milovidov](https://github.com/alexey-milovidov))