diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index d89be9f3e2e..5ea29615942 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -790,10 +790,10 @@ void ExpressionActions::assertDeterministic() const } -std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns) +NameAndTypePair ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns) { std::optional min_size; - String res; + NameAndTypePair result; for (const auto & column : columns) { @@ -807,14 +807,14 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum if (!min_size || size < *min_size) { min_size = size; - res = column.name; + result = column; } } if (!min_size) throw Exception("No available columns", ErrorCodes::LOGICAL_ERROR); - return res; + return result; } std::string ExpressionActions::dumpActions() const diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index be63b9e0d78..faefe0985f7 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -111,7 +111,7 @@ public: std::string dumpActions() const; JSONBuilder::ItemPtr toTree() const; - static std::string getSmallestColumn(const NamesAndTypesList & columns); + static NameAndTypePair getSmallestColumn(const NamesAndTypesList & columns); /// Check if column is always zero. True if it's definite, false if we can't say for sure. /// Call it only after subqueries for sets were executed. 
diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 20c14b8d7b6..a1b3c8011cd 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1146,7 +1146,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select required.insert(std::min_element(columns.begin(), columns.end())->name); else if (!source_columns.empty()) /// If we have no information about columns sizes, choose a column of minimum size of its data type. - required.insert(ExpressionActions::getSmallestColumn(source_columns)); + required.insert(ExpressionActions::getSmallestColumn(source_columns).name); } else if (is_select && storage_snapshot && !columns_context.has_array_join) { diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 3584c9d4caa..999aa32d850 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -81,6 +81,63 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names, query_context->checkAccess(AccessType::SELECT, storage_id, column_names); } +NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot) +{ + /** We need to read at least one column to find the number of rows. + * We will find a column with minimum <compressed_size, type_size, uncompressed_size>, + * because it is the column that is cheapest to read. Columns without an entry in storage->getColumnSizes() are skipped. + */ + class ColumnWithSize + { + public: + ColumnWithSize(NameAndTypePair column_, ColumnSize column_size_) + : column(std::move(column_)) + , compressed_size(column_size_.data_compressed) + , uncompressed_size(column_size_.data_uncompressed) + , type_size(column.type->haveMaximumSizeOfValue() ? 
column.type->getMaximumSizeOfValueInMemory() : 100) + { + } + + bool operator<(const ColumnWithSize & rhs) const + { + return std::tie(compressed_size, type_size, uncompressed_size) + < std::tie(rhs.compressed_size, rhs.type_size, rhs.uncompressed_size); + } + + NameAndTypePair column; + size_t compressed_size = 0; + size_t uncompressed_size = 0; + size_t type_size = 0; + }; + + std::vector<ColumnWithSize> columns_with_sizes; + + auto column_sizes = storage->getColumnSizes(); + auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns()); + + if (!column_sizes.empty()) + { + for (const auto & column_name_and_type : column_names_and_types) + { + auto it = column_sizes.find(column_name_and_type.name); + if (it == column_sizes.end()) + continue; + + columns_with_sizes.emplace_back(column_name_and_type, it->second); + } + } + + NameAndTypePair result; + + if (!columns_with_sizes.empty()) + result = std::min_element(columns_with_sizes.begin(), columns_with_sizes.end())->column; + else + /// If we have no information about columns sizes, choose a column of minimum size of its data type + result = ExpressionActions::getSmallestColumn(column_names_and_types); + + return result; +} + QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, @@ -127,9 +184,7 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, if (columns_names.empty()) { - auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns()); - auto additional_column_to_read = column_names_and_types.front(); - + auto additional_column_to_read = chooseSmallestColumnToReadFromStorage(storage, storage_snapshot); const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression); 
columns_names.push_back(additional_column_to_read.name); table_expression_data.addColumn(additional_column_to_read, column_identifier); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0d8fe84f9d3..e5ad2729e6c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1023,7 +1023,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( if (result.column_names_to_read.empty()) { NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical(); - result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns)); + result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns).name); } // storage_snapshot->check(result.column_names_to_read); diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index bbabd523c45..c7008a317c3 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -599,7 +599,7 @@ Pipe StorageHDFS::read( { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 95bd0e7c53e..922754c2d8c 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -706,7 +706,7 @@ Pipe StorageFile::read( }); if 
(fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); } else diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 79efab9e9d7..3e279b408d7 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -488,7 +488,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu column_names_as_aliases = alias_actions->getRequiredColumns().getNames(); if (column_names_as_aliases.empty()) - column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical())); + column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } auto source_pipeline = createSources( @@ -574,7 +574,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { /// If there are only virtual columns in query, you must request at least one other column. 
if (real_column_names.empty()) - real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); QueryPlan plan; if (StorageView * view = dynamic_cast<StorageView *>(storage.get())) diff --git a/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql new file mode 100644 index 00000000000..105bce6711c --- /dev/null +++ b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql @@ -0,0 +1,15 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + c0 String ALIAS c1, + c1 String, + c2 String, +) ENGINE = MergeTree ORDER BY c1; + +INSERT INTO test_table VALUES ('a', 'b'); + +SELECT MAX(1) FROM test_table; + +DROP TABLE test_table;