mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-21 01:00:48 +00:00
Analyzer aggregation without column fix
This commit is contained in:
parent
91226abbfe
commit
fbba28b31e
@ -790,10 +790,10 @@ void ExpressionActions::assertDeterministic() const
|
||||
}
|
||||
|
||||
|
||||
std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns)
|
||||
NameAndTypePair ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns)
|
||||
{
|
||||
std::optional<size_t> min_size;
|
||||
String res;
|
||||
NameAndTypePair result;
|
||||
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
@ -807,14 +807,14 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum
|
||||
if (!min_size || size < *min_size)
|
||||
{
|
||||
min_size = size;
|
||||
res = column.name;
|
||||
result = column;
|
||||
}
|
||||
}
|
||||
|
||||
if (!min_size)
|
||||
throw Exception("No available columns", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
return res;
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string ExpressionActions::dumpActions() const
|
||||
|
@ -111,7 +111,7 @@ public:
|
||||
std::string dumpActions() const;
|
||||
JSONBuilder::ItemPtr toTree() const;
|
||||
|
||||
static std::string getSmallestColumn(const NamesAndTypesList & columns);
|
||||
static NameAndTypePair getSmallestColumn(const NamesAndTypesList & columns);
|
||||
|
||||
/// Check if column is always zero. True if it's definite, false if we can't say for sure.
|
||||
/// Call it only after subqueries for sets were executed.
|
||||
|
@ -1146,7 +1146,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
|
||||
required.insert(std::min_element(columns.begin(), columns.end())->name);
|
||||
else if (!source_columns.empty())
|
||||
/// If we have no information about columns sizes, choose a column of minimum size of its data type.
|
||||
required.insert(ExpressionActions::getSmallestColumn(source_columns));
|
||||
required.insert(ExpressionActions::getSmallestColumn(source_columns).name);
|
||||
}
|
||||
else if (is_select && storage_snapshot && !columns_context.has_array_join)
|
||||
{
|
||||
|
@ -81,6 +81,63 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names,
|
||||
query_context->checkAccess(AccessType::SELECT, storage_id, column_names);
|
||||
}
|
||||
|
||||
NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot)
{
    /** At least one column has to be read in order to find the number of rows.
      * Pick the column that is cheapest to read, i.e. the one with the minimum
      * <compressed_size, type_size, uncompressed_size> tuple.
      */
    struct SizedColumn
    {
        NameAndTypePair column;
        size_t compressed_size = 0;
        size_t type_size = 0;
        size_t uncompressed_size = 0;
    };

    auto sizes_by_name = storage->getColumnSizes();
    auto physical_columns = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns());

    /// Only columns for which the storage reports a size become candidates.
    std::vector<SizedColumn> candidates;

    if (!sizes_by_name.empty())
    {
        for (auto & physical_column : physical_columns)
        {
            auto size_it = sizes_by_name.find(physical_column.name);
            if (size_it != sizes_by_name.end())
            {
                /// When the type reports no maximum value size, 100 is used instead.
                const size_t type_size = physical_column.type->haveMaximumSizeOfValue()
                    ? physical_column.type->getMaximumSizeOfValueInMemory()
                    : 100;

                candidates.push_back(SizedColumn{physical_column, size_it->second.data_compressed, type_size, size_it->second.data_uncompressed});
            }
        }
    }

    /// If we have no information about columns sizes, choose a column of minimum size of its data type.
    if (candidates.empty())
        return ExpressionActions::getSmallestColumn(physical_columns);

    const auto cheaper = [](const SizedColumn & lhs, const SizedColumn & rhs)
    {
        return std::tie(lhs.compressed_size, lhs.type_size, lhs.uncompressed_size)
            < std::tie(rhs.compressed_size, rhs.type_size, rhs.uncompressed_size);
    };

    /// std::min_element returns the first of several equally cheap candidates.
    return std::min_element(candidates.begin(), candidates.end(), cheaper)->column;
}
|
||||
|
||||
QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression,
|
||||
SelectQueryInfo & select_query_info,
|
||||
const SelectQueryOptions & select_query_options,
|
||||
@ -127,9 +184,7 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression,
|
||||
|
||||
if (columns_names.empty())
|
||||
{
|
||||
auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns());
|
||||
auto additional_column_to_read = column_names_and_types.front();
|
||||
|
||||
auto additional_column_to_read = chooseSmallestColumnToReadFromStorage(storage, storage_snapshot);
|
||||
const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression);
|
||||
columns_names.push_back(additional_column_to_read.name);
|
||||
table_expression_data.addColumn(additional_column_to_read, column_identifier);
|
||||
|
@ -1023,7 +1023,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
|
||||
if (result.column_names_to_read.empty())
|
||||
{
|
||||
NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical();
|
||||
result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns));
|
||||
result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns).name);
|
||||
}
|
||||
|
||||
// storage_snapshot->check(result.column_names_to_read);
|
||||
|
@ -599,7 +599,7 @@ Pipe StorageHDFS::read(
|
||||
{ return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); });
|
||||
|
||||
if (fetch_columns.empty())
|
||||
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
|
||||
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name);
|
||||
|
||||
columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns);
|
||||
block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
|
||||
|
@ -706,7 +706,7 @@ Pipe StorageFile::read(
|
||||
});
|
||||
|
||||
if (fetch_columns.empty())
|
||||
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
|
||||
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name);
|
||||
columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns);
|
||||
}
|
||||
else
|
||||
|
@ -488,7 +488,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu
|
||||
|
||||
column_names_as_aliases = alias_actions->getRequiredColumns().getNames();
|
||||
if (column_names_as_aliases.empty())
|
||||
column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()));
|
||||
column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name);
|
||||
}
|
||||
|
||||
auto source_pipeline = createSources(
|
||||
@ -574,7 +574,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
|
||||
{
|
||||
/// If there are only virtual columns in query, you must request at least one other column.
|
||||
if (real_column_names.empty())
|
||||
real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
|
||||
real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name);
|
||||
|
||||
QueryPlan plan;
|
||||
if (StorageView * view = dynamic_cast<StorageView *>(storage.get()))
|
||||
|
@ -0,0 +1 @@
|
||||
1
|
@ -0,0 +1,15 @@
|
||||
-- Test script: runs an aggregate over a constant (no table column is named in the SELECT)
-- with the experimental analyzer switched on.
SET allow_experimental_analyzer = 1;
|
||||
|
||||
-- Start from a clean state in case an earlier run left the table behind.
DROP TABLE IF EXISTS test_table;
|
||||
-- c0 is declared as an ALIAS of c1; c1 and c2 are ordinary String columns, and c1 is the sorting key.
CREATE TABLE test_table
|
||||
(
|
||||
c0 String ALIAS c1,
|
||||
c1 String,
|
||||
c2 String,
|
||||
) ENGINE = MergeTree ORDER BY c1;
|
||||
|
||||
-- Two values are supplied for a table with three declared columns;
-- presumably they populate c1 and c2, since c0 is an ALIAS (confirm against ClickHouse ALIAS semantics).
INSERT INTO test_table VALUES ('a', 'b');
|
||||
|
||||
-- The aggregate argument is the constant 1, so the query itself references no column of test_table.
SELECT MAX(1) FROM test_table;
|
||||
|
||||
-- Clean up.
DROP TABLE test_table;
|
Loading…
Reference in New Issue
Block a user