mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Merge pull request #37253 from Avogar/fix-defaults
Fix inserting defaults for missing values in columnar formats
This commit is contained in:
commit
754e675ec3
@ -196,7 +196,7 @@ static auto getNameRange(const ColumnsDescription::ColumnsContainer & columns, c
|
||||
return std::make_pair(begin, end);
|
||||
}
|
||||
|
||||
void ColumnsDescription::add(ColumnDescription column, const String & after_column, bool first)
|
||||
void ColumnsDescription::add(ColumnDescription column, const String & after_column, bool first, bool add_subcolumns)
|
||||
{
|
||||
if (has(column.name))
|
||||
throw Exception("Cannot add column " + column.name + ": column with this name already exists",
|
||||
@ -222,7 +222,8 @@ void ColumnsDescription::add(ColumnDescription column, const String & after_colu
|
||||
insert_it = range.second;
|
||||
}
|
||||
|
||||
addSubcolumns(column.name, column.type);
|
||||
if (add_subcolumns)
|
||||
addSubcolumns(column.name, column.type);
|
||||
columns.get<0>().insert(insert_it, std::move(column));
|
||||
}
|
||||
|
||||
@ -572,6 +573,27 @@ std::optional<NameAndTypePair> ColumnsDescription::tryGetColumnOrSubcolumn(GetCo
|
||||
return tryGetColumn(GetColumnsOptions(kind).withSubcolumns(), column_name);
|
||||
}
|
||||
|
||||
/// Looks up `column_name` and returns the matching ColumnDescription, or an empty optional if nothing matches.
/// A regular column matches only when its default kind, mapped through defaultKindToGetKind(),
/// intersects `options.kind`. If no regular column matched and `options.with_subcolumns` is set,
/// the subcolumns container is searched by the same name.
std::optional<const ColumnDescription> ColumnsDescription::tryGetColumnDescription(const GetColumnsOptions & options, const String & column_name) const
|
||||
{
|
||||
auto it = columns.get<1>().find(column_name);
|
||||
/// The column exists, but it is returned only if its kind was requested in `options`.
if (it != columns.get<1>().end() && (defaultKindToGetKind(it->default_desc.kind) & options.kind))
|
||||
return *it;
|
||||
|
||||
if (options.with_subcolumns)
|
||||
{
|
||||
auto jt = subcolumns.get<0>().find(column_name);
|
||||
if (jt != subcolumns.get<0>().end())
|
||||
/// A subcolumn entry provides only a name and a type, so the description is built from those two fields.
return ColumnDescription{jt->name, jt->type};
|
||||
}
|
||||
|
||||
/// Neither a column of a requested kind nor a subcolumn was found.
return {};
|
||||
}
|
||||
|
||||
/// Shortcut for tryGetColumnDescription() with subcolumn lookup enabled:
/// builds GetColumnsOptions from `kind`, calls withSubcolumns() on it and forwards `column_name`.
std::optional<const ColumnDescription> ColumnsDescription::tryGetColumnOrSubcolumnDescription(GetColumnsOptions::Kind kind, const String & column_name) const
|
||||
{
|
||||
return tryGetColumnDescription(GetColumnsOptions(kind).withSubcolumns(), column_name);
|
||||
}
|
||||
|
||||
NameAndTypePair ColumnsDescription::getColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const
|
||||
{
|
||||
auto column = tryGetColumnOrSubcolumn(kind, column_name);
|
||||
|
@ -100,7 +100,7 @@ public:
|
||||
explicit ColumnsDescription(NamesAndTypesList ordinary, NamesAndAliases aliases);
|
||||
|
||||
/// `after_column` can be a Nested column name;
|
||||
void add(ColumnDescription column, const String & after_column = String(), bool first = false);
|
||||
void add(ColumnDescription column, const String & after_column = String(), bool first = false, bool add_subcolumns = true);
|
||||
/// `column_name` can be a Nested column name;
|
||||
void remove(const String & column_name);
|
||||
|
||||
@ -180,6 +180,9 @@ public:
|
||||
std::optional<NameAndTypePair> tryGetColumnOrSubcolumn(GetColumnsOptions::Kind kind, const String & column_name) const;
|
||||
std::optional<NameAndTypePair> tryGetColumn(const GetColumnsOptions & options, const String & column_name) const;
|
||||
|
||||
/// Same as tryGetColumnDescription(), with options built from `kind` and subcolumn lookup enabled.
std::optional<const ColumnDescription> tryGetColumnOrSubcolumnDescription(GetColumnsOptions::Kind kind, const String & column_name) const;
|
||||
/// Returns the description of the column named `column_name` if its kind is allowed by `options`,
/// otherwise (when `options.with_subcolumns` is set) a description built from the matching subcolumn's
/// name and type; returns an empty optional if nothing matches.
std::optional<const ColumnDescription> tryGetColumnDescription(const GetColumnsOptions & options, const String & column_name) const;
|
||||
|
||||
ColumnDefaults getDefaults() const; /// TODO: remove
|
||||
bool hasDefault(const String & column_name) const;
|
||||
bool hasDefaults() const;
|
||||
|
@ -539,8 +539,7 @@ Pipe StorageHDFS::read(
|
||||
if (fetch_columns.empty())
|
||||
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
|
||||
|
||||
columns_description = ColumnsDescription{
|
||||
storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()};
|
||||
columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns);
|
||||
block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
|
||||
}
|
||||
else
|
||||
|
@ -691,8 +691,7 @@ Pipe StorageFile::read(
|
||||
const auto get_columns_for_format = [&]() -> ColumnsDescription
|
||||
{
|
||||
if (isColumnOriented())
|
||||
return ColumnsDescription{
|
||||
storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()};
|
||||
return storage_snapshot->getDescriptionForColumns(column_names);
|
||||
else
|
||||
return storage_snapshot->metadata->getColumns();
|
||||
};
|
||||
|
@ -719,8 +719,7 @@ Pipe StorageS3::read(
|
||||
if (fetch_columns.empty())
|
||||
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
|
||||
|
||||
columns_description = ColumnsDescription{
|
||||
storage_snapshot->getSampleBlockForColumns(fetch_columns).getNamesAndTypesList()};
|
||||
columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns);
|
||||
block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
|
||||
}
|
||||
else
|
||||
|
@ -92,32 +92,40 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co
|
||||
/// Builds a Block containing one entry per column of getDescriptionForColumns(column_names).
/// Name resolution is delegated entirely to getDescriptionForColumns().
Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const
|
||||
{
|
||||
Block res;
|
||||
auto columns_description = getDescriptionForColumns(column_names);
|
||||
for (const auto & column : columns_description)
|
||||
/// Each entry is a new column object created from the column's type, together with that type and the column name.
res.insert({column.type->createColumn(), column.type, column.name});
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
ColumnsDescription StorageSnapshot::getDescriptionForColumns(const Names & column_names) const
|
||||
{
|
||||
ColumnsDescription res;
|
||||
const auto & columns = getMetadataForQuery()->getColumns();
|
||||
for (const auto & name : column_names)
|
||||
{
|
||||
auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name);
|
||||
auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name);
|
||||
|
||||
auto column = columns.tryGetColumnOrSubcolumnDescription(GetColumnsOptions::All, name);
|
||||
auto object_column = object_columns.tryGetColumnOrSubcolumnDescription(GetColumnsOptions::All, name);
|
||||
if (column && !object_column)
|
||||
{
|
||||
res.insert({column->type->createColumn(), column->type, column->name});
|
||||
res.add(*column, "", false, false);
|
||||
}
|
||||
else if (object_column)
|
||||
{
|
||||
res.insert({object_column->type->createColumn(), object_column->type, object_column->name});
|
||||
res.add(*object_column, "", false, false);
|
||||
}
|
||||
else if (auto it = virtual_columns.find(name); it != virtual_columns.end())
|
||||
{
|
||||
/// Virtual columns must be appended after ordinary, because user can
|
||||
/// override them.
|
||||
const auto & type = it->second;
|
||||
res.insert({type->createColumn(), type, name});
|
||||
res.add({name, type});
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK,
|
||||
"Column {} not found in table {}", backQuote(name), storage.getStorageID().getNameForLogs());
|
||||
"Column {} not found in table {}", backQuote(name), storage.getStorageID().getNameForLogs());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -68,6 +68,8 @@ struct StorageSnapshot
|
||||
/// Block with ordinary + materialized + aliases + virtuals + subcolumns.
|
||||
Block getSampleBlockForColumns(const Names & column_names) const;
|
||||
|
||||
ColumnsDescription getDescriptionForColumns(const Names & column_names) const;
|
||||
|
||||
/// Verify that all the requested names are in the table and are set correctly:
|
||||
/// list of names is not empty and the names do not repeat.
|
||||
void check(const Names & column_names) const;
|
||||
|
@ -602,8 +602,7 @@ Pipe IStorageURLBase::read(
|
||||
Block block_for_format;
|
||||
if (isColumnOriented())
|
||||
{
|
||||
columns_description = ColumnsDescription{
|
||||
storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()};
|
||||
columns_description = storage_snapshot->getDescriptionForColumns(column_names);
|
||||
block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
|
||||
}
|
||||
else
|
||||
@ -690,8 +689,7 @@ Pipe StorageURLWithFailover::read(
|
||||
Block block_for_format;
|
||||
if (isColumnOriented())
|
||||
{
|
||||
columns_description = ColumnsDescription{
|
||||
storage_snapshot->getSampleBlockForColumns(column_names).getNamesAndTypesList()};
|
||||
columns_description = storage_snapshot->getDescriptionForColumns(column_names);
|
||||
block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
|
||||
}
|
||||
else
|
||||
|
@ -0,0 +1,3 @@
|
||||
1 42 43
|
||||
1 42 43
|
||||
1 42 43
|
@ -0,0 +1,8 @@
|
||||
-- Tags: no-fasttest, no-parallel
|
||||
|
||||
-- For each of Parquet, ORC and Arrow: write a file that contains only column `x` (value 1),
-- then read it back with a structure that also declares `y default 42` and `z default x + y`,
-- with the format's *_allow_missing_columns setting enabled.
-- The columns absent from the file should be filled from their default expressions,
-- i.e. each select is expected to return the row: 1 42 43.
insert into function file(data_02302.parquet) select 1 as x settings engine_file_truncate_on_insert=1;
|
||||
select * from file(data_02302.parquet, auto, 'x UInt8, y default 42, z default x + y') settings input_format_parquet_allow_missing_columns=1;
|
||||
-- Same check for ORC.
insert into function file(data_02302.orc) select 1 as x settings engine_file_truncate_on_insert=1;
|
||||
select * from file(data_02302.orc, auto, 'x UInt8, y default 42, z default x + y') settings input_format_orc_allow_missing_columns=1;
|
||||
-- Same check for Arrow.
insert into function file(data_02302.arrow) select 1 as x settings engine_file_truncate_on_insert=1;
|
||||
select * from file(data_02302.arrow, auto, 'x UInt8, y default 42, z default x + y') settings input_format_arrow_allow_missing_columns=1;
|
Loading…
Reference in New Issue
Block a user