mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-30 13:40:50 +00:00
fix nested and subcolumns
This commit is contained in:
parent
d7aad3bf79
commit
06d5b87bc9
@ -86,7 +86,7 @@ Block flatten(const Block & block)
|
|||||||
for (const auto & elem : block)
|
for (const auto & elem : block)
|
||||||
{
|
{
|
||||||
const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(elem.type.get());
|
const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(elem.type.get());
|
||||||
if (!isNested(elem.type) && type_arr)
|
if (type_arr)
|
||||||
{
|
{
|
||||||
const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type_arr->getNestedType().get());
|
const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type_arr->getNestedType().get());
|
||||||
if (type_tuple && type_tuple->haveExplicitNames())
|
if (type_tuple && type_tuple->haveExplicitNames())
|
||||||
@ -130,12 +130,14 @@ Block flatten(const Block & block)
|
|||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
NamesAndTypesList collect(const NamesAndTypesList & names_and_types)
|
|
||||||
{
|
{
|
||||||
NamesAndTypesList res = names_and_types;
|
|
||||||
|
|
||||||
std::map<std::string, NamesAndTypesList> nested;
|
using NameToDataType = std::map<String, DataTypePtr>;
|
||||||
|
|
||||||
|
NameToDataType getSubcolumnsOfNested(const NamesAndTypesList & names_and_types)
|
||||||
|
{
|
||||||
|
std::unordered_map<String, NamesAndTypesList> nested;
|
||||||
for (const auto & name_type : names_and_types)
|
for (const auto & name_type : names_and_types)
|
||||||
{
|
{
|
||||||
const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(name_type.type.get());
|
const DataTypeArray * type_arr = typeid_cast<const DataTypeArray *>(name_type.type.get());
|
||||||
@ -149,10 +151,36 @@ NamesAndTypesList collect(const NamesAndTypesList & names_and_types)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unordered_map<String, DataTypePtr> nested_types;
|
std::map<String, DataTypePtr> nested_types;
|
||||||
|
|
||||||
for (const auto & [name, elems] : nested)
|
for (const auto & [name, elems] : nested)
|
||||||
nested_types.emplace(name, createNested(elems.getTypes(), elems.getNames()));
|
nested_types.emplace(name, createNested(elems.getTypes(), elems.getNames()));
|
||||||
|
|
||||||
|
return nested_types;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
NamesAndTypesList collect(const NamesAndTypesList & names_and_types)
|
||||||
|
{
|
||||||
|
NamesAndTypesList res;
|
||||||
|
auto nested_types = getSubcolumnsOfNested(names_and_types);
|
||||||
|
|
||||||
|
for (const auto & name_type : names_and_types)
|
||||||
|
if (!nested_types.count(splitName(name_type.name).first))
|
||||||
|
res.push_back(name_type);
|
||||||
|
|
||||||
|
for (const auto & name_type : nested_types)
|
||||||
|
res.emplace_back(name_type.first, name_type.second);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
NamesAndTypesList convertToSubcolumns(const NamesAndTypesList & names_and_types)
|
||||||
|
{
|
||||||
|
auto nested_types = getSubcolumnsOfNested(names_and_types);
|
||||||
|
auto res = names_and_types;
|
||||||
|
|
||||||
for (auto & name_type : res)
|
for (auto & name_type : res)
|
||||||
{
|
{
|
||||||
auto split = splitName(name_type.name);
|
auto split = splitName(name_type.name);
|
||||||
|
@ -23,6 +23,9 @@ namespace Nested
|
|||||||
/// Collect Array columns in a form of `column_name.element_name` to single Array(Tuple(...)) column.
|
/// Collect Array columns in a form of `column_name.element_name` to single Array(Tuple(...)) column.
|
||||||
NamesAndTypesList collect(const NamesAndTypesList & names_and_types);
|
NamesAndTypesList collect(const NamesAndTypesList & names_and_types);
|
||||||
|
|
||||||
|
/// Convert old-style nested (single arrays with same prefix, `n.a`, `n.b`...) to subcolumns of data type Nested.
|
||||||
|
NamesAndTypesList convertToSubcolumns(const NamesAndTypesList & names_and_types);
|
||||||
|
|
||||||
/// Check that sizes of arrays - elements of nested data structures - are equal.
|
/// Check that sizes of arrays - elements of nested data structures - are equal.
|
||||||
void validateArraySizes(const Block & block);
|
void validateArraySizes(const Block & block);
|
||||||
}
|
}
|
||||||
|
@ -51,7 +51,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader(
|
|||||||
{
|
{
|
||||||
auto ptr = std::static_pointer_cast<const MergeTreeDataPartInMemory>(shared_from_this());
|
auto ptr = std::static_pointer_cast<const MergeTreeDataPartInMemory>(shared_from_this());
|
||||||
return std::make_unique<MergeTreeReaderInMemory>(
|
return std::make_unique<MergeTreeReaderInMemory>(
|
||||||
ptr, Nested::collect(columns_to_read), metadata_snapshot, mark_ranges, reader_settings);
|
ptr, columns_to_read, metadata_snapshot, mark_ranges, reader_settings);
|
||||||
}
|
}
|
||||||
|
|
||||||
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter(
|
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter(
|
||||||
|
@ -50,7 +50,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader(
|
|||||||
{
|
{
|
||||||
auto ptr = std::static_pointer_cast<const MergeTreeDataPartWide>(shared_from_this());
|
auto ptr = std::static_pointer_cast<const MergeTreeDataPartWide>(shared_from_this());
|
||||||
return std::make_unique<MergeTreeReaderWide>(
|
return std::make_unique<MergeTreeReaderWide>(
|
||||||
ptr, Nested::collect(columns_to_read), metadata_snapshot, uncompressed_cache,
|
ptr, Nested::convertToSubcolumns(columns_to_read), metadata_snapshot, uncompressed_cache,
|
||||||
mark_cache, mark_ranges, reader_settings,
|
mark_cache, mark_ranges, reader_settings,
|
||||||
avg_value_size_hints, profile_callback);
|
avg_value_size_hints, profile_callback);
|
||||||
}
|
}
|
||||||
|
@ -95,7 +95,7 @@ public:
|
|||||||
BufferSource(const Names & column_names_, StorageBuffer::Buffer & buffer_, const StorageBuffer & storage, const StorageMetadataPtr & metadata_snapshot)
|
BufferSource(const Names & column_names_, StorageBuffer::Buffer & buffer_, const StorageBuffer & storage, const StorageMetadataPtr & metadata_snapshot)
|
||||||
: SourceWithProgress(
|
: SourceWithProgress(
|
||||||
metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID()))
|
metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID()))
|
||||||
, column_names(column_names_.begin(), column_names_.end())
|
, column_names_and_types(metadata_snapshot->getColumns().getAllWithSubcolumns().addTypes(column_names_))
|
||||||
, buffer(buffer_) {}
|
, buffer(buffer_) {}
|
||||||
|
|
||||||
String getName() const override { return "Buffer"; }
|
String getName() const override { return "Buffer"; }
|
||||||
@ -115,10 +115,16 @@ protected:
|
|||||||
return res;
|
return res;
|
||||||
|
|
||||||
Columns columns;
|
Columns columns;
|
||||||
columns.reserve(column_names.size());
|
columns.reserve(column_names_and_types.size());
|
||||||
|
|
||||||
for (const auto & name : column_names)
|
for (const auto & elem : column_names_and_types)
|
||||||
columns.push_back(buffer.data.getByName(name).column);
|
{
|
||||||
|
const auto & current_column = buffer.data.getByName(elem.getStorageName()).column;
|
||||||
|
if (elem.isSubcolumn())
|
||||||
|
columns.emplace_back(elem.getStorageType()->getSubcolumn(elem.getSubcolumnName(), *current_column->assumeMutable()));
|
||||||
|
else
|
||||||
|
columns.emplace_back(std::move(current_column));
|
||||||
|
}
|
||||||
|
|
||||||
UInt64 size = columns.at(0)->size();
|
UInt64 size = columns.at(0)->size();
|
||||||
res.setColumns(std::move(columns), size);
|
res.setColumns(std::move(columns), size);
|
||||||
@ -127,7 +133,7 @@ protected:
|
|||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Names column_names;
|
NamesAndTypesList column_names_and_types;
|
||||||
StorageBuffer::Buffer & buffer;
|
StorageBuffer::Buffer & buffer;
|
||||||
bool has_been_read = false;
|
bool has_been_read = false;
|
||||||
};
|
};
|
||||||
@ -188,8 +194,8 @@ void StorageBuffer::read(
|
|||||||
{
|
{
|
||||||
const auto & dest_columns = destination_metadata_snapshot->getColumns();
|
const auto & dest_columns = destination_metadata_snapshot->getColumns();
|
||||||
const auto & our_columns = metadata_snapshot->getColumns();
|
const auto & our_columns = metadata_snapshot->getColumns();
|
||||||
return dest_columns.hasPhysical(column_name) &&
|
return dest_columns.hasPhysicalOrSubcolumn(column_name) &&
|
||||||
dest_columns.get(column_name).type->equals(*our_columns.get(column_name).type);
|
dest_columns.getPhysicalOrSubcolumn(column_name).type->equals(*our_columns.getPhysicalOrSubcolumn(column_name).type);
|
||||||
});
|
});
|
||||||
|
|
||||||
if (dst_has_same_structure)
|
if (dst_has_same_structure)
|
||||||
|
@ -57,7 +57,6 @@ public:
|
|||||||
for (const auto & name_type : columns)
|
for (const auto & name_type : columns)
|
||||||
res.insert({ name_type.type->createColumn(), name_type.type, name_type.name });
|
res.insert({ name_type.type->createColumn(), name_type.type, name_type.name });
|
||||||
|
|
||||||
// return Nested::flatten(res);
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -628,7 +627,7 @@ Pipe StorageLog::read(
|
|||||||
loadMarks();
|
loadMarks();
|
||||||
|
|
||||||
auto all_columns = metadata_snapshot->getColumns().getAllWithSubcolumns().addTypes(column_names);
|
auto all_columns = metadata_snapshot->getColumns().getAllWithSubcolumns().addTypes(column_names);
|
||||||
all_columns = Nested::collect(all_columns);
|
all_columns = Nested::convertToSubcolumns(all_columns);
|
||||||
|
|
||||||
std::shared_lock<std::shared_mutex> lock(rwlock);
|
std::shared_lock<std::shared_mutex> lock(rwlock);
|
||||||
|
|
||||||
|
@ -219,7 +219,6 @@ Chunk TinyLogSource::generate()
|
|||||||
streams.clear();
|
streams.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
// auto flatten = Nested::flatten(res);
|
|
||||||
return Chunk(res.getColumns(), res.rows());
|
return Chunk(res.getColumns(), res.rows());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -449,7 +448,7 @@ Pipe StorageTinyLog::read(
|
|||||||
// When reading, we lock the entire storage, because we only have one file
|
// When reading, we lock the entire storage, because we only have one file
|
||||||
// per column and can't modify it concurrently.
|
// per column and can't modify it concurrently.
|
||||||
return Pipe(std::make_shared<TinyLogSource>(
|
return Pipe(std::make_shared<TinyLogSource>(
|
||||||
max_block_size, Nested::collect(all_columns),
|
max_block_size, Nested::convertToSubcolumns(all_columns),
|
||||||
*this, context.getSettingsRef().max_read_buffer_size));
|
*this, context.getSettingsRef().max_read_buffer_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user