mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-20 05:05:38 +00:00
Bring back optimization for reading subcolumns of single column in Compact parts
This commit is contained in:
parent
a2d37aba4d
commit
28534272c9
@ -9,6 +9,8 @@
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
|
||||
#include <Common/logger_useful.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
@ -148,7 +148,8 @@ void MergeTreeReaderCompact::readData(
|
||||
ColumnPtr & column,
|
||||
size_t rows_to_read,
|
||||
const InputStreamGetter & getter,
|
||||
ISerialization::SubstreamsCache & cache)
|
||||
ISerialization::SubstreamsCache & cache,
|
||||
std::unordered_map<String, ColumnPtr> & columns_cache_for_subcolumns)
|
||||
{
|
||||
try
|
||||
{
|
||||
@ -171,6 +172,17 @@ void MergeTreeReaderCompact::readData(
|
||||
const auto & type_in_storage = name_and_type.getTypeInStorage();
|
||||
const auto & name_in_storage = name_and_type.getNameInStorage();
|
||||
|
||||
if (auto cache_for_subcolumns_it = columns_cache_for_subcolumns.find(name_in_storage); cache_for_subcolumns_it != columns_cache_for_subcolumns.end())
|
||||
{
|
||||
auto subcolumn = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), cache_for_subcolumns_it->second);
|
||||
/// TODO: Avoid extra copying.
|
||||
if (column->empty())
|
||||
column = IColumn::mutate(subcolumn);
|
||||
else
|
||||
column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size());
|
||||
}
|
||||
else
|
||||
{
|
||||
auto serialization = getSerializationInPart({name_in_storage, type_in_storage});
|
||||
ColumnPtr temp_column = type_in_storage->createColumn(*serialization);
|
||||
|
||||
@ -182,6 +194,9 @@ void MergeTreeReaderCompact::readData(
|
||||
column = subcolumn;
|
||||
else
|
||||
column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size());
|
||||
|
||||
columns_cache_for_subcolumns[name_in_storage] = temp_column;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -45,7 +45,8 @@ protected:
|
||||
ColumnPtr & column,
|
||||
size_t rows_to_read,
|
||||
const InputStreamGetter & getter,
|
||||
ISerialization::SubstreamsCache & cache);
|
||||
ISerialization::SubstreamsCache & cache,
|
||||
std::unordered_map<String, ColumnPtr> & columns_cache_for_subcolumns);
|
||||
|
||||
void readPrefix(
|
||||
const NameAndTypePair & name_and_type,
|
||||
|
@ -29,6 +29,12 @@ try
|
||||
/// Use cache to avoid reading the column with the same name twice.
|
||||
/// It may happen if there are empty Nested arrays in the part.
|
||||
ISerialization::SubstreamsCache cache;
|
||||
/// If we need to read multiple subcolumns from a single column in storage,
|
||||
/// we will read this column only once and then reuse it to extract all subcolumns.
|
||||
/// We cannot use SubstreamsCache for it, because we may also read the full column itself
|
||||
/// and it might not be empty inside res_columns (and SubstreamsCache contains the whole columns).
|
||||
/// TODO: refactor the code so that we first read all full columns and then extract all subcolumns from them.
|
||||
std::unordered_map<String, ColumnPtr> columns_cache_for_subcolumns;
|
||||
|
||||
for (size_t pos = 0; pos < num_columns; ++pos)
|
||||
{
|
||||
@ -56,7 +62,7 @@ try
|
||||
};
|
||||
|
||||
readPrefix(columns_to_read[pos], buffer_getter, buffer_getter_for_prefix, columns_for_offsets[pos]);
|
||||
readData(columns_to_read[pos], column, rows_to_read, buffer_getter, cache);
|
||||
readData(columns_to_read[pos], column, rows_to_read, buffer_getter, cache, columns_cache_for_subcolumns);
|
||||
}
|
||||
|
||||
++from_mark;
|
||||
|
Loading…
Reference in New Issue
Block a user