mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-20 13:15:42 +00:00
Bring back optimization for reading subcolumns of single column in Compact parts
This commit is contained in:
parent
a2d37aba4d
commit
28534272c9
@ -9,6 +9,8 @@
|
|||||||
#include <DataTypes/DataTypeString.h>
|
#include <DataTypes/DataTypeString.h>
|
||||||
#include <IO/ReadBufferFromString.h>
|
#include <IO/ReadBufferFromString.h>
|
||||||
|
|
||||||
|
#include <Common/logger_useful.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -148,7 +148,8 @@ void MergeTreeReaderCompact::readData(
|
|||||||
ColumnPtr & column,
|
ColumnPtr & column,
|
||||||
size_t rows_to_read,
|
size_t rows_to_read,
|
||||||
const InputStreamGetter & getter,
|
const InputStreamGetter & getter,
|
||||||
ISerialization::SubstreamsCache & cache)
|
ISerialization::SubstreamsCache & cache,
|
||||||
|
std::unordered_map<String, ColumnPtr> & columns_cache_for_subcolumns)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@ -171,6 +172,17 @@ void MergeTreeReaderCompact::readData(
|
|||||||
const auto & type_in_storage = name_and_type.getTypeInStorage();
|
const auto & type_in_storage = name_and_type.getTypeInStorage();
|
||||||
const auto & name_in_storage = name_and_type.getNameInStorage();
|
const auto & name_in_storage = name_and_type.getNameInStorage();
|
||||||
|
|
||||||
|
if (auto cache_for_subcolumns_it = columns_cache_for_subcolumns.find(name_in_storage); cache_for_subcolumns_it != columns_cache_for_subcolumns.end())
|
||||||
|
{
|
||||||
|
auto subcolumn = type_in_storage->getSubcolumn(name_and_type.getSubcolumnName(), cache_for_subcolumns_it->second);
|
||||||
|
/// TODO: Avoid extra copying.
|
||||||
|
if (column->empty())
|
||||||
|
column = IColumn::mutate(subcolumn);
|
||||||
|
else
|
||||||
|
column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
auto serialization = getSerializationInPart({name_in_storage, type_in_storage});
|
auto serialization = getSerializationInPart({name_in_storage, type_in_storage});
|
||||||
ColumnPtr temp_column = type_in_storage->createColumn(*serialization);
|
ColumnPtr temp_column = type_in_storage->createColumn(*serialization);
|
||||||
|
|
||||||
@ -182,6 +194,9 @@ void MergeTreeReaderCompact::readData(
|
|||||||
column = subcolumn;
|
column = subcolumn;
|
||||||
else
|
else
|
||||||
column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size());
|
column->assumeMutable()->insertRangeFrom(*subcolumn, 0, subcolumn->size());
|
||||||
|
|
||||||
|
columns_cache_for_subcolumns[name_in_storage] = temp_column;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -45,7 +45,8 @@ protected:
|
|||||||
ColumnPtr & column,
|
ColumnPtr & column,
|
||||||
size_t rows_to_read,
|
size_t rows_to_read,
|
||||||
const InputStreamGetter & getter,
|
const InputStreamGetter & getter,
|
||||||
ISerialization::SubstreamsCache & cache);
|
ISerialization::SubstreamsCache & cache,
|
||||||
|
std::unordered_map<String, ColumnPtr> & columns_cache_for_subcolumns);
|
||||||
|
|
||||||
void readPrefix(
|
void readPrefix(
|
||||||
const NameAndTypePair & name_and_type,
|
const NameAndTypePair & name_and_type,
|
||||||
|
@ -29,6 +29,12 @@ try
|
|||||||
/// Use cache to avoid reading the column with the same name twice.
|
/// Use cache to avoid reading the column with the same name twice.
|
||||||
/// It may happen if there are empty array Nested in the part.
|
/// It may happen if there are empty array Nested in the part.
|
||||||
ISerialization::SubstreamsCache cache;
|
ISerialization::SubstreamsCache cache;
|
||||||
|
/// If we need to read multiple subcolumns from a single column in storage,
|
||||||
|
/// we will read this column only once and then reuse it to extract all subcolumns.
|
||||||
|
/// We cannot use SubstreamsCache for it, because we may also read the full column itself
|
||||||
|
/// and it might not be empty inside res_columns (and SubstreamsCache contains the whole columns).
|
||||||
|
/// TODO: refactor the code so that we first read all full columns and then extract all subcolumns from them.
|
||||||
|
std::unordered_map<String, ColumnPtr> columns_cache_for_subcolumns;
|
||||||
|
|
||||||
for (size_t pos = 0; pos < num_columns; ++pos)
|
for (size_t pos = 0; pos < num_columns; ++pos)
|
||||||
{
|
{
|
||||||
@ -56,7 +62,7 @@ try
|
|||||||
};
|
};
|
||||||
|
|
||||||
readPrefix(columns_to_read[pos], buffer_getter, buffer_getter_for_prefix, columns_for_offsets[pos]);
|
readPrefix(columns_to_read[pos], buffer_getter, buffer_getter_for_prefix, columns_for_offsets[pos]);
|
||||||
readData(columns_to_read[pos], column, rows_to_read, buffer_getter, cache);
|
readData(columns_to_read[pos], column, rows_to_read, buffer_getter, cache, columns_cache_for_subcolumns);
|
||||||
}
|
}
|
||||||
|
|
||||||
++from_mark;
|
++from_mark;
|
||||||
|
Loading…
Reference in New Issue
Block a user