ClickHouse/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp

120 lines
4.5 KiB
C++
Raw Normal View History

2020-04-14 19:47:19 +00:00
#include <Storages/MergeTree/MergeTreeReaderInMemory.h>
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/NestedUtils.h>
2020-06-01 17:52:09 +00:00
#include <Columns/ColumnArray.h>
2020-04-14 19:47:19 +00:00
namespace DB
{
namespace ErrorCodes
{
    /// Error codes referenced in this translation unit; only declared here.
    extern const int ARGUMENT_OUT_OF_BOUND;  /// readRows: requested mark is past the last mark of the part.
    extern const int CANNOT_READ_ALL_DATA;   /// readRows: all rows of the in-memory block were already read.
    extern const int LOGICAL_ERROR;          /// getColumnFromBlock: expected column is absent from the block.
}
/// Creates a reader over the block held by an in-memory data part.
///
/// `data_part_` is handed to the base class by copy first and then moved into `part_in_memory`,
/// so both refer to the same part. The remaining arguments are forwarded to IMergeTreeReader.
///
/// While constructing, remembers (in `positions_for_offsets`) for every requested Array column
/// that is absent from the part's block the position returned by findColumnForOffsets(),
/// so that readRows() can later copy offsets from that column.
MergeTreeReaderInMemory::MergeTreeReaderInMemory(
    DataPartInMemoryPtr data_part_,
    NamesAndTypesList columns_,
    const StorageMetadataPtr & metadata_snapshot_,
    MarkRanges mark_ranges_,
    MergeTreeReaderSettings settings_)
    : IMergeTreeReader(data_part_, std::move(columns_), metadata_snapshot_,
        nullptr, nullptr, std::move(mark_ranges_),
        std::move(settings_), {})
    , part_in_memory(std::move(data_part_))
{
    for (const auto & requested_column : columns)
    {
        const auto column_in_part = getColumnFromPart(requested_column);

        /// Columns that are present in the block are read directly; nothing to prepare.
        if (part_in_memory->block.has(column_in_part.name))
            continue;

        /// Only arrays have offsets that could be borrowed from another column.
        if (!typeid_cast<const DataTypeArray *>(column_in_part.type.get()))
            continue;

        /// If array of Nested column is missing in part,
        /// we have to read its offsets if they exist.
        if (auto offsets_position = findColumnForOffsets(column_in_part.name))
            positions_for_offsets[column_in_part.name] = *offsets_position;
    }
}
/// Fetches from `block` the column that backs `name_and_type`.
///
/// The lookup is done by the name in storage. For a subcolumn, the result is extracted
/// from the stored column via the storage type's getSubcolumn(); otherwise the stored
/// column itself is returned.
///
/// Throws LOGICAL_ERROR if the block has no column with the storage name.
static ColumnPtr getColumnFromBlock(const Block & block, const NameAndTypePair & name_and_type)
{
    auto name_in_storage = name_and_type.getNameInStorage();
    if (!block.has(name_in_storage))
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Not found column '{}' in block", name_in_storage);

    const auto & stored_column = block.getByName(name_in_storage).column;

    /// Ordinary column: hand back what is stored as is.
    if (!name_and_type.isSubcolumn())
        return stored_column;

    return name_and_type.getTypeInStorage()->getSubcolumn(name_and_type.getSubcolumnName(), *stored_column);
}
2020-06-01 17:52:09 +00:00
size_t MergeTreeReaderInMemory::readRows(size_t from_mark, bool continue_reading, size_t max_rows_to_read, Columns & res_columns)
2020-04-14 19:47:19 +00:00
{
2020-06-01 17:52:09 +00:00
if (!continue_reading)
total_rows_read = 0;
2020-04-14 19:47:19 +00:00
size_t total_marks = data_part->index_granularity.getMarksCount();
if (from_mark >= total_marks)
throw Exception("Mark " + toString(from_mark) + " is out of bound. Max mark: "
+ toString(total_marks), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
size_t num_columns = res_columns.size();
checkNumberOfColumns(num_columns);
size_t part_rows = part_in_memory->block.rows();
if (total_rows_read >= part_rows)
throw Exception("Cannot read data in MergeTreeReaderInMemory. Rows already read: "
+ toString(total_rows_read) + ". Rows in part: " + toString(part_rows), ErrorCodes::CANNOT_READ_ALL_DATA);
2020-06-01 17:52:09 +00:00
size_t rows_to_read = std::min(max_rows_to_read, part_rows - total_rows_read);
2020-04-14 19:47:19 +00:00
auto column_it = columns.begin();
for (size_t i = 0; i < num_columns; ++i, ++column_it)
{
auto name_type = getColumnFromPart(*column_it);
2020-04-14 19:47:19 +00:00
2020-06-03 09:51:23 +00:00
/// Copy offsets, if array of Nested column is missing in part.
auto offsets_it = positions_for_offsets.find(name_type.name);
if (offsets_it != positions_for_offsets.end() && !name_type.isSubcolumn())
2020-04-14 19:47:19 +00:00
{
2020-06-01 17:52:09 +00:00
const auto & source_offsets = assert_cast<const ColumnArray &>(
*part_in_memory->block.getByPosition(offsets_it->second).column).getOffsets();
2020-04-14 19:47:19 +00:00
if (res_columns[i] == nullptr)
res_columns[i] = name_type.type->createColumn();
2020-04-14 19:47:19 +00:00
auto mutable_column = res_columns[i]->assumeMutable();
2020-06-01 17:52:09 +00:00
auto & res_offstes = assert_cast<ColumnArray &>(*mutable_column).getOffsets();
size_t start_offset = total_rows_read ? source_offsets[total_rows_read - 1] : 0;
2020-06-01 17:52:09 +00:00
for (size_t row = 0; row < rows_to_read; ++row)
res_offstes.push_back(source_offsets[total_rows_read + row] - start_offset);
2020-06-01 17:52:09 +00:00
2020-04-14 19:47:19 +00:00
res_columns[i] = std::move(mutable_column);
}
else if (part_in_memory->hasColumnFiles(name_type))
2020-06-01 17:52:09 +00:00
{
auto block_column = getColumnFromBlock(part_in_memory->block, name_type);
2020-06-01 17:52:09 +00:00
if (rows_to_read == part_rows)
{
res_columns[i] = block_column;
}
else
{
if (res_columns[i] == nullptr)
res_columns[i] = name_type.type->createColumn();
2020-06-01 17:52:09 +00:00
auto mutable_column = res_columns[i]->assumeMutable();
mutable_column->insertRangeFrom(*block_column, total_rows_read, rows_to_read);
res_columns[i] = std::move(mutable_column);
}
}
2020-04-14 19:47:19 +00:00
}
2020-06-01 17:52:09 +00:00
total_rows_read += rows_to_read;
return rows_to_read;
2020-04-14 19:47:19 +00:00
}
}