2020-04-14 19:47:19 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeReaderInMemory.h>
|
|
|
|
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
|
2022-02-09 00:18:53 +00:00
|
|
|
#include <Interpreters/getColumnFromBlock.h>
|
2020-04-14 19:47:19 +00:00
|
|
|
#include <DataTypes/DataTypeArray.h>
|
|
|
|
#include <DataTypes/NestedUtils.h>
|
2020-06-01 17:52:09 +00:00
|
|
|
#include <Columns/ColumnArray.h>
|
2020-04-14 19:47:19 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes raised by this reader; they are only declared here and defined elsewhere.
namespace ErrorCodes
{
    /// Thrown by readRows() when the requested mark is not below the part's mark count.
    extern const int ARGUMENT_OUT_OF_BOUND;
    /// Thrown by readRows() when every row of the part has already been read.
    extern const int CANNOT_READ_ALL_DATA;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Creates a reader over a part whose rows live in `data_part_->block`.
///
/// Besides forwarding the common arguments to IMergeTreeReader, the constructor
/// works out which requested Array columns are absent from the block but can still
/// have their offsets taken from another column of the block. For each such column
/// it remembers the position of the donor column in `positions_for_offsets` and
/// registers the column name in `partially_read_columns`.
MergeTreeReaderInMemory::MergeTreeReaderInMemory(
    MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
    DataPartInMemoryPtr data_part_,
    NamesAndTypesList columns_,
    const StorageMetadataPtr & metadata_snapshot_,
    MarkRanges mark_ranges_,
    MergeTreeReaderSettings settings_)
    : IMergeTreeReader(
        data_part_info_for_read_,
        columns_,
        metadata_snapshot_,
        nullptr,
        nullptr,
        mark_ranges_,
        settings_,
        {})
    , part_in_memory(std::move(data_part_))
{
    for (const auto & requested : columns_to_read)
    {
        /// Only Array-typed columns are candidates for the offsets-only read.
        const bool is_array = typeid_cast<const DataTypeArray *>(requested.type.get()) != nullptr;
        if (!is_array)
            continue;

        /// The column itself is available in the block, nothing special to prepare.
        if (tryGetColumnFromBlock(part_in_memory->block, requested))
            continue;

        /// The array of a Nested column is missing in the part:
        /// its offsets have to be read if some other column can provide them.
        const auto offsets_position = findColumnForOffsets(requested);
        if (!offsets_position)
            continue;

        partially_read_columns.insert(requested.name);
        positions_for_offsets[requested.name] = *offsets_position;
    }
}
|
|
|
|
|
2021-10-15 08:36:26 +00:00
|
|
|
size_t MergeTreeReaderInMemory::readRows(
|
|
|
|
size_t from_mark, size_t /* current_task_last_mark */, bool continue_reading, size_t max_rows_to_read, Columns & res_columns)
|
2020-04-14 19:47:19 +00:00
|
|
|
{
|
2020-06-01 17:52:09 +00:00
|
|
|
if (!continue_reading)
|
|
|
|
total_rows_read = 0;
|
|
|
|
|
2022-09-05 16:55:00 +00:00
|
|
|
size_t total_marks = data_part_info_for_read->getIndexGranularity().getMarksCount();
|
2020-04-14 19:47:19 +00:00
|
|
|
if (from_mark >= total_marks)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Mark {} is out of bound. Max mark: {}",
|
|
|
|
toString(from_mark), toString(total_marks));
|
2020-04-14 19:47:19 +00:00
|
|
|
|
|
|
|
size_t num_columns = res_columns.size();
|
|
|
|
checkNumberOfColumns(num_columns);
|
|
|
|
|
|
|
|
size_t part_rows = part_in_memory->block.rows();
|
|
|
|
if (total_rows_read >= part_rows)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data in MergeTreeReaderInMemory. "
|
|
|
|
"Rows already read: {}. Rows in part: {}", total_rows_read, part_rows);
|
2020-04-14 19:47:19 +00:00
|
|
|
|
2020-06-01 17:52:09 +00:00
|
|
|
size_t rows_to_read = std::min(max_rows_to_read, part_rows - total_rows_read);
|
2022-07-27 14:05:16 +00:00
|
|
|
for (size_t i = 0; i < num_columns; ++i)
|
2020-04-14 19:47:19 +00:00
|
|
|
{
|
2022-07-27 14:05:16 +00:00
|
|
|
const auto & column_to_read = columns_to_read[i];
|
2020-04-14 19:47:19 +00:00
|
|
|
|
2020-06-03 09:51:23 +00:00
|
|
|
/// Copy offsets, if array of Nested column is missing in part.
|
2022-07-27 14:05:16 +00:00
|
|
|
auto offsets_it = positions_for_offsets.find(column_to_read.name);
|
|
|
|
if (offsets_it != positions_for_offsets.end() && !column_to_read.isSubcolumn())
|
2020-04-14 19:47:19 +00:00
|
|
|
{
|
2020-06-01 17:52:09 +00:00
|
|
|
const auto & source_offsets = assert_cast<const ColumnArray &>(
|
|
|
|
*part_in_memory->block.getByPosition(offsets_it->second).column).getOffsets();
|
|
|
|
|
2020-04-14 19:47:19 +00:00
|
|
|
if (res_columns[i] == nullptr)
|
2022-07-27 14:05:16 +00:00
|
|
|
res_columns[i] = column_to_read.type->createColumn();
|
2020-04-14 19:47:19 +00:00
|
|
|
|
|
|
|
auto mutable_column = res_columns[i]->assumeMutable();
|
2020-06-01 17:52:09 +00:00
|
|
|
auto & res_offstes = assert_cast<ColumnArray &>(*mutable_column).getOffsets();
|
2020-07-13 09:10:08 +00:00
|
|
|
size_t start_offset = total_rows_read ? source_offsets[total_rows_read - 1] : 0;
|
2020-06-01 17:52:09 +00:00
|
|
|
for (size_t row = 0; row < rows_to_read; ++row)
|
2020-07-13 09:10:08 +00:00
|
|
|
res_offstes.push_back(source_offsets[total_rows_read + row] - start_offset);
|
2020-06-01 17:52:09 +00:00
|
|
|
|
2020-04-14 19:47:19 +00:00
|
|
|
res_columns[i] = std::move(mutable_column);
|
|
|
|
}
|
2022-07-27 14:05:16 +00:00
|
|
|
else if (part_in_memory->hasColumnFiles(column_to_read))
|
2020-06-01 17:52:09 +00:00
|
|
|
{
|
2022-07-27 14:05:16 +00:00
|
|
|
auto block_column = getColumnFromBlock(part_in_memory->block, column_to_read);
|
2020-06-01 17:52:09 +00:00
|
|
|
if (rows_to_read == part_rows)
|
|
|
|
{
|
|
|
|
res_columns[i] = block_column;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (res_columns[i] == nullptr)
|
2022-07-27 14:05:16 +00:00
|
|
|
res_columns[i] = column_to_read.type->createColumn();
|
2020-06-01 17:52:09 +00:00
|
|
|
|
|
|
|
auto mutable_column = res_columns[i]->assumeMutable();
|
|
|
|
mutable_column->insertRangeFrom(*block_column, total_rows_read, rows_to_read);
|
|
|
|
res_columns[i] = std::move(mutable_column);
|
|
|
|
}
|
|
|
|
}
|
2020-04-14 19:47:19 +00:00
|
|
|
}
|
|
|
|
|
2020-06-01 17:52:09 +00:00
|
|
|
total_rows_read += rows_to_read;
|
|
|
|
return rows_to_read;
|
2020-04-14 19:47:19 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|