2019-10-01 16:50:08 +00:00
|
|
|
#include <Storages/MergeTree/MergeTreeReverseSelectProcessor.h>
|
|
|
|
#include <Storages/MergeTree/MergeTreeBaseSelectProcessor.h>
|
2019-10-10 16:30:30 +00:00
|
|
|
#include <Storages/MergeTree/IMergeTreeReader.h>
|
2019-07-18 14:41:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Error codes referenced in this file. Only declared here; the definition lives elsewhere.
namespace ErrorCodes
{
    extern const int MEMORY_LIMIT_EXCEEDED;
}
|
|
|
|
|
2019-10-01 16:50:08 +00:00
|
|
|
/// Aligns the types in `header` with the types of the same-named columns of `data_part`.
/// Types may be different during ALTER (when this stream is used to perform an ALTER).
/// NOTE: We may use similar code to implement non blocking ALTERs.
///
/// Columns of the part that are absent from `header` are skipped. For a column whose type
/// differs, both the type and the (freshly created) column object are replaced.
/// Returns the adjusted header, moved out of the argument.
static Block replaceTypes(Block && header, const MergeTreeData::DataPartPtr & data_part)
{
    for (const auto & part_column : data_part->getColumns())
    {
        if (!header.has(part_column.name))
            continue;

        auto & header_column = header.getByName(part_column.name);
        if (header_column.type->equals(*part_column.type))
            continue;

        header_column.type = part_column.type;
        header_column.column = header_column.type->createColumn();
    }

    /// `header` is a named rvalue reference, so an explicit move is needed to avoid a copy.
    return std::move(header);
}
|
2019-07-18 14:41:11 +00:00
|
|
|
|
2019-10-01 16:50:08 +00:00
|
|
|
MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor(
|
2019-07-18 14:41:11 +00:00
|
|
|
const MergeTreeData & storage_,
|
|
|
|
const MergeTreeData::DataPartPtr & owned_data_part_,
|
|
|
|
UInt64 max_block_size_rows_,
|
|
|
|
size_t preferred_block_size_bytes_,
|
|
|
|
size_t preferred_max_column_in_block_size_bytes_,
|
2019-07-19 14:56:00 +00:00
|
|
|
Names required_columns_,
|
2019-10-04 15:40:05 +00:00
|
|
|
MarkRanges mark_ranges_,
|
2019-07-18 14:41:11 +00:00
|
|
|
bool use_uncompressed_cache_,
|
|
|
|
const PrewhereInfoPtr & prewhere_info_,
|
|
|
|
bool check_columns,
|
2019-12-18 15:54:45 +00:00
|
|
|
const MergeTreeReaderSettings & reader_settings_,
|
2019-07-18 14:41:11 +00:00
|
|
|
const Names & virt_column_names_,
|
|
|
|
size_t part_index_in_query_,
|
|
|
|
bool quiet)
|
|
|
|
:
|
2019-10-01 16:50:08 +00:00
|
|
|
MergeTreeBaseSelectProcessor{
|
2019-10-02 11:57:17 +00:00
|
|
|
replaceTypes(storage_.getSampleBlockForColumns(required_columns_), owned_data_part_),
|
2019-10-01 16:50:08 +00:00
|
|
|
storage_, prewhere_info_, max_block_size_rows_,
|
2019-12-19 13:10:57 +00:00
|
|
|
preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
|
|
|
|
reader_settings_, use_uncompressed_cache_, virt_column_names_},
|
2019-10-04 15:40:05 +00:00
|
|
|
required_columns{std::move(required_columns_)},
|
2019-07-18 14:41:11 +00:00
|
|
|
data_part{owned_data_part_},
|
2019-10-04 15:40:05 +00:00
|
|
|
all_mark_ranges(std::move(mark_ranges_)),
|
2019-07-18 14:41:11 +00:00
|
|
|
part_index_in_query(part_index_in_query_),
|
2020-02-27 16:47:40 +00:00
|
|
|
path(data_part->getFullRelativePath())
|
2019-07-18 14:41:11 +00:00
|
|
|
{
|
|
|
|
/// Let's estimate total number of rows for progress bar.
|
|
|
|
for (const auto & range : all_mark_ranges)
|
|
|
|
total_marks_count += range.end - range.begin;
|
|
|
|
|
|
|
|
size_t total_rows = data_part->index_granularity.getTotalRows();
|
|
|
|
|
|
|
|
if (!quiet)
|
|
|
|
LOG_TRACE(log, "Reading " << all_mark_ranges.size() << " ranges in reverse order from part " << data_part->name
|
|
|
|
<< ", approx. " << total_rows
|
|
|
|
<< (all_mark_ranges.size() > 1
|
|
|
|
? ", up to " + toString(data_part->index_granularity.getRowsCountInRanges(all_mark_ranges))
|
|
|
|
: "")
|
|
|
|
<< " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin));
|
|
|
|
|
|
|
|
addTotalRowsApprox(total_rows);
|
|
|
|
|
2019-10-02 11:57:17 +00:00
|
|
|
ordered_names = header_without_virtual_columns.getNames();
|
2019-07-18 14:41:11 +00:00
|
|
|
|
2019-07-19 14:56:00 +00:00
|
|
|
task_columns = getReadTaskColumns(storage, data_part, required_columns, prewhere_info, check_columns);
|
2019-07-18 14:41:11 +00:00
|
|
|
|
|
|
|
/// will be used to distinguish between PREWHERE and WHERE columns when applying filter
|
2019-07-19 14:56:00 +00:00
|
|
|
const auto & column_names = task_columns.columns.getNames();
|
|
|
|
column_name_set = NameSet{column_names.begin(), column_names.end()};
|
2019-07-18 14:41:11 +00:00
|
|
|
|
|
|
|
if (use_uncompressed_cache)
|
|
|
|
owned_uncompressed_cache = storage.global_context.getUncompressedCache();
|
|
|
|
|
|
|
|
owned_mark_cache = storage.global_context.getMarkCache();
|
|
|
|
|
2019-10-10 16:30:30 +00:00
|
|
|
reader = data_part->getReader(task_columns.columns, all_mark_ranges,
|
|
|
|
owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings);
|
2019-07-18 14:41:11 +00:00
|
|
|
|
|
|
|
if (prewhere_info)
|
2019-10-10 16:30:30 +00:00
|
|
|
pre_reader = data_part->getReader(task_columns.pre_columns, all_mark_ranges,
|
|
|
|
owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings);
|
2019-07-18 14:41:11 +00:00
|
|
|
}
|
|
|
|
|
2019-10-01 16:50:08 +00:00
|
|
|
bool MergeTreeReverseSelectProcessor::getNewTask()
|
2019-07-18 14:41:11 +00:00
|
|
|
try
|
|
|
|
{
|
2019-10-01 16:50:08 +00:00
|
|
|
if ((chunks.empty() && all_mark_ranges.empty()) || total_marks_count == 0)
|
2019-07-18 14:41:11 +00:00
|
|
|
{
|
|
|
|
finish();
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2020-01-04 05:46:50 +00:00
|
|
|
/// We have some blocks to return in buffer.
|
2019-07-18 14:41:11 +00:00
|
|
|
/// Return true to continue reading, but actually don't create a task.
|
|
|
|
if (all_mark_ranges.empty())
|
|
|
|
return true;
|
|
|
|
|
|
|
|
/// Read ranges from right to left.
|
|
|
|
MarkRanges mark_ranges_for_task = { all_mark_ranges.back() };
|
|
|
|
all_mark_ranges.pop_back();
|
|
|
|
|
|
|
|
auto size_predictor = (preferred_block_size_bytes == 0)
|
|
|
|
? nullptr
|
|
|
|
: std::make_unique<MergeTreeBlockSizePredictor>(data_part, ordered_names, data_part->storage.getSampleBlock());
|
|
|
|
|
|
|
|
task = std::make_unique<MergeTreeReadTask>(
|
2019-07-19 14:56:00 +00:00
|
|
|
data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set,
|
|
|
|
task_columns.columns, task_columns.pre_columns, prewhere_info && prewhere_info->remove_prewhere_column,
|
|
|
|
task_columns.should_reorder, std::move(size_predictor));
|
2019-07-18 14:41:11 +00:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
/// Suspicion of the broken part. A part is added to the queue for verification.
|
|
|
|
if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
|
|
|
|
storage.reportBrokenPart(data_part->name);
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
|
2019-10-01 16:50:08 +00:00
|
|
|
/// Returns the next chunk in reverse order.
///
/// If the buffer of chunks is empty, the whole current task is read into it first.
/// Chunks are then handed out from the back of the buffer, one per call.
/// Returns an empty Chunk if the buffer is still empty after reading.
Chunk MergeTreeReverseSelectProcessor::readFromPart()
{
    if (chunks.empty())
    {
        if (!task->range_reader.isInitialized())
            initializeRangeReaders(*task);

        /// Read the task to the end, accumulating chunks in reading order.
        while (!task->isFinished())
            chunks.push_back(readFromPartImpl());

        if (chunks.empty())
            return {};
    }

    /// The last chunk read is returned first.
    Chunk res = std::move(chunks.back());
    chunks.pop_back();
    return res;
}
|
|
|
|
|
2019-10-01 16:50:08 +00:00
|
|
|
void MergeTreeReverseSelectProcessor::finish()
|
2019-07-18 14:41:11 +00:00
|
|
|
{
|
|
|
|
/** Close the files (before destroying the object).
|
|
|
|
* When many sources are created, but simultaneously reading only a few of them,
|
|
|
|
* buffers don't waste memory.
|
|
|
|
*/
|
|
|
|
reader.reset();
|
|
|
|
pre_reader.reset();
|
|
|
|
data_part.reset();
|
|
|
|
}
|
|
|
|
|
2019-10-01 16:50:08 +00:00
|
|
|
/// Defaulted destructor, defined in this file rather than in the class body.
/// NOTE(review): presumably kept out of line so the header does not need the complete types of the owned members — confirm.
MergeTreeReverseSelectProcessor::~MergeTreeReverseSelectProcessor() = default;
|
2019-07-18 14:41:11 +00:00
|
|
|
|
|
|
|
}
|