ClickHouse/dbms/src/Storages/MergeTree/MergeTreeSelectBlockInputStream.cpp

163 lines
5.4 KiB
C++
Raw Normal View History

2018-11-29 09:19:42 +00:00
#include <Storages/MergeTree/MergeTreeSelectBlockInputStream.h>
2019-10-10 16:30:30 +00:00
#include <Storages/MergeTree/IMergeTreeReader.h>
2018-11-29 09:19:42 +00:00
#include <Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.h>
#include <Core/Defines.h>
2016-11-20 12:43:20 +00:00
namespace DB
{
namespace ErrorCodes
{
    /// Declared here, defined elsewhere. getNewTask() compares the current exception code
    /// against it to decide whether the part should be reported as possibly broken.
    extern const int MEMORY_LIMIT_EXCEEDED;
}
2018-11-29 09:19:42 +00:00
MergeTreeSelectBlockInputStream::MergeTreeSelectBlockInputStream(
2018-10-17 03:13:00 +00:00
const MergeTreeData & storage_,
const MergeTreeData::DataPartPtr & owned_data_part_,
2019-02-10 16:55:12 +00:00
UInt64 max_block_size_rows_,
size_t preferred_block_size_bytes_,
size_t preferred_max_column_in_block_size_bytes_,
2019-07-19 14:56:00 +00:00
Names required_columns_,
const MarkRanges & mark_ranges_,
bool use_uncompressed_cache_,
const PrewhereInfoPtr & prewhere_info_,
2019-08-03 11:02:40 +00:00
bool check_columns_,
const MergeTreeReaderSettings & reader_settings_,
const Names & virt_column_names_,
2017-04-05 20:34:19 +00:00
size_t part_index_in_query_,
bool quiet)
:
MergeTreeBaseSelectBlockInputStream{storage_, prewhere_info_, max_block_size_rows_,
preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_,
2019-10-10 16:30:30 +00:00
reader_settings_, use_uncompressed_cache_, virt_column_names_},
2019-07-19 14:56:00 +00:00
required_columns{required_columns_},
2017-04-05 20:34:19 +00:00
data_part{owned_data_part_},
part_columns_lock(data_part->columns_lock),
all_mark_ranges(mark_ranges_),
2017-04-05 20:34:19 +00:00
part_index_in_query(part_index_in_query_),
2019-08-03 11:02:40 +00:00
check_columns(check_columns_),
2017-04-05 20:34:19 +00:00
path(data_part->getFullPath())
{
/// Let's estimate total number of rows for progress bar.
for (const auto & range : all_mark_ranges)
2018-05-23 19:34:37 +00:00
total_marks_count += range.end - range.begin;
2019-07-15 20:26:55 +00:00
size_t total_rows = data_part->index_granularity.getRowsCountInRanges(all_mark_ranges);
if (!quiet)
2017-04-05 20:34:19 +00:00
LOG_TRACE(log, "Reading " << all_mark_ranges.size() << " ranges from part " << data_part->name
<< ", approx. " << total_rows
<< (all_mark_ranges.size() > 1
2019-07-15 20:26:55 +00:00
? ", up to " + toString(total_rows)
: "")
2019-03-25 13:55:24 +00:00
<< " rows starting from " << data_part->index_granularity.getMarkStartingRow(all_mark_ranges.front().begin));
2018-02-23 10:02:29 +00:00
addTotalRowsApprox(total_rows);
2018-02-21 05:11:53 +00:00
2018-04-19 15:18:26 +00:00
header = storage.getSampleBlockForColumns(required_columns);
2018-02-21 05:11:53 +00:00
2018-04-19 15:18:26 +00:00
/// Types may be different during ALTER (when this stream is used to perform an ALTER).
/// NOTE: We may use similar code to implement non blocking ALTERs.
for (const auto & name_type : data_part->columns)
2018-02-21 05:11:53 +00:00
{
2018-04-19 15:18:26 +00:00
if (header.has(name_type.name))
2018-02-21 05:11:53 +00:00
{
2018-04-19 15:18:26 +00:00
auto & elem = header.getByName(name_type.name);
if (!elem.type->equals(*name_type.type))
2018-02-21 05:11:53 +00:00
{
2018-04-19 15:18:26 +00:00
elem.type = name_type.type;
elem.column = elem.type->createColumn();
2018-02-21 05:11:53 +00:00
}
}
}
2018-04-19 15:18:26 +00:00
executePrewhereActions(header, prewhere_info);
2018-09-07 20:23:28 +00:00
injectVirtualColumns(header);
2018-04-19 15:18:26 +00:00
ordered_names = getHeader().getNames();
}
2018-11-29 09:19:42 +00:00
/// Returns a copy of the header block that was built in the constructor.
Block MergeTreeSelectBlockInputStream::getHeader() const
{
    return header;
}
2018-11-29 09:19:42 +00:00
bool MergeTreeSelectBlockInputStream::getNewTask()
try
2016-11-20 12:43:20 +00:00
{
2018-05-23 19:34:37 +00:00
/// Produce no more than one task
if (!is_first_task || total_marks_count == 0)
2017-04-05 20:34:19 +00:00
{
finish();
2017-04-05 20:34:19 +00:00
return false;
}
is_first_task = false;
2019-07-19 14:56:00 +00:00
task_columns = getReadTaskColumns(storage, data_part, required_columns, prewhere_info, check_columns);
2017-04-05 20:34:19 +00:00
/** @note you could simply swap `reverse` in if and else branches of MergeTreeDataSelectExecutor,
* and remove this reverse. */
MarkRanges remaining_mark_ranges = all_mark_ranges;
std::reverse(remaining_mark_ranges.begin(), remaining_mark_ranges.end());
2018-09-03 05:06:19 +00:00
auto size_predictor = (preferred_block_size_bytes == 0)
? nullptr
: std::make_unique<MergeTreeBlockSizePredictor>(data_part, ordered_names, data_part->storage.getSampleBlock());
2017-04-05 20:34:19 +00:00
2019-07-19 14:56:00 +00:00
/// will be used to distinguish between PREWHERE and WHERE columns when applying filter
const auto & column_names = task_columns.columns.getNames();
column_name_set = NameSet{column_names.begin(), column_names.end()};
task = std::make_unique<MergeTreeReadTask>(
2019-07-19 14:56:00 +00:00
data_part, remaining_mark_ranges, part_index_in_query, ordered_names, column_name_set, task_columns.columns,
task_columns.pre_columns, prewhere_info && prewhere_info->remove_prewhere_column,
task_columns.should_reorder, std::move(size_predictor));
if (!reader)
{
if (use_uncompressed_cache)
owned_uncompressed_cache = storage.global_context.getUncompressedCache();
owned_mark_cache = storage.global_context.getMarkCache();
2019-10-10 16:30:30 +00:00
reader = data_part->getReader(task_columns.columns, all_mark_ranges,
owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings);
if (prewhere_info)
2019-10-10 16:30:30 +00:00
pre_reader = data_part->getReader(task_columns.pre_columns, all_mark_ranges,
owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings);
}
2017-04-05 20:34:19 +00:00
return true;
}
catch (...)
2017-04-05 20:34:19 +00:00
{
/// Suspicion of the broken part. A part is added to the queue for verification.
if (getCurrentExceptionCode() != ErrorCodes::MEMORY_LIMIT_EXCEEDED)
storage.reportBrokenPart(data_part->name);
throw;
}
2018-11-29 09:19:42 +00:00
/// Releases everything the stream holds for reading: both readers, the lock on the part's
/// columns and the part itself. Safe to call more than once; getNewTask() calls it on every
/// call made after the single task has been produced.
void MergeTreeSelectBlockInputStream::finish()
{
    /** Close the files (before destroying the object).
      * When many sources are created, but simultaneously reading only a few of them,
      * buffers don't waste memory.
      */
    reader.reset();
    pre_reader.reset();

    /// The lock is taken in the constructor and released only here, together with the part,
    /// so a null data_part means both were already released by an earlier call.
    /// NOTE(review): presumably part_columns_lock is a standard lock wrapper, for which a second
    /// unlock() throws - confirm against the header.
    if (data_part)
    {
        part_columns_lock.unlock();
        data_part.reset();
    }
}
2018-11-29 09:19:42 +00:00
/// Destructor is defaulted here, in the implementation file, rather than inside the class declaration.
/// NOTE(review): presumably so that members of types only forward-declared in the header can be destroyed - confirm.
MergeTreeSelectBlockInputStream::~MergeTreeSelectBlockInputStream() = default;
2016-11-20 12:43:20 +00:00
}