dbms: style corrections and comments [#METR-16457]

This commit is contained in:
Andrey Mironov 2015-09-09 20:39:28 +03:00
parent 47f291d721
commit 5caf4e401f
3 changed files with 37 additions and 12 deletions

View File

@ -10,16 +10,26 @@ namespace DB
{
/// A batch of work for MergeTreeThreadBlockInputStream
struct MergeTreeReadTask
{
/// data part which should be read while performing this task
MergeTreeData::DataPartPtr data_part;
/// ranges to read from `data_part`
MarkRanges mark_ranges;
/// index of the part within the query, used for the virtual `part_index` column
std::size_t part_index_in_query;
/// ordered list of column names used in this query, allows returning blocks with consistent ordering
const Names & ordered_names;
/// used to determine whether column should be filtered during PREWHERE or WHERE
const NameSet & column_name_set;
/// column names to read during WHERE
const NamesAndTypesList & columns;
/// column names to read during PREWHERE
const NamesAndTypesList & pre_columns;
/// should PREWHERE column be returned to requesting side?
const bool remove_prewhere_column;
/// resulting block may require reordering in accordance with `ordered_names`
const bool should_reorder;
MergeTreeReadTask(
@ -34,6 +44,13 @@ struct MergeTreeReadTask
using MergeTreeReadTaskPtr = std::unique_ptr<MergeTreeReadTask>;
/** Provides read tasks for MergeTreeThreadBlockInputStream`s in fine-grained batches, allowing for more
* uniform distribution of work amongst multiple threads. All parts and their ranges are divided into `threads`
* workloads with at most `sum_marks / threads` marks. Then, threads are performing reads from these workloads
* in "sequential" manner, requesting work in small batches. As soon as some thread has exhausted
* its workload, it either is signaled that no more work is available (`do_not_steal_tasks == true`) or
* continues taking small batches from other threads' workloads (`do_not_steal_tasks == false`).
*/
class MergeTreeReadPool
{
public:
@ -399,30 +416,30 @@ public:
std::vector<NamesAndTypesList> per_part_columns;
std::vector<NamesAndTypesList> per_part_pre_columns;
/// @todo actually all of these values are either true or false for the whole query, thus no vector required
std::vector<bool> per_part_remove_prewhere_column;
std::vector<bool> per_part_should_reorder;
std::vector<char> per_part_remove_prewhere_column;
std::vector<char> per_part_should_reorder;
struct part_t
struct Part
{
MergeTreeData::DataPartPtr data_part;
std::size_t part_index_in_query;
};
std::vector<part_t> parts;
std::vector<Part> parts;
struct thread_task_t
struct ThreadTask
{
struct part_index_and_range_t
struct PartIndexAndRange
{
std::size_t part_idx;
MarkRanges ranges;
};
std::vector<part_index_and_range_t> parts_and_ranges;
std::vector<PartIndexAndRange> parts_and_ranges;
std::vector<std::size_t> sum_marks_in_parts;
};
std::vector<thread_task_t> threads_tasks;
std::vector<ThreadTask> threads_tasks;
std::unordered_set<std::size_t> remaining_thread_tasks;

View File

@ -35,10 +35,12 @@ public:
: uncompressed_cache(uncompressed_cache_), mark_cache(mark_cache_), storage(storage_),
aio_threshold(aio_threshold_), max_read_buffer_size(max_read_buffer_size_)
{
reconf(path_, data_part, columns_, all_mark_ranges);
reconfigure(path_, data_part, columns_, all_mark_ranges);
}
void reconf(
/** Allows reusing the same MergeTreeReader across multiple data parts and/or columns and/or ranges,
* all while preserving avg_value_size_hints (may and does significantly improve read times). */
void reconfigure(
const String & path, const MergeTreeData::DataPartPtr & data_part, const NamesAndTypesList & columns,
const MarkRanges & all_mark_ranges)
{

View File

@ -11,8 +11,11 @@ namespace DB
{
/** Used in conjunction with MergeTreeReadPool, asking it for more work to do and performing whatever reads it is asked
* to perform. */
class MergeTreeThreadBlockInputStream : public IProfilingBlockInputStream
{
/// "thread" index (there are N threads and each thread is assigned an index in the interval [0..N-1])
std::size_t thread;
public:
MergeTreeThreadBlockInputStream(
@ -82,6 +85,7 @@ protected:
}
private:
/// Requests read task from MergeTreeReadPool and signals whether it got one
bool getNewTask()
{
task = pool->getTask(min_marks_to_read, thread);
@ -117,9 +121,11 @@ private:
}
else
{
reader->reconf(path, task->data_part, task->columns, task->mark_ranges);
/** reader and possibly pre_reader were already created; just reconfigure them for the new data part,
* ranges and columns, so that their internal state is preserved. */
reader->reconfigure(path, task->data_part, task->columns, task->mark_ranges);
if (prewhere_actions)
pre_reader->reconf(path, task->data_part, task->pre_columns, task->mark_ranges);
pre_reader->reconfigure(path, task->data_part, task->pre_columns, task->mark_ranges);
}
return true;