mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
dbms: style corrections and comments [#METR-16457]
This commit is contained in:
parent
47f291d721
commit
5caf4e401f
@ -10,16 +10,26 @@ namespace DB
|
||||
{
|
||||
|
||||
|
||||
/// A batch of work for MergeTreeThreadBlockInputStream
|
||||
struct MergeTreeReadTask
|
||||
{
|
||||
/// data part which should be read while performing this task
|
||||
MergeTreeData::DataPartPtr data_part;
|
||||
/// ranges to read from `data_part`
|
||||
MarkRanges mark_ranges;
|
||||
/// for the virtual `part_index` column
|
||||
std::size_t part_index_in_query;
|
||||
/// ordered list of column names used in this query, allows returning blocks with consistent ordering
|
||||
const Names & ordered_names;
|
||||
/// used to determine whether column should be filtered during PREWHERE or WHERE
|
||||
const NameSet & column_name_set;
|
||||
/// column names to read during WHERE
|
||||
const NamesAndTypesList & columns;
|
||||
/// column names to read during PREWHERE
|
||||
const NamesAndTypesList & pre_columns;
|
||||
/// should the PREWHERE column be removed from the result (i.e. not returned to the requesting side)?
|
||||
const bool remove_prewhere_column;
|
||||
/// resulting block may require reordering in accordance with `ordered_names`
|
||||
const bool should_reorder;
|
||||
|
||||
MergeTreeReadTask(
|
||||
@ -34,6 +44,13 @@ struct MergeTreeReadTask
|
||||
|
||||
using MergeTreeReadTaskPtr = std::unique_ptr<MergeTreeReadTask>;
|
||||
|
||||
/** Provides read tasks for MergeTreeThreadBlockInputStream`s in fine-grained batches, allowing for more
|
||||
* uniform distribution of work amongst multiple threads. All parts and their ranges are divided into `threads`
|
||||
* workloads with at most `sum_marks / threads` marks. Then, threads are performing reads from these workloads
|
||||
* in "sequential" manner, requesting work in small batches. As soon as some thread has exhausted
|
||||
* its workload, it either is signaled that no more work is available (`do_not_steal_tasks == true`) or
|
||||
* continues taking small batches from other threads' workloads (`do_not_steal_tasks == false`).
|
||||
*/
|
||||
class MergeTreeReadPool
|
||||
{
|
||||
public:
|
||||
@ -399,30 +416,30 @@ public:
|
||||
std::vector<NamesAndTypesList> per_part_columns;
|
||||
std::vector<NamesAndTypesList> per_part_pre_columns;
|
||||
/// @todo actually all of these values are either true or false for the whole query, thus no vector required
|
||||
std::vector<bool> per_part_remove_prewhere_column;
|
||||
std::vector<bool> per_part_should_reorder;
|
||||
std::vector<char> per_part_remove_prewhere_column;
|
||||
std::vector<char> per_part_should_reorder;
|
||||
|
||||
struct part_t
|
||||
struct Part
|
||||
{
|
||||
MergeTreeData::DataPartPtr data_part;
|
||||
std::size_t part_index_in_query;
|
||||
};
|
||||
|
||||
std::vector<part_t> parts;
|
||||
std::vector<Part> parts;
|
||||
|
||||
struct thread_task_t
|
||||
struct ThreadTask
|
||||
{
|
||||
struct part_index_and_range_t
|
||||
struct PartIndexAndRange
|
||||
{
|
||||
std::size_t part_idx;
|
||||
MarkRanges ranges;
|
||||
};
|
||||
|
||||
std::vector<part_index_and_range_t> parts_and_ranges;
|
||||
std::vector<PartIndexAndRange> parts_and_ranges;
|
||||
std::vector<std::size_t> sum_marks_in_parts;
|
||||
};
|
||||
|
||||
std::vector<thread_task_t> threads_tasks;
|
||||
std::vector<ThreadTask> threads_tasks;
|
||||
|
||||
std::unordered_set<std::size_t> remaining_thread_tasks;
|
||||
|
||||
|
@ -35,10 +35,12 @@ public:
|
||||
: uncompressed_cache(uncompressed_cache_), mark_cache(mark_cache_), storage(storage_),
|
||||
aio_threshold(aio_threshold_), max_read_buffer_size(max_read_buffer_size_)
|
||||
{
|
||||
reconf(path_, data_part, columns_, all_mark_ranges);
|
||||
reconfigure(path_, data_part, columns_, all_mark_ranges);
|
||||
}
|
||||
|
||||
void reconf(
|
||||
/** Allows reusing the same MergeTreeReader across multiple data parts and/or columns and/or ranges,
|
||||
* all while preserving avg_value_size_hints (may and does significantly improve read times). */
|
||||
void reconfigure(
|
||||
const String & path, const MergeTreeData::DataPartPtr & data_part, const NamesAndTypesList & columns,
|
||||
const MarkRanges & all_mark_ranges)
|
||||
{
|
||||
|
@ -11,8 +11,11 @@ namespace DB
|
||||
{
|
||||
|
||||
|
||||
/** Used in conjunction with MergeTreeReadPool, asking it for more work to do and performing whatever reads it is asked
|
||||
* to perform. */
|
||||
class MergeTreeThreadBlockInputStream : public IProfilingBlockInputStream
|
||||
{
|
||||
/// "thread" index (there are N threads and each thread is assigned index in interval [0..N-1])
|
||||
std::size_t thread;
|
||||
public:
|
||||
MergeTreeThreadBlockInputStream(
|
||||
@ -82,6 +85,7 @@ protected:
|
||||
}
|
||||
|
||||
private:
|
||||
/// Requests read task from MergeTreeReadPool and signals whether it got one
|
||||
bool getNewTask()
|
||||
{
|
||||
task = pool->getTask(min_marks_to_read, thread);
|
||||
@ -117,9 +121,11 @@ private:
|
||||
}
|
||||
else
|
||||
{
|
||||
reader->reconf(path, task->data_part, task->columns, task->mark_ranges);
|
||||
/** reader and possibly pre_reader were already created, just configure them to a new data part, ranges and
|
||||
* columns to preserve internal state. */
|
||||
reader->reconfigure(path, task->data_part, task->columns, task->mark_ranges);
|
||||
if (prewhere_actions)
|
||||
pre_reader->reconf(path, task->data_part, task->pre_columns, task->mark_ranges);
|
||||
pre_reader->reconfigure(path, task->data_part, task->pre_columns, task->mark_ranges);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
Loading…
Reference in New Issue
Block a user