#pragma once #include #include #include #include #include #include namespace DB { class IMergeTreeReader; using MergeTreeReaderPtr = std::unique_ptr; /// A class which is responsible for creating read tasks /// which are later taken by readers via getTask method. /// Does prefetching for the read tasks it creates. class MergeTreePrefetchedReadPool : public IMergeTreeReadPool, private WithContext { public: MergeTreePrefetchedReadPool( size_t threads, size_t sum_marks_, size_t min_marks_for_concurrent_read_, RangesInDataParts && parts_, const StorageSnapshotPtr & storage_snapshot_, const PrewhereInfoPtr & prewhere_info_, const Names & column_names_, const Names & virtual_column_names_, size_t preferred_block_size_bytes_, const MergeTreeReaderSettings & reader_settings_, ContextPtr context_, bool use_uncompressed_cache_, bool is_remote_read_, const MergeTreeSettings & storage_settings_); ~MergeTreePrefetchedReadPool() override; MergeTreeReadTaskPtr getTask(size_t thread) override; void profileFeedback(ReadBufferFromFileBase::ProfileInfo) override {} Block getHeader() const override { return header; } static bool checkReadMethodAllowed(LocalFSReadMethod method); static bool checkReadMethodAllowed(RemoteFSReadMethod method); private: struct PartInfo; using PartInfoPtr = std::shared_ptr; using PartsInfos = std::vector; using MergeTreeReadTaskPtr = std::unique_ptr; using ThreadTasks = std::deque; using ThreadsTasks = std::map; /// smaller `priority` means more priority std::future createPrefetchedReader( const IMergeTreeDataPart & data_part, const NamesAndTypesList & columns, const MarkRanges & required_ranges, int64_t priority) const; void createPrefetchedReaderForTask(MergeTreeReadTask & task) const; size_t getApproxSizeOfGranule(const IMergeTreeDataPart & part) const; PartsInfos getPartsInfos(const RangesInDataParts & parts, size_t preferred_block_size_bytes) const; ThreadsTasks createThreadsTasks( size_t threads, size_t sum_marks, size_t min_marks_for_concurrent_read) const; void startPrefetches() const; static std::string dumpTasks(const ThreadsTasks & tasks); Poco::Logger * log; Block header; MarkCache * mark_cache; UncompressedCache * uncompressed_cache; MergeTreeReaderSettings reader_settings; ReadBufferFromFileBase::ProfileCallback profile_callback; size_t index_granularity_bytes; size_t fixed_index_granularity; [[ maybe_unused ]] const bool is_remote_read; ThreadPool & prefetch_threadpool; PartsInfos parts_infos; ThreadsTasks threads_tasks; struct TaskHolder { explicit TaskHolder(MergeTreeReadTask * task_) : task(task_) {} MergeTreeReadTask * task; bool operator <(const TaskHolder & other) const; }; mutable boost::heap::priority_queue prefetch_queue; bool started_prefetches = false; /// A struct which allows to track max number of tasks which were in the /// threadpool simultaneously (similar to CurrentMetrics, but the result /// will be put to QueryLog). struct PrefetchIncrement : boost::noncopyable { explicit PrefetchIncrement(std::shared_ptr counters_) : counters(counters_) { std::lock_guard lock(counters->mutex); ++counters->total_prefetch_tasks; if (++counters->current_parallel_prefetch_tasks > counters->max_parallel_prefetch_tasks) counters->max_parallel_prefetch_tasks = counters->current_parallel_prefetch_tasks; } ~PrefetchIncrement() { std::lock_guard lock(counters->mutex); --counters->current_parallel_prefetch_tasks; } std::shared_ptr counters; }; }; }