#pragma once #include #include #include #include #include #include #include #include namespace DB { struct TemporaryFileStream; class Volume; using VolumePtr = std::shared_ptr; namespace ErrorCodes { } /** Merges stream of sorted each-separately blocks to sorted as-a-whole stream of blocks. * If data to sort is too much, could use external sorting, with temporary files. */ /** Part of implementation. Merging array of ready (already read from somewhere) blocks. * Returns result of merge as stream of blocks, not more than 'max_merged_block_size' rows in each. */ class MergeSortingBlocksBlockInputStream : public IBlockInputStream { public: /// limit - if not 0, allowed to return just first 'limit' rows in sorted order. MergeSortingBlocksBlockInputStream(Blocks & blocks_, const SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_ = 0); String getName() const override { return "MergeSortingBlocks"; } bool isSortedOutput() const override { return true; } const SortDescription & getSortDescription() const override { return description; } Block getHeader() const override { return header; } protected: Block readImpl() override; private: Blocks & blocks; Block header; SortDescription description; size_t max_merged_block_size; UInt64 limit; size_t total_merged_rows = 0; SortCursorImpls cursors; bool has_collation = false; SortingHeap queue_without_collation; SortingHeap queue_simple; SortingHeap queue_with_collation; /** Two different cursors are supported - with and without Collation. * Templates are used (instead of virtual functions in SortCursor) for zero-overhead. */ template Block mergeImpl(TSortingHeap & queue); }; class MergeSortingBlockInputStream : public IBlockInputStream { public: /// limit - if not 0, allowed to return just first 'limit' rows in sorted order. MergeSortingBlockInputStream(const BlockInputStreamPtr & input, SortDescription & description_, size_t max_merged_block_size_, UInt64 limit_, size_t max_bytes_before_remerge_, size_t max_bytes_before_external_sort_, VolumePtr tmp_volume_, size_t min_free_disk_space_); String getName() const override { return "MergeSorting"; } bool isSortedOutput() const override { return true; } const SortDescription & getSortDescription() const override { return description; } Block getHeader() const override { return header; } protected: Block readImpl() override; private: SortDescription description; size_t max_merged_block_size; UInt64 limit; size_t max_bytes_before_remerge; size_t max_bytes_before_external_sort; VolumePtr tmp_volume; size_t min_free_disk_space; Logger * log = &Logger::get("MergeSortingBlockInputStream"); Blocks blocks; size_t sum_rows_in_blocks = 0; size_t sum_bytes_in_blocks = 0; std::unique_ptr impl; /// Before operation, will remove constant columns from blocks. And after, place constant columns back. /// (to avoid excessive virtual function calls and because constants cannot be serialized in Native format for temporary files) /// Save original block structure here. Block header; Block header_without_constants; /// Everything below is for external sorting. std::vector> temporary_files; std::vector> temporary_inputs; BlockInputStreams inputs_to_merge; /// Merge all accumulated blocks to keep no more than limit rows. void remerge(); /// If remerge doesn't save memory at least several times, mark it as useless and don't do it anymore. bool remerge_is_useful = true; }; }