ClickHouse/src/Interpreters/MergeJoin.h

150 lines
5.2 KiB
C++
Raw Normal View History

2019-09-09 19:43:37 +00:00
#pragma once
2019-09-10 14:51:28 +00:00
#include <shared_mutex>
2019-09-09 19:43:37 +00:00
#include <Common/LRUCache.h>
2019-09-09 19:43:37 +00:00
#include <Core/Block.h>
2019-09-12 18:06:25 +00:00
#include <Core/SortDescription.h>
2019-09-09 19:43:37 +00:00
#include <Interpreters/IJoin.h>
#include <Interpreters/SortedBlocksWriter.h>
#include <DataStreams/SizeLimits.h>
2019-09-09 19:43:37 +00:00
namespace DB
{
class TableJoin;
2019-09-13 16:17:37 +00:00
class MergeJoinCursor;
struct MergeJoinEqualRange;
class RowBitmaps;
2019-09-09 19:43:37 +00:00
2019-09-09 19:43:37 +00:00
/// IJoin implementation whose right-hand table is stored as a sequence of blocks.
/// Right blocks are accessed either through the in-memory path or the non-in-memory path,
/// selected at run time by `is_in_memory` (see getRightBlock / getRightBlocksCount).
/// NOTE(review): the join algorithm itself lives outside this header — confirm details in MergeJoin.cpp.
class MergeJoin : public IJoin
{
public:
/// Constructs the join from the shared TableJoin object and a sample block of the right-hand table.
/// NOTE(review): presumably the const flags and limits declared below are derived here — confirm in MergeJoin.cpp.
MergeJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_sample_block);
2019-09-09 19:43:37 +00:00
2020-03-18 03:27:32 +00:00
/// IJoin override. Receives one block together with a `check_limits` flag and returns a bool.
/// NOTE(review): presumably the block belongs to the right-hand table and the result reports whether
/// size limits still hold — confirm against IJoin.h.
bool addJoinedBlock(const Block & block, bool check_limits) override;
2020-01-15 20:33:29 +00:00
/// IJoin override. Modifies the passed block in place; `not_processed` is an in/out handle
/// (NotProcessed below derives from ExtraBlock, so it presumably carries continuation state — confirm).
void joinBlock(Block &, ExtraBlockPtr & not_processed) override;
2019-09-19 14:53:03 +00:00
/// IJoin override. Const method that modifies the passed totals block in place.
void joinTotals(Block &) const override;
2019-09-12 18:06:25 +00:00
/// IJoin override. NOTE(review): presumably stores the given block into `totals` — confirm in MergeJoin.cpp.
void setTotals(const Block &) override;
2019-09-23 14:37:42 +00:00
/// IJoin override: reports whether `totals` converts to true
/// (uses Block's conversion to bool).
bool hasTotals() const override
{
    return totals;
}
/// IJoin override: row counter kept by `right_blocks`.
size_t getTotalRowCount() const override
{
    return right_blocks.row_count;
}
/// IJoin override: byte counter kept by `right_blocks`.
size_t getTotalByteCount() const override
{
    return right_blocks.bytes;
}
2019-09-09 19:43:37 +00:00
/// IJoin override. Returns an input stream built from a sample of the result block and a maximum block size.
/// NOTE(review): presumably the stream yields right-hand rows that found no match (see `used_rows_bitmap`
/// and the friend class below) — confirm in MergeJoin.cpp.
BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const override;
2019-09-09 19:43:37 +00:00
private:
/// Gives NonMergeJoinedBlockInputStream access to the private members of this class.
friend class NonMergeJoinedBlockInputStream;
2020-01-15 20:33:29 +00:00
/// ExtraBlock extended with three positions.
/// NOTE(review): presumably filled by extraBlock() from its left_position / right_position /
/// right_block_number arguments so that joinBlock can resume later — confirm in MergeJoin.cpp.
///
/// The members carry default initializers so that a default-initialized instance never holds
/// indeterminate values. The struct remains an aggregate, so existing brace initialization
/// (base first, then the three positions) keeps working unchanged.
struct NotProcessed : public ExtraBlock
{
    size_t left_position = 0;   /// position in the left block
    size_t right_position = 0;  /// position inside the right block
    size_t right_block = 0;     /// number of the right block
};
/// Working state for one right-hand block, passed to leftJoin / semiLeftJoin / allInnerJoin.
/// Only declarations are visible here: the constructor, destructor and setUsed are defined elsewhere.
struct RightBlockInfo
{
/// Shared handle to the right-hand block.
std::shared_ptr<Block> block;
/// Number of the block. NOTE(review): presumably its index among the right blocks — confirm.
size_t block_number;
/// Reference to a counter that lives outside this object; the referenced variable must outlive it.
size_t & skip;
/// Raw pointer to row bitmaps. NOTE(review): presumably non-owning and possibly null — confirm with callers.
RowBitmaps * bitmaps;
/// Per-row "used" marks for this block, owned by this object.
std::unique_ptr<std::vector<bool>> used_bitmap;
/// Takes the block handle, its number, the external skip counter and the bitmaps pointer.
RightBlockInfo(std::shared_ptr<Block> block_, size_t block_number, size_t & skip_, RowBitmaps * bitmaps);
~RightBlockInfo(); /// apply used bitmap
/// Marks a range of rows, given as a start position and a length.
/// NOTE(review): presumably records them in `used_bitmap` — confirm in MergeJoin.cpp.
void setUsed(size_t start, size_t length);
};
/// There are two size limits for the right-hand table: max_rows_in_join and max_bytes_in_join.
2020-08-08 01:01:47 +00:00
/// max_bytes is preferred. If it isn't set we approximate it as (max_rows * bytes/row).
/// Weight functor for the block cache: a block weighs as many units as `Block::bytes()` reports.
struct BlockByteWeight
{
    size_t operator()(const Block & block) const
    {
        return block.bytes();
    }
};

/// LRU cache of blocks keyed by a size_t, weighted by block size in bytes.
using Cache = LRUCache<size_t, Block, std::hash<size_t>, BlockByteWeight>;
2019-09-10 14:51:28 +00:00
/// Reader/writer lock; `mutable` so that const methods can take it.
/// NOTE(review): which members it guards is not visible in this header — confirm in MergeJoin.cpp.
mutable std::shared_mutex rwlock;
/// Shared join description passed to the constructor.
std::shared_ptr<TableJoin> table_join;
/// Size limits for the right-hand table (see the max_rows_in_join / max_bytes_in_join note in this header).
SizeLimits size_limits;
2019-09-12 18:06:25 +00:00
/// Sort and merge descriptions for the left and right sides.
/// NOTE(review): the difference between the *_sort_* and *_merge_* variants is defined in MergeJoin.cpp.
SortDescription left_sort_description;
2019-09-13 16:17:37 +00:00
SortDescription right_sort_description;
SortDescription left_merge_description;
SortDescription right_merge_description;
/// Sample (structure) blocks describing the right-hand table, its keys and the columns to append.
Block right_sample_block;
2019-09-11 16:19:33 +00:00
Block right_table_keys;
2019-09-12 14:09:05 +00:00
Block right_columns_to_add;
/// Right-hand blocks; exposes the `row_count` and `bytes` counters returned by
/// getTotalRowCount / getTotalByteCount.
SortedBlocksWriter::Blocks right_blocks;
/// NOTE(review): presumably per-right-block min/max key rows used as a skip index
/// (see skip_not_intersected) — confirm.
Blocks min_max_right_blocks;
/// Buffer for left-hand blocks. NOTE(review): usage is not visible in this header.
std::shared_ptr<SortedBlocksBuffer> left_blocks_buffer;
/// Bitmaps of used rows. NOTE(review): presumably consumed by createStreamWithNonJoinedRows — confirm.
std::shared_ptr<RowBitmaps> used_rows_bitmap;
/// LRU cache of right blocks; `mutable` so the const loadRightBlock path can populate it.
mutable std::unique_ptr<Cache> cached_right_blocks;
/// Right blocks held through shared pointers.
/// NOTE(review): presumably what loadRightBlock<true> serves — confirm.
std::vector<std::shared_ptr<Block>> loaded_right_blocks;
/// Writer and resulting files for right blocks that were flushed.
/// NOTE(review): presumably created by initRightTableWriter — confirm.
std::unique_ptr<SortedBlocksWriter> disk_writer;
SortedBlocksWriter::SortedFiles flushed_right_blocks;
2019-09-12 18:06:25 +00:00
/// Totals block; hasTotals() reports whether it converts to true.
Block totals;
/// Starts as true; getRightBlock / getRightBlocksCount dispatch on it to the <true> or <false> template variants.
std::atomic<bool> is_in_memory{true};
/// Immutable flags fixed at construction.
/// NOTE(review): presumably derived from `table_join` (join kind/strictness, nullability) — confirm.
const bool nullable_right_side;
const bool nullable_left_side;
const bool is_any_join;
const bool is_all_join;
const bool is_semi_join;
const bool is_inner;
const bool is_left;
const bool is_right;
const bool is_full;
static constexpr const bool skip_not_intersected = true; /// skip index for right blocks
2020-01-17 11:45:43 +00:00
/// Immutable numeric limits fixed at construction.
/// NOTE(review): presumably read from settings — confirm.
const size_t max_joined_block_rows;
const size_t max_rows_in_right_block;
const size_t max_files_to_merge;
2019-09-09 19:43:37 +00:00
2020-04-22 06:01:33 +00:00
/// Const helper that takes ownership of `columns` (rvalue) and modifies `block` in place.
/// NOTE(review): presumably replaces the left-hand columns of the block — confirm in MergeJoin.cpp.
void changeLeftColumns(Block & block, MutableColumns && columns) const;
/// Takes ownership of `columns` (rvalue) and modifies `block` in place.
/// NOTE(review): presumably appends the right-hand columns — confirm in MergeJoin.cpp.
void addRightColumns(Block & block, MutableColumns && columns);
2020-01-15 20:33:29 +00:00
/// Builds an ExtraBlockPtr from the processed block, the accumulated left/right columns and three positions.
/// The position parameters mirror the fields of NotProcessed.
/// `is_all` is a compile-time switch. NOTE(review): presumably ALL vs ANY strictness — confirm.
template <bool is_all>
ExtraBlockPtr extraBlock(Block & processed, MutableColumns && left_columns, MutableColumns && right_columns,
2020-01-15 21:33:21 +00:00
size_t left_position, size_t right_position, size_t right_block_number);
2019-09-13 16:17:37 +00:00
2019-09-12 18:06:25 +00:00
/// NOTE(review): presumably dispatches to mergeInMemoryRightBlocks / mergeFlushedRightBlocks — confirm.
void mergeRightBlocks();
/// Number of right blocks for the storage mode chosen at compile time; called by getRightBlocksCount.
template <bool in_memory>
size_t rightBlocksCount() const;
/// Joins one block; specialised at compile time on storage mode (`in_memory`) and on `is_all`.
/// `not_processed` is an in/out handle with the same type as in joinBlock.
template <bool in_memory, bool is_all>
2020-01-15 20:33:29 +00:00
void joinSortedBlock(Block & block, ExtraBlockPtr & not_processed);
/// Returns the right block at position `pos` for the storage mode chosen at compile time; called by getRightBlock.
template <bool in_memory>
std::shared_ptr<Block> loadRightBlock(size_t pos) const;
/// Run-time dispatch to loadRightBlock<in_memory>: reads `is_in_memory` once and
/// forwards `pos` to the matching template instantiation.
std::shared_ptr<Block> getRightBlock(size_t pos) const
{
    return is_in_memory ? loadRightBlock<true>(pos) : loadRightBlock<false>(pos);
}
/// Run-time dispatch to rightBlocksCount<in_memory>: reads `is_in_memory` once and
/// calls the matching template instantiation.
size_t getRightBlocksCount() const
{
    return is_in_memory ? rightBlocksCount<true>() : rightBlocksCount<false>();
}
/// Join kernels. Each takes the cursor over the left block, the left block itself, the state of one
/// right block and the output column sets for both sides, and returns a bool.
/// NOTE(review): the meaning of the returned bool is not visible in this header — confirm in MergeJoin.cpp.
/// `left_key_tail` is an in/out counter. NOTE(review): presumably rows of the current left key carried
/// over between right blocks — confirm.
template <bool is_all> /// ALL or ANY
bool leftJoin(MergeJoinCursor & left_cursor, const Block & left_block, RightBlockInfo & right_block_info,
MutableColumns & left_columns, MutableColumns & right_columns, size_t & left_key_tail);
/// Same argument shape as leftJoin, but takes the right block info as const and has no `left_key_tail`.
bool semiLeftJoin(MergeJoinCursor & left_cursor, const Block & left_block, const RightBlockInfo & right_block_info,
MutableColumns & left_columns, MutableColumns & right_columns);
/// Same argument shape as leftJoin, without the compile-time `is_all` switch.
bool allInnerJoin(MergeJoinCursor & left_cursor, const Block & left_block, RightBlockInfo & right_block_info,
MutableColumns & left_columns, MutableColumns & right_columns, size_t & left_key_tail);
/// Const helper that returns a new Block produced from `src_block`; the source is not modified.
Block modifyRightBlock(const Block & src_block) const;
/// Takes ownership of a right-hand block (rvalue) and returns a bool.
/// NOTE(review): presumably the same limit-check result that addJoinedBlock reports — confirm.
bool saveRightBlock(Block && block);
/// Right-table maintenance steps.
/// NOTE(review): presumably invoked from mergeRightBlocks / saveRightBlock depending on `is_in_memory` — confirm.
void mergeInMemoryRightBlocks();
void mergeFlushedRightBlocks();
void initRightTableWriter();
2019-09-09 19:43:37 +00:00
};
}