2019-09-09 19:43:37 +00:00
|
|
|
#pragma once
|
|
|
|
|
2023-01-12 15:51:04 +00:00
|
|
|
#include <Common/SharedMutex.h>
|
2022-04-30 11:53:59 +00:00
|
|
|
#include <Common/CacheBase.h>
|
2019-09-09 19:43:37 +00:00
|
|
|
#include <Core/Block.h>
|
2019-09-12 18:06:25 +00:00
|
|
|
#include <Core/SortDescription.h>
|
2019-09-09 19:43:37 +00:00
|
|
|
#include <Interpreters/IJoin.h>
|
2020-04-28 13:55:50 +00:00
|
|
|
#include <Interpreters/SortedBlocksWriter.h>
|
2021-10-15 20:18:20 +00:00
|
|
|
#include <QueryPipeline/SizeLimits.h>
|
2019-09-09 19:43:37 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
class TableJoin;
|
2019-09-13 16:17:37 +00:00
|
|
|
class MergeJoinCursor;
|
|
|
|
struct MergeJoinEqualRange;
|
2020-07-10 18:10:06 +00:00
|
|
|
class RowBitmaps;
|
2021-07-21 17:03:33 +00:00
|
|
|
enum class JoinTableSide;
|
2020-06-16 20:13:18 +00:00
|
|
|
|
2019-09-09 19:43:37 +00:00
|
|
|
class MergeJoin : public IJoin
|
|
|
|
{
|
|
|
|
public:
|
2020-04-07 09:48:47 +00:00
|
|
|
MergeJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_sample_block);
|
2019-09-09 19:43:37 +00:00
|
|
|
|
2021-03-25 18:11:54 +00:00
|
|
|
const TableJoin & getTableJoin() const override { return *table_join; }
|
2020-03-18 03:27:32 +00:00
|
|
|
bool addJoinedBlock(const Block & block, bool check_limits) override;
|
2021-09-13 13:35:17 +00:00
|
|
|
void checkTypesOfKeys(const Block & block) const override;
|
2020-01-15 20:33:29 +00:00
|
|
|
void joinBlock(Block &, ExtraBlockPtr & not_processed) override;
|
2021-07-14 10:02:23 +00:00
|
|
|
|
2019-09-12 18:06:25 +00:00
|
|
|
void setTotals(const Block &) override;
|
2021-07-14 10:02:23 +00:00
|
|
|
|
2020-04-28 13:55:50 +00:00
|
|
|
size_t getTotalRowCount() const override { return right_blocks.row_count; }
|
|
|
|
size_t getTotalByteCount() const override { return right_blocks.bytes; }
|
2021-08-02 10:49:56 +00:00
|
|
|
/// Has to be called only after setTotals()/mergeRightBlocks()
|
|
|
|
bool alwaysReturnsEmptySet() const override { return (is_right || is_inner) && min_max_right_blocks.empty(); }
|
2019-09-09 19:43:37 +00:00
|
|
|
|
2022-10-18 11:43:01 +00:00
|
|
|
IBlocksStreamPtr getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const override;
|
2020-07-10 18:10:06 +00:00
|
|
|
|
2022-07-15 14:57:58 +00:00
|
|
|
static bool isSupported(const std::shared_ptr<TableJoin> & table_join);
|
|
|
|
|
2019-09-09 19:43:37 +00:00
|
|
|
private:
|
2021-08-09 14:30:37 +00:00
|
|
|
friend class NotJoinedMerge;
|
2020-07-10 18:10:06 +00:00
|
|
|
|
2020-01-15 20:33:29 +00:00
|
|
|
struct NotProcessed : public ExtraBlock
|
|
|
|
{
|
2020-01-15 21:33:21 +00:00
|
|
|
size_t left_position;
|
2022-08-18 10:09:47 +00:00
|
|
|
size_t left_key_tail;
|
2020-01-15 21:33:21 +00:00
|
|
|
size_t right_position;
|
|
|
|
size_t right_block;
|
2020-01-15 20:33:29 +00:00
|
|
|
};
|
|
|
|
|
2020-07-10 18:10:06 +00:00
|
|
|
struct RightBlockInfo
|
|
|
|
{
|
|
|
|
std::shared_ptr<Block> block;
|
|
|
|
size_t block_number;
|
|
|
|
size_t & skip;
|
|
|
|
RowBitmaps * bitmaps;
|
|
|
|
std::unique_ptr<std::vector<bool>> used_bitmap;
|
|
|
|
|
|
|
|
RightBlockInfo(std::shared_ptr<Block> block_, size_t block_number, size_t & skip_, RowBitmaps * bitmaps);
|
|
|
|
~RightBlockInfo(); /// apply used bitmap
|
|
|
|
void setUsed(size_t start, size_t length);
|
|
|
|
};
|
|
|
|
|
2019-10-15 16:31:49 +00:00
|
|
|
/// There're two size limits for right-hand table: max_rows_in_join, max_bytes_in_join.
|
2020-08-08 01:01:47 +00:00
|
|
|
/// max_bytes is preferred. If it isn't set we approximate it as (max_rows * bytes/row).
|
2019-10-15 16:31:49 +00:00
|
|
|
struct BlockByteWeight
|
|
|
|
{
|
|
|
|
size_t operator()(const Block & block) const { return block.bytes(); }
|
|
|
|
};
|
|
|
|
|
2022-04-30 11:53:59 +00:00
|
|
|
using Cache = CacheBase<size_t, Block, std::hash<size_t>, BlockByteWeight>;
|
2019-10-15 16:31:49 +00:00
|
|
|
|
2023-01-12 15:51:04 +00:00
|
|
|
mutable SharedMutex rwlock;
|
2020-04-07 09:48:47 +00:00
|
|
|
std::shared_ptr<TableJoin> table_join;
|
2019-10-15 16:31:49 +00:00
|
|
|
SizeLimits size_limits;
|
2019-09-12 18:06:25 +00:00
|
|
|
SortDescription left_sort_description;
|
2019-09-13 16:17:37 +00:00
|
|
|
SortDescription right_sort_description;
|
|
|
|
SortDescription left_merge_description;
|
|
|
|
SortDescription right_merge_description;
|
2019-10-15 16:31:49 +00:00
|
|
|
Block right_sample_block;
|
2019-09-11 16:19:33 +00:00
|
|
|
Block right_table_keys;
|
2021-08-06 14:15:11 +00:00
|
|
|
/// Columns from right side of join, both key and additional
|
2019-09-12 14:09:05 +00:00
|
|
|
Block right_columns_to_add;
|
2020-04-28 13:55:50 +00:00
|
|
|
SortedBlocksWriter::Blocks right_blocks;
|
2021-04-02 13:55:42 +00:00
|
|
|
|
2021-07-21 17:03:33 +00:00
|
|
|
Names key_names_right;
|
|
|
|
Names key_names_left;
|
|
|
|
|
|
|
|
/// Additional conditions for rows to join from JOIN ON section.
|
|
|
|
/// Only rows where conditions are met can be joined.
|
|
|
|
String mask_column_name_left;
|
|
|
|
String mask_column_name_right;
|
|
|
|
|
2021-04-02 13:55:42 +00:00
|
|
|
/// Each block stores first and last row from corresponding sorted block on disk
|
2019-10-15 16:31:49 +00:00
|
|
|
Blocks min_max_right_blocks;
|
2020-06-16 20:13:18 +00:00
|
|
|
std::shared_ptr<SortedBlocksBuffer> left_blocks_buffer;
|
2020-07-10 18:10:06 +00:00
|
|
|
std::shared_ptr<RowBitmaps> used_rows_bitmap;
|
|
|
|
mutable std::unique_ptr<Cache> cached_right_blocks;
|
2019-10-15 16:31:49 +00:00
|
|
|
std::vector<std::shared_ptr<Block>> loaded_right_blocks;
|
2020-04-28 13:55:50 +00:00
|
|
|
std::unique_ptr<SortedBlocksWriter> disk_writer;
|
2021-04-02 13:55:42 +00:00
|
|
|
/// Set of files with sorted blocks
|
2020-04-28 13:55:50 +00:00
|
|
|
SortedBlocksWriter::SortedFiles flushed_right_blocks;
|
|
|
|
std::atomic<bool> is_in_memory{true};
|
2020-01-31 14:29:49 +00:00
|
|
|
const bool is_any_join;
|
2020-01-13 18:00:32 +00:00
|
|
|
const bool is_all_join;
|
2020-01-31 14:29:49 +00:00
|
|
|
const bool is_semi_join;
|
2019-09-16 19:31:22 +00:00
|
|
|
const bool is_inner;
|
|
|
|
const bool is_left;
|
2020-07-10 18:10:06 +00:00
|
|
|
const bool is_right;
|
|
|
|
const bool is_full;
|
2020-06-16 20:13:18 +00:00
|
|
|
static constexpr const bool skip_not_intersected = true; /// skip index for right blocks
|
2020-01-17 11:45:43 +00:00
|
|
|
const size_t max_joined_block_rows;
|
2019-10-15 16:31:49 +00:00
|
|
|
const size_t max_rows_in_right_block;
|
2020-04-28 13:55:50 +00:00
|
|
|
const size_t max_files_to_merge;
|
2019-09-09 19:43:37 +00:00
|
|
|
|
2021-05-17 11:18:03 +00:00
|
|
|
Names lowcard_right_keys;
|
|
|
|
|
2021-11-08 12:36:34 +00:00
|
|
|
Poco::Logger * log;
|
|
|
|
|
2020-04-22 06:01:33 +00:00
|
|
|
void changeLeftColumns(Block & block, MutableColumns && columns) const;
|
2019-09-16 19:31:22 +00:00
|
|
|
void addRightColumns(Block & block, MutableColumns && columns);
|
2020-01-15 20:33:29 +00:00
|
|
|
|
|
|
|
template <bool is_all>
|
|
|
|
ExtraBlockPtr extraBlock(Block & processed, MutableColumns && left_columns, MutableColumns && right_columns,
|
2022-08-18 10:09:47 +00:00
|
|
|
size_t left_position, size_t left_key_tail, size_t right_position,
|
|
|
|
size_t right_block_number);
|
2019-09-13 16:17:37 +00:00
|
|
|
|
2019-09-12 18:06:25 +00:00
|
|
|
void mergeRightBlocks();
|
2019-10-15 16:31:49 +00:00
|
|
|
|
|
|
|
template <bool in_memory>
|
2020-07-10 18:10:06 +00:00
|
|
|
size_t rightBlocksCount() const;
|
2020-01-13 18:00:32 +00:00
|
|
|
template <bool in_memory, bool is_all>
|
2020-01-15 20:33:29 +00:00
|
|
|
void joinSortedBlock(Block & block, ExtraBlockPtr & not_processed);
|
2019-10-15 16:31:49 +00:00
|
|
|
template <bool in_memory>
|
2020-07-10 18:10:06 +00:00
|
|
|
std::shared_ptr<Block> loadRightBlock(size_t pos) const;
|
|
|
|
|
|
|
|
std::shared_ptr<Block> getRightBlock(size_t pos) const
|
|
|
|
{
|
|
|
|
if (is_in_memory)
|
|
|
|
return loadRightBlock<true>(pos);
|
|
|
|
return loadRightBlock<false>(pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t getRightBlocksCount() const
|
|
|
|
{
|
|
|
|
if (is_in_memory)
|
|
|
|
return rightBlocksCount<true>();
|
|
|
|
return rightBlocksCount<false>();
|
|
|
|
}
|
2019-10-15 16:31:49 +00:00
|
|
|
|
2020-01-31 14:29:49 +00:00
|
|
|
template <bool is_all> /// ALL or ANY
|
2020-07-10 18:10:06 +00:00
|
|
|
bool leftJoin(MergeJoinCursor & left_cursor, const Block & left_block, RightBlockInfo & right_block_info,
|
|
|
|
MutableColumns & left_columns, MutableColumns & right_columns, size_t & left_key_tail);
|
|
|
|
bool semiLeftJoin(MergeJoinCursor & left_cursor, const Block & left_block, const RightBlockInfo & right_block_info,
|
2020-01-31 14:29:49 +00:00
|
|
|
MutableColumns & left_columns, MutableColumns & right_columns);
|
2020-07-10 18:10:06 +00:00
|
|
|
bool allInnerJoin(MergeJoinCursor & left_cursor, const Block & left_block, RightBlockInfo & right_block_info,
|
|
|
|
MutableColumns & left_columns, MutableColumns & right_columns, size_t & left_key_tail);
|
2019-10-15 16:31:49 +00:00
|
|
|
|
2020-07-10 18:10:06 +00:00
|
|
|
Block modifyRightBlock(const Block & src_block) const;
|
2019-10-15 16:31:49 +00:00
|
|
|
bool saveRightBlock(Block && block);
|
|
|
|
|
|
|
|
void mergeInMemoryRightBlocks();
|
|
|
|
void mergeFlushedRightBlocks();
|
2020-06-16 20:13:18 +00:00
|
|
|
|
|
|
|
void initRightTableWriter();
|
2021-07-21 17:03:33 +00:00
|
|
|
|
|
|
|
bool needConditionJoinColumn() const;
|
|
|
|
void addConditionJoinColumn(Block & block, JoinTableSide block_side) const;
|
2019-09-09 19:43:37 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|