2019-09-09 19:43:37 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <memory>
|
2019-09-10 18:39:10 +00:00
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
#include <Core/Names.h>
|
2022-03-21 13:12:17 +00:00
|
|
|
#include <Core/Block.h>
|
2019-09-12 12:24:19 +00:00
|
|
|
#include <Columns/IColumn.h>
|
2022-03-30 10:07:09 +00:00
|
|
|
#include <Common/Exception.h>
|
2019-09-09 19:43:37 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2020-01-15 20:33:29 +00:00
|
|
|
struct ExtraBlock;
|
2020-12-10 23:56:57 +00:00
|
|
|
using ExtraBlockPtr = std::shared_ptr<ExtraBlock>;
|
2019-09-09 19:43:37 +00:00
|
|
|
|
2021-03-25 18:11:54 +00:00
|
|
|
class TableJoin;
|
2021-08-17 13:30:01 +00:00
|
|
|
class NotJoinedBlocks;
|
2022-10-04 08:20:13 +00:00
|
|
|
class IBlocksStream;
|
2022-10-18 11:43:01 +00:00
|
|
|
using IBlocksStreamPtr = std::shared_ptr<IBlocksStream>;
|
2022-10-04 08:20:13 +00:00
|
|
|
|
|
|
|
class IJoin;
|
|
|
|
using JoinPtr = std::shared_ptr<IJoin>;
|
2021-03-25 18:11:54 +00:00
|
|
|
|
2022-03-30 10:07:09 +00:00
|
|
|
/// Describes how the query pipeline is arranged around a join.
enum class JoinPipelineType
{
    /// The right stream is processed first; once the join data structures are ready,
    /// the left stream is processed using them.
    /// The pipeline is not sorted.
    FillRightFirst,

    /// Only the left stream is processed. The right side is already filled.
    FilledRight,

    /// The pipeline is created from the left and right streams processed with a merging transform.
    /// Left and right streams have the same priority and are processed simultaneously.
    /// The pipelines are sorted.
    YShaped,
};
|
|
|
|
|
2019-09-09 19:43:37 +00:00
|
|
|
class IJoin
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
virtual ~IJoin() = default;
|
|
|
|
|
2023-08-03 15:55:15 +00:00
|
|
|
virtual std::string getName() const = 0;
|
|
|
|
|
2021-03-25 18:11:54 +00:00
|
|
|
virtual const TableJoin & getTableJoin() const = 0;
|
|
|
|
|
2019-09-09 19:43:37 +00:00
|
|
|
/// Add block of data from right hand of JOIN.
|
|
|
|
/// @returns false, if some limit was exceeded and you should not insert more data.
|
2023-07-05 17:03:18 +00:00
|
|
|
virtual bool addBlockToJoin(const Block & block, bool check_limits = true) = 0; /// NOLINT
|
2019-09-09 19:43:37 +00:00
|
|
|
|
2022-11-29 11:46:11 +00:00
|
|
|
/* Some initialization may be required before joinBlock() call.
|
|
|
|
* It's better to done in in constructor, but left block exact structure is not known at that moment.
|
|
|
|
* TODO: pass correct left block sample to the constructor.
|
|
|
|
*/
|
|
|
|
virtual void initialize(const Block & /* left_sample_block */) {}
|
|
|
|
|
2021-09-13 13:35:17 +00:00
|
|
|
virtual void checkTypesOfKeys(const Block & block) const = 0;
|
|
|
|
|
2023-07-05 17:03:18 +00:00
|
|
|
/// Join the block with data from left hand of JOIN to the right hand data (that was previously built by calls to addBlockToJoin).
|
2019-09-09 19:43:37 +00:00
|
|
|
/// Could be called from different threads in parallel.
|
2020-01-15 20:33:29 +00:00
|
|
|
virtual void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & not_processed) = 0;
|
2019-09-09 19:43:37 +00:00
|
|
|
|
2022-03-21 13:12:17 +00:00
|
|
|
/** Set/Get totals for right table
|
|
|
|
* Keep "totals" (separate part of dataset, see WITH TOTALS) to use later.
|
|
|
|
*/
|
|
|
|
virtual void setTotals(const Block & block) { totals = block; }
|
|
|
|
virtual const Block & getTotals() const { return totals; }
|
2019-09-09 19:43:37 +00:00
|
|
|
|
2022-07-04 17:10:34 +00:00
|
|
|
/// Number of rows/bytes stored in memory
|
2019-09-10 14:51:28 +00:00
|
|
|
virtual size_t getTotalRowCount() const = 0;
|
2020-02-17 17:08:31 +00:00
|
|
|
virtual size_t getTotalByteCount() const = 0;
|
2022-07-04 17:10:34 +00:00
|
|
|
|
|
|
|
/// Returns true if no data to join with.
|
2021-08-03 09:34:08 +00:00
|
|
|
virtual bool alwaysReturnsEmptySet() const = 0;
|
2019-09-16 12:37:46 +00:00
|
|
|
|
2023-07-05 17:03:18 +00:00
|
|
|
/// StorageJoin/Dictionary is already filled. No need to call addBlockToJoin.
|
2021-04-29 09:08:49 +00:00
|
|
|
/// Different query plan is used for such joins.
|
2022-04-01 18:16:38 +00:00
|
|
|
virtual bool isFilled() const { return pipelineType() == JoinPipelineType::FilledRight; }
|
|
|
|
virtual JoinPipelineType pipelineType() const { return JoinPipelineType::FillRightFirst; }
|
2021-04-28 17:32:12 +00:00
|
|
|
|
2022-04-19 08:07:30 +00:00
|
|
|
// That can run FillingRightJoinSideTransform parallelly
|
|
|
|
virtual bool supportParallelJoin() const { return false; }
|
2022-10-27 15:25:50 +00:00
|
|
|
virtual bool supportTotals() const { return true; }
|
2022-04-19 08:07:30 +00:00
|
|
|
|
2022-06-16 12:09:23 +00:00
|
|
|
/// Peek next stream of delayed joined blocks.
|
2022-10-18 11:43:01 +00:00
|
|
|
virtual IBlocksStreamPtr getDelayedBlocks() { return nullptr; }
|
2022-10-06 17:47:40 +00:00
|
|
|
virtual bool hasDelayedBlocks() const { return false; }
|
2022-10-04 08:20:13 +00:00
|
|
|
|
2022-10-18 11:43:01 +00:00
|
|
|
virtual IBlocksStreamPtr
|
2022-10-04 08:20:13 +00:00
|
|
|
getNonJoinedBlocks(const Block & left_sample_block, const Block & result_sample_block, UInt64 max_block_size) const = 0;
|
2022-09-07 08:00:14 +00:00
|
|
|
|
2022-03-21 13:12:17 +00:00
|
|
|
private:
|
|
|
|
Block totals;
|
2019-09-09 19:43:37 +00:00
|
|
|
};
|
|
|
|
|
2022-10-04 08:20:13 +00:00
|
|
|
class IBlocksStream
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
/// Returns empty block on EOF
|
2022-10-05 12:40:32 +00:00
|
|
|
Block next()
|
|
|
|
{
|
|
|
|
if (finished)
|
|
|
|
return {};
|
|
|
|
|
|
|
|
if (Block res = nextImpl())
|
|
|
|
return res;
|
|
|
|
|
|
|
|
finished = true;
|
|
|
|
return {};
|
|
|
|
}
|
2022-03-30 10:07:09 +00:00
|
|
|
|
2022-10-04 08:20:13 +00:00
|
|
|
virtual ~IBlocksStream() = default;
|
2022-10-05 12:40:32 +00:00
|
|
|
|
|
|
|
bool isFinished() const { return finished; }
|
|
|
|
|
|
|
|
protected:
|
|
|
|
virtual Block nextImpl() = 0;
|
|
|
|
|
2022-10-27 14:23:55 +00:00
|
|
|
std::atomic_bool finished{false};
|
2022-10-05 12:40:32 +00:00
|
|
|
|
2022-10-04 08:20:13 +00:00
|
|
|
};
|
2019-09-09 19:43:37 +00:00
|
|
|
|
2019-09-11 18:03:21 +00:00
|
|
|
}
|