2022-04-08 10:04:52 +00:00
|
|
|
#pragma once
|
|
|
|
|
2022-03-30 10:07:09 +00:00
|
|
|
#include <Interpreters/IJoin.h>
|
|
|
|
#include <Interpreters/TableJoin.h>
|
2022-04-26 20:22:57 +00:00
|
|
|
#include <DataTypes/DataTypeNullable.h>
|
|
|
|
#include <DataTypes/DataTypeLowCardinality.h>
|
2022-03-30 10:07:09 +00:00
|
|
|
#include <Poco/Logger.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2022-04-26 20:22:57 +00:00
|
|
|
/// Error codes thrown from this header. These are declarations only; the definitions are provided elsewhere.
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
    extern const int TYPE_MISMATCH;
}
|
|
|
|
|
2022-03-30 10:07:09 +00:00
|
|
|
/// Dummy class, actual joining is done by MergeTransform
|
|
|
|
class FullSortingMergeJoin : public IJoin
|
|
|
|
{
|
|
|
|
public:
|
2022-04-05 10:12:42 +00:00
|
|
|
explicit FullSortingMergeJoin(std::shared_ptr<TableJoin> table_join_, const Block & right_sample_block_)
|
2022-03-30 10:07:09 +00:00
|
|
|
: table_join(table_join_)
|
2022-04-01 18:20:58 +00:00
|
|
|
, right_sample_block(right_sample_block_)
|
2022-03-30 10:07:09 +00:00
|
|
|
{
|
|
|
|
LOG_TRACE(&Poco::Logger::get("FullSortingMergeJoin"), "Will use full sorting merge join");
|
|
|
|
}
|
|
|
|
|
2022-04-05 10:12:42 +00:00
|
|
|
const TableJoin & getTableJoin() const override { return *table_join; }
|
2022-03-30 10:07:09 +00:00
|
|
|
|
2022-07-05 16:21:35 +00:00
|
|
|
bool addJoinedBlock(const Block & /* block */, bool /* check_limits */) override
|
|
|
|
{
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::addJoinedBlock should not be called");
|
|
|
|
}
|
2022-03-30 10:07:09 +00:00
|
|
|
|
2022-07-15 14:57:58 +00:00
|
|
|
static bool isSupported(const std::shared_ptr<TableJoin> & table_join)
|
2022-03-30 10:07:09 +00:00
|
|
|
{
|
2022-07-15 14:57:58 +00:00
|
|
|
if (!table_join->oneDisjunct())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
bool support_storage = !table_join->isSpecialStorage();
|
|
|
|
|
|
|
|
const auto & on_expr = table_join->getOnlyClause();
|
|
|
|
bool support_conditions = !on_expr.on_filter_condition_left && !on_expr.on_filter_condition_right;
|
2022-04-26 20:22:57 +00:00
|
|
|
|
2022-05-03 12:05:22 +00:00
|
|
|
/// Key column can change nullability and it's not handled on type conversion stage, so algorithm should be aware of it
|
2022-07-15 14:57:58 +00:00
|
|
|
bool support_using_and_nulls = !table_join->hasUsing() || !table_join->joinUseNulls();
|
|
|
|
|
|
|
|
return support_conditions && support_using_and_nulls && support_storage;
|
|
|
|
}
|
|
|
|
|
|
|
|
void checkTypesOfKeys(const Block & left_block) const override
|
|
|
|
{
|
|
|
|
if (!isSupported(table_join))
|
|
|
|
throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "FullSortingMergeJoin doesn't support specified query");
|
2022-05-02 16:27:43 +00:00
|
|
|
|
2022-04-26 20:22:57 +00:00
|
|
|
const auto & onexpr = table_join->getOnlyClause();
|
|
|
|
for (size_t i = 0; i < onexpr.key_names_left.size(); ++i)
|
|
|
|
{
|
|
|
|
DataTypePtr left_type = left_block.getByName(onexpr.key_names_left[i]).type;
|
|
|
|
DataTypePtr right_type = right_sample_block.getByName(onexpr.key_names_right[i]).type;
|
|
|
|
|
2022-05-02 16:27:43 +00:00
|
|
|
bool type_equals
|
|
|
|
= table_join->hasUsing() ? left_type->equals(*right_type) : removeNullable(left_type)->equals(*removeNullable(right_type));
|
|
|
|
|
2022-07-05 16:21:35 +00:00
|
|
|
/// Even slightly different types should be converted on previous pipeline steps.
|
|
|
|
/// If we still have some differences, we can't join, because the algorithm expects strict type equality.
|
2022-05-02 16:27:43 +00:00
|
|
|
if (!type_equals)
|
2022-04-26 20:22:57 +00:00
|
|
|
{
|
|
|
|
throw DB::Exception(
|
2022-05-02 15:54:42 +00:00
|
|
|
ErrorCodes::TYPE_MISMATCH,
|
2022-04-26 20:22:57 +00:00
|
|
|
"Type mismatch of columns to JOIN by: {} :: {} at left, {} :: {} at right",
|
|
|
|
onexpr.key_names_left[i], left_type->getName(),
|
|
|
|
onexpr.key_names_right[i], right_type->getName());
|
|
|
|
}
|
|
|
|
}
|
2022-03-30 10:07:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Used just to get result header
|
|
|
|
void joinBlock(Block & block, std::shared_ptr<ExtraBlock> & /* not_processed */) override
|
|
|
|
{
|
2022-04-01 18:20:58 +00:00
|
|
|
for (const auto & col : right_sample_block)
|
|
|
|
block.insert(col);
|
|
|
|
block = materializeBlock(block).cloneEmpty();
|
2022-03-30 10:07:09 +00:00
|
|
|
}
|
|
|
|
|
2022-04-01 18:20:58 +00:00
|
|
|
void setTotals(const Block & block) override { totals = block; }
|
|
|
|
const Block & getTotals() const override { return totals; }
|
2022-03-30 10:07:09 +00:00
|
|
|
|
2022-07-05 16:21:35 +00:00
|
|
|
size_t getTotalRowCount() const override
|
|
|
|
{
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::getTotalRowCount should not be called");
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t getTotalByteCount() const override
|
|
|
|
{
|
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::getTotalByteCount should not be called");
|
|
|
|
}
|
|
|
|
|
|
|
|
bool alwaysReturnsEmptySet() const override { return false; }
|
2022-03-30 10:07:09 +00:00
|
|
|
|
2022-10-04 08:20:13 +00:00
|
|
|
std::unique_ptr<NotJoinedBlocks>
|
2022-03-30 10:07:09 +00:00
|
|
|
getNonJoinedBlocks(const Block & /* left_sample_block */, const Block & /* result_sample_block */, UInt64 /* max_block_size */) const override
|
|
|
|
{
|
2022-07-05 16:21:35 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "FullSortingMergeJoin::getNonJoinedBlocks should not be called");
|
2022-03-30 10:07:09 +00:00
|
|
|
}
|
|
|
|
|
2022-07-05 16:21:35 +00:00
|
|
|
/// Left and right streams have the same priority and are processed simultaneously
|
2022-07-12 13:19:28 +00:00
|
|
|
JoinPipelineType pipelineType() const override { return JoinPipelineType::YShaped; }
|
2022-03-30 10:07:09 +00:00
|
|
|
|
|
|
|
private:
|
2022-04-05 10:12:42 +00:00
|
|
|
std::shared_ptr<TableJoin> table_join;
|
2022-04-01 18:20:58 +00:00
|
|
|
Block right_sample_block;
|
|
|
|
Block totals;
|
2022-03-30 10:07:09 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|