mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-25 09:02:00 +00:00
Refactor NotJoined pt1
This commit is contained in:
parent
98eb619b4b
commit
afa748c717
@ -1468,40 +1468,23 @@ struct AdderNonJoined
|
||||
|
||||
|
||||
/// Stream from not joined earlier rows of the right table.
|
||||
class NonJoinedBlockInputStream : private NotJoined, public IBlockInputStream
|
||||
class NonJoinedBlockInputStream final : public NotJoined
|
||||
{
|
||||
public:
|
||||
NonJoinedBlockInputStream(const HashJoin & parent_, const Block & result_sample_block_, UInt64 max_block_size_)
|
||||
: NotJoined(*parent_.table_join,
|
||||
parent_.savedBlockSample(),
|
||||
parent_.right_sample_block,
|
||||
result_sample_block_)
|
||||
NonJoinedBlockInputStream(
|
||||
const HashJoin & parent_,
|
||||
const Block & result_sample_block_,
|
||||
size_t left_columns_count,
|
||||
UInt64 max_block_size_)
|
||||
: NotJoined(parent_.savedBlockSample(), result_sample_block_,
|
||||
left_columns_count, parent_.table_join->leftToRightKeyRemap())
|
||||
, parent(parent_)
|
||||
, max_block_size(max_block_size_)
|
||||
{}
|
||||
|
||||
String getName() const override { return "NonJoined"; }
|
||||
Block getHeader() const override { return result_sample_block; }
|
||||
|
||||
protected:
|
||||
Block readImpl() override
|
||||
size_t fillColumns(MutableColumns & columns_right) override
|
||||
{
|
||||
if (parent.data->blocks.empty())
|
||||
return Block();
|
||||
return createBlock();
|
||||
}
|
||||
|
||||
private:
|
||||
const HashJoin & parent;
|
||||
UInt64 max_block_size;
|
||||
|
||||
std::any position;
|
||||
std::optional<HashJoin::BlockNullmapList::const_iterator> nulls_position;
|
||||
|
||||
Block createBlock()
|
||||
{
|
||||
MutableColumns columns_right = saved_block_sample.cloneEmptyColumns();
|
||||
|
||||
size_t rows_added = 0;
|
||||
|
||||
auto fill_callback = [&](auto, auto strictness, auto & map)
|
||||
@ -1513,22 +1496,16 @@ private:
|
||||
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
fillNullsFromBlocks(columns_right, rows_added);
|
||||
if (!rows_added)
|
||||
return {};
|
||||
|
||||
Block res = result_sample_block.cloneEmpty();
|
||||
addLeftColumns(res, rows_added);
|
||||
addRightColumns(res, columns_right);
|
||||
copySameKeys(res);
|
||||
correctLowcardAndNullability(res);
|
||||
|
||||
#ifndef NDEBUG
|
||||
assertBlocksHaveEqualStructure(res, result_sample_block, getName());
|
||||
#endif
|
||||
|
||||
return res;
|
||||
return rows_added;
|
||||
}
|
||||
|
||||
private:
|
||||
const HashJoin & parent;
|
||||
UInt64 max_block_size;
|
||||
|
||||
std::any position;
|
||||
std::optional<HashJoin::BlockNullmapList::const_iterator> nulls_position;
|
||||
|
||||
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
|
||||
size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
|
||||
{
|
||||
@ -1610,12 +1587,14 @@ private:
|
||||
BlockInputStreamPtr HashJoin::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const
|
||||
{
|
||||
if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
|
||||
table_join->strictness() == ASTTableJoin::Strictness::Semi)
|
||||
table_join->strictness() == ASTTableJoin::Strictness::Semi ||
|
||||
!isRightOrFull(table_join->kind()))
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
if (isRightOrFull(table_join->kind()))
|
||||
return std::make_shared<NonJoinedBlockInputStream>(*this, result_sample_block, max_block_size);
|
||||
return {};
|
||||
size_t left_columns_count = result_sample_block.columns() - required_right_keys.columns() - sample_block_with_columns_to_add.columns();
|
||||
return std::make_shared<NonJoinedBlockInputStream>(*this, result_sample_block, left_columns_count, max_block_size);
|
||||
}
|
||||
|
||||
void HashJoin::reuseJoinedData(const HashJoin & join)
|
||||
|
@ -1,7 +1,8 @@
|
||||
#include <limits>
|
||||
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Columns/ColumnLowCardinality.h>
|
||||
|
||||
#include <Core/SortCursor.h>
|
||||
#include <DataStreams/TemporaryFileStream.h>
|
||||
#include <DataStreams/materializeBlock.h>
|
||||
@ -723,15 +724,7 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed)
|
||||
if (needConditionJoinColumn())
|
||||
block.erase(deriveTempName(mask_column_name_left));
|
||||
|
||||
for (const auto & column_name : lowcard_keys)
|
||||
{
|
||||
if (!block.has(column_name))
|
||||
continue;
|
||||
if (auto & col = block.getByName(column_name); !col.type->lowCardinality())
|
||||
JoinCommon::changeLowCardinalityInplace(col);
|
||||
}
|
||||
|
||||
JoinCommon::restoreLowCardinalityInplace(block);
|
||||
JoinCommon::restoreLowCardinalityInplace(block, lowcard_keys);
|
||||
}
|
||||
|
||||
template <bool in_memory, bool is_all>
|
||||
@ -1035,55 +1028,25 @@ void MergeJoin::initRightTableWriter()
|
||||
}
|
||||
|
||||
/// Stream from not joined earlier rows of the right table.
|
||||
class NonMergeJoinedBlockInputStream : private NotJoined, public IBlockInputStream
|
||||
class NonMergeJoinedBlockInputStream final : public NotJoined
|
||||
{
|
||||
public:
|
||||
NonMergeJoinedBlockInputStream(const MergeJoin & parent_,
|
||||
const Block & result_sample_block_,
|
||||
const Names & key_names_right_,
|
||||
const Block & result_sample_block,
|
||||
size_t left_columns_count,
|
||||
UInt64 max_block_size_)
|
||||
: NotJoined(*parent_.table_join,
|
||||
parent_.modifyRightBlock(parent_.right_sample_block),
|
||||
parent_.right_sample_block,
|
||||
result_sample_block_,
|
||||
{}, key_names_right_)
|
||||
: NotJoined(parent_.modifyRightBlock(parent_.right_sample_block),
|
||||
result_sample_block,
|
||||
left_columns_count,
|
||||
parent_.table_join->leftToRightKeyRemap())
|
||||
, parent(parent_)
|
||||
, max_block_size(max_block_size_)
|
||||
{}
|
||||
|
||||
String getName() const override { return "NonMergeJoined"; }
|
||||
Block getHeader() const override { return result_sample_block; }
|
||||
|
||||
protected:
|
||||
Block readImpl() override
|
||||
{
|
||||
if (parent.getRightBlocksCount())
|
||||
return createBlock();
|
||||
return {};
|
||||
}
|
||||
|
||||
private:
|
||||
const MergeJoin & parent;
|
||||
size_t max_block_size;
|
||||
size_t block_number = 0;
|
||||
|
||||
Block createBlock()
|
||||
{
|
||||
MutableColumns columns_right = saved_block_sample.cloneEmptyColumns();
|
||||
|
||||
size_t rows_added = fillColumns(columns_right);
|
||||
if (!rows_added)
|
||||
return {};
|
||||
|
||||
Block res = result_sample_block.cloneEmpty();
|
||||
addLeftColumns(res, rows_added);
|
||||
addRightColumns(res, columns_right);
|
||||
copySameKeys(res);
|
||||
correctLowcardAndNullability(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t fillColumns(MutableColumns & columns_right)
|
||||
size_t fillColumns(MutableColumns & columns_right) override
|
||||
{
|
||||
const RowBitmaps & bitmaps = *parent.used_rows_bitmap;
|
||||
size_t rows_added = 0;
|
||||
@ -1127,13 +1090,19 @@ private:
|
||||
|
||||
return rows_added;
|
||||
}
|
||||
|
||||
private:
|
||||
const MergeJoin & parent;
|
||||
size_t max_block_size;
|
||||
size_t block_number = 0;
|
||||
};
|
||||
|
||||
|
||||
BlockInputStreamPtr MergeJoin::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const
|
||||
{
|
||||
size_t left_columns_count = result_sample_block.columns() - right_columns_to_add.columns();
|
||||
if (table_join->strictness() == ASTTableJoin::Strictness::All && (is_right || is_full))
|
||||
return std::make_shared<NonMergeJoinedBlockInputStream>(*this, result_sample_block, key_names_right, max_block_size);
|
||||
return std::make_shared<NonMergeJoinedBlockInputStream>(*this, result_sample_block, left_columns_count, max_block_size);
|
||||
return {};
|
||||
}
|
||||
|
||||
|
@ -78,6 +78,7 @@ private:
|
||||
SortDescription right_merge_description;
|
||||
Block right_sample_block;
|
||||
Block right_table_keys;
|
||||
/// Columns from right side of join, both key and additional
|
||||
Block right_columns_to_add;
|
||||
SortedBlocksWriter::Blocks right_blocks;
|
||||
|
||||
|
@ -472,6 +472,24 @@ void TableJoin::addJoinCondition(const ASTPtr & ast, bool is_left)
|
||||
on_filter_condition_asts_right.push_back(ast);
|
||||
}
|
||||
|
||||
std::unordered_map<String, String> TableJoin::leftToRightKeyRemap() const
|
||||
{
|
||||
std::unordered_map<String, String> left_to_right_key_remap;
|
||||
if (hasUsing())
|
||||
{
|
||||
const auto & required_right_keys = requiredRightKeys();
|
||||
for (size_t i = 0; i < key_names_left.size(); ++i)
|
||||
{
|
||||
const String & left_key_name = key_names_left[i];
|
||||
const String & right_key_name = key_names_right[i];
|
||||
|
||||
if (!required_right_keys.contains(right_key_name))
|
||||
left_to_right_key_remap[left_key_name] = right_key_name;
|
||||
}
|
||||
}
|
||||
return left_to_right_key_remap;
|
||||
}
|
||||
|
||||
/// Returns all conditions related to one table joined with 'and' function
|
||||
static ASTPtr buildJoinConditionColumn(const ASTs & on_filter_condition_asts)
|
||||
{
|
||||
|
@ -230,6 +230,7 @@ public:
|
||||
Block getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const;
|
||||
|
||||
String renamedRightColumnName(const String & name) const;
|
||||
std::unordered_map<String, String> leftToRightKeyRemap() const;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -314,8 +314,16 @@ void removeLowCardinalityInplace(Block & block, const Names & names, bool change
|
||||
}
|
||||
}
|
||||
|
||||
void restoreLowCardinalityInplace(Block & block)
|
||||
void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys)
|
||||
{
|
||||
for (const auto & column_name : lowcard_keys)
|
||||
{
|
||||
if (!block.has(column_name))
|
||||
continue;
|
||||
if (auto & col = block.getByName(column_name); !col.type->lowCardinality())
|
||||
JoinCommon::changeLowCardinalityInplace(col);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < block.columns(); ++i)
|
||||
{
|
||||
auto & col = block.getByPosition(i);
|
||||
@ -484,49 +492,21 @@ void splitAdditionalColumns(const Names & key_names, const Block & sample_block,
|
||||
|
||||
}
|
||||
|
||||
|
||||
NotJoined::NotJoined(const TableJoin & table_join, const Block & saved_block_sample_, const Block & right_sample_block,
|
||||
const Block & result_sample_block_, const Names & key_names_left_, const Names & key_names_right_)
|
||||
NotJoined::NotJoined(const Block & saved_block_sample_,
|
||||
const Block & result_sample_block_,
|
||||
size_t left_columns_count,
|
||||
const LeftToRightKeyRemap & left_to_right_key_remap)
|
||||
: saved_block_sample(saved_block_sample_)
|
||||
, result_sample_block(materializeBlock(result_sample_block_))
|
||||
, key_names_left(key_names_left_.empty() ? table_join.keyNamesLeft() : key_names_left_)
|
||||
, key_names_right(key_names_right_.empty() ? table_join.keyNamesRight() : key_names_right_)
|
||||
{
|
||||
std::vector<String> tmp;
|
||||
Block right_table_keys;
|
||||
Block sample_block_with_columns_to_add;
|
||||
|
||||
JoinCommon::splitAdditionalColumns(key_names_right, right_sample_block, right_table_keys,
|
||||
sample_block_with_columns_to_add);
|
||||
Block required_right_keys = table_join.getRequiredRightKeys(right_table_keys, tmp);
|
||||
|
||||
std::unordered_map<size_t, size_t> left_to_right_key_remap;
|
||||
|
||||
if (table_join.hasUsing())
|
||||
{
|
||||
for (size_t i = 0; i < key_names_left.size(); ++i)
|
||||
{
|
||||
const String & left_key_name = key_names_left[i];
|
||||
const String & right_key_name = key_names_right[i];
|
||||
|
||||
size_t left_key_pos = result_sample_block.getPositionByName(left_key_name);
|
||||
size_t right_key_pos = saved_block_sample.getPositionByName(right_key_name);
|
||||
|
||||
if (!required_right_keys.has(right_key_name))
|
||||
left_to_right_key_remap[left_key_pos] = right_key_pos;
|
||||
}
|
||||
}
|
||||
|
||||
/// result_sample_block: left_sample_block + left expressions, right not key columns, required right keys
|
||||
size_t left_columns_count = result_sample_block.columns() -
|
||||
sample_block_with_columns_to_add.columns() - required_right_keys.columns();
|
||||
|
||||
for (size_t left_pos = 0; left_pos < left_columns_count; ++left_pos)
|
||||
{
|
||||
/// We need right 'x' for 'RIGHT JOIN ... USING(x)'.
|
||||
if (left_to_right_key_remap.count(left_pos))
|
||||
/// We need right 'x' for 'RIGHT JOIN ... USING(x)'
|
||||
auto left_name = result_sample_block.getByPosition(left_pos).name;
|
||||
const auto & right_key = left_to_right_key_remap.find(left_name);
|
||||
if (right_key != left_to_right_key_remap.end())
|
||||
{
|
||||
size_t right_key_pos = left_to_right_key_remap[left_pos];
|
||||
size_t right_key_pos = saved_block_sample.getPositionByName(right_key->second);
|
||||
setRightIndex(right_key_pos, left_pos);
|
||||
}
|
||||
else
|
||||
@ -558,7 +538,7 @@ NotJoined::NotJoined(const TableJoin & table_join, const Block & saved_block_sam
|
||||
|
||||
void NotJoined::setRightIndex(size_t right_pos, size_t result_position)
|
||||
{
|
||||
if (!column_indices_right.count(right_pos))
|
||||
if (!column_indices_right.contains(right_pos))
|
||||
{
|
||||
column_indices_right[right_pos] = result_position;
|
||||
extractColumnChanges(right_pos, result_position);
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <Interpreters/IJoin.h>
|
||||
#include <Interpreters/ActionsDAG.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -30,7 +31,7 @@ ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names);
|
||||
ColumnRawPtrs getRawPointers(const Columns & columns);
|
||||
void removeLowCardinalityInplace(Block & block);
|
||||
void removeLowCardinalityInplace(Block & block, const Names & names, bool change_type = true);
|
||||
void restoreLowCardinalityInplace(Block & block);
|
||||
void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys);
|
||||
|
||||
ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right);
|
||||
|
||||
@ -64,40 +65,69 @@ void changeLowCardinalityInplace(ColumnWithTypeAndName & column);
|
||||
}
|
||||
|
||||
/// Creates result from right table data in RIGHT and FULL JOIN when keys are not present in left table.
|
||||
class NotJoined
|
||||
class NotJoined : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
NotJoined(const TableJoin & table_join, const Block & saved_block_sample_, const Block & right_sample_block,
|
||||
const Block & result_sample_block_, const Names & key_names_left_ = {}, const Names & key_names_right_ = {});
|
||||
using LeftToRightKeyRemap = std::unordered_map<String, String>;
|
||||
|
||||
NotJoined(const Block & saved_block_sample_,
|
||||
const Block & result_sample_block_,
|
||||
size_t left_columns_count,
|
||||
const LeftToRightKeyRemap & left_to_right_key_remap);
|
||||
|
||||
String getName() const override { return "NonJoined"; }
|
||||
Block getHeader() const override { return result_sample_block; }
|
||||
|
||||
protected:
|
||||
Block readImpl() override final
|
||||
{
|
||||
Block result = saved_block_sample.cloneEmpty();
|
||||
MutableColumns columns_right = result.mutateColumns();
|
||||
|
||||
size_t rows_added = fillColumns(columns_right);
|
||||
if (rows_added == 0)
|
||||
return {};
|
||||
|
||||
Block res = result_sample_block.cloneEmpty();
|
||||
addLeftColumns(res, rows_added);
|
||||
addRightColumns(res, columns_right);
|
||||
copySameKeys(res);
|
||||
correctLowcardAndNullability(res);
|
||||
|
||||
#ifndef NDEBUG
|
||||
assertBlocksHaveEqualStructure(res, result_sample_block, getName());
|
||||
#endif
|
||||
return res;
|
||||
}
|
||||
|
||||
virtual size_t fillColumns(MutableColumns & columns_right) = 0;
|
||||
|
||||
private:
|
||||
void extractColumnChanges(size_t right_pos, size_t result_pos);
|
||||
void correctLowcardAndNullability(Block & block);
|
||||
void addLeftColumns(Block & block, size_t rows_added) const;
|
||||
void addRightColumns(Block & block, MutableColumns & columns_right) const;
|
||||
void copySameKeys(Block & block) const;
|
||||
|
||||
protected:
|
||||
/// Right block saved in Join
|
||||
Block saved_block_sample;
|
||||
|
||||
/// Output of join
|
||||
Block result_sample_block;
|
||||
|
||||
Names key_names_left;
|
||||
Names key_names_right;
|
||||
|
||||
~NotJoined() = default;
|
||||
|
||||
private:
|
||||
/// Indices of columns in result_sample_block that should be generated
|
||||
std::vector<size_t> column_indices_left;
|
||||
/// Indices of columns that come from the right-side table: right_pos -> result_pos
|
||||
std::unordered_map<size_t, size_t> column_indices_right;
|
||||
///
|
||||
|
||||
std::unordered_map<size_t, size_t> same_result_keys;
|
||||
/// Which right columns (saved in parent) need nullability change before placing them in result block
|
||||
|
||||
/// Which right columns (saved in parent) need Nullability/LowCardinality change
|
||||
/// before placing them in result block
|
||||
std::vector<std::pair<size_t, bool>> right_nullability_changes;
|
||||
/// Which right columns (saved in parent) need LowCardinality change before placing them in result block
|
||||
std::vector<std::pair<size_t, bool>> right_lowcard_changes;
|
||||
|
||||
void setRightIndex(size_t right_pos, size_t result_position);
|
||||
void extractColumnChanges(size_t right_pos, size_t result_pos);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
#include <Processors/Transforms/JoiningTransform.h>
|
||||
#include <Interpreters/ExpressionAnalyzer.h>
|
||||
#include <Interpreters/join_common.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user