Refactor NotJoined pt1

This commit is contained in:
vdimir 2021-08-06 17:15:11 +03:00
parent 98eb619b4b
commit afa748c717
No known key found for this signature in database
GPG Key ID: F57B3E10A21DBB31
8 changed files with 125 additions and 148 deletions

View File

@ -1468,40 +1468,23 @@ struct AdderNonJoined
/// Stream from not joined earlier rows of the right table.
class NonJoinedBlockInputStream : private NotJoined, public IBlockInputStream
class NonJoinedBlockInputStream final : public NotJoined
{
public:
NonJoinedBlockInputStream(const HashJoin & parent_, const Block & result_sample_block_, UInt64 max_block_size_)
: NotJoined(*parent_.table_join,
parent_.savedBlockSample(),
parent_.right_sample_block,
result_sample_block_)
NonJoinedBlockInputStream(
const HashJoin & parent_,
const Block & result_sample_block_,
size_t left_columns_count,
UInt64 max_block_size_)
: NotJoined(parent_.savedBlockSample(), result_sample_block_,
left_columns_count, parent_.table_join->leftToRightKeyRemap())
, parent(parent_)
, max_block_size(max_block_size_)
{}
String getName() const override { return "NonJoined"; }
Block getHeader() const override { return result_sample_block; }
protected:
Block readImpl() override
size_t fillColumns(MutableColumns & columns_right) override
{
if (parent.data->blocks.empty())
return Block();
return createBlock();
}
private:
const HashJoin & parent;
UInt64 max_block_size;
std::any position;
std::optional<HashJoin::BlockNullmapList::const_iterator> nulls_position;
Block createBlock()
{
MutableColumns columns_right = saved_block_sample.cloneEmptyColumns();
size_t rows_added = 0;
auto fill_callback = [&](auto, auto strictness, auto & map)
@ -1513,22 +1496,16 @@ private:
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
fillNullsFromBlocks(columns_right, rows_added);
if (!rows_added)
return {};
Block res = result_sample_block.cloneEmpty();
addLeftColumns(res, rows_added);
addRightColumns(res, columns_right);
copySameKeys(res);
correctLowcardAndNullability(res);
#ifndef NDEBUG
assertBlocksHaveEqualStructure(res, result_sample_block, getName());
#endif
return res;
return rows_added;
}
private:
const HashJoin & parent;
UInt64 max_block_size;
std::any position;
std::optional<HashJoin::BlockNullmapList::const_iterator> nulls_position;
template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
{
@ -1610,12 +1587,14 @@ private:
BlockInputStreamPtr HashJoin::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const
{
if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
table_join->strictness() == ASTTableJoin::Strictness::Semi)
table_join->strictness() == ASTTableJoin::Strictness::Semi ||
!isRightOrFull(table_join->kind()))
{
return {};
}
if (isRightOrFull(table_join->kind()))
return std::make_shared<NonJoinedBlockInputStream>(*this, result_sample_block, max_block_size);
return {};
size_t left_columns_count = result_sample_block.columns() - required_right_keys.columns() - sample_block_with_columns_to_add.columns();
return std::make_shared<NonJoinedBlockInputStream>(*this, result_sample_block, left_columns_count, max_block_size);
}
void HashJoin::reuseJoinedData(const HashJoin & join)

View File

@ -1,7 +1,8 @@
#include <limits>
#include <Columns/ColumnNullable.h>
#include <Core/NamesAndTypes.h>
#include <Columns/ColumnLowCardinality.h>
#include <Core/SortCursor.h>
#include <DataStreams/TemporaryFileStream.h>
#include <DataStreams/materializeBlock.h>
@ -723,15 +724,7 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed)
if (needConditionJoinColumn())
block.erase(deriveTempName(mask_column_name_left));
for (const auto & column_name : lowcard_keys)
{
if (!block.has(column_name))
continue;
if (auto & col = block.getByName(column_name); !col.type->lowCardinality())
JoinCommon::changeLowCardinalityInplace(col);
}
JoinCommon::restoreLowCardinalityInplace(block);
JoinCommon::restoreLowCardinalityInplace(block, lowcard_keys);
}
template <bool in_memory, bool is_all>
@ -1035,55 +1028,25 @@ void MergeJoin::initRightTableWriter()
}
/// Stream from not joined earlier rows of the right table.
class NonMergeJoinedBlockInputStream : private NotJoined, public IBlockInputStream
class NonMergeJoinedBlockInputStream final : public NotJoined
{
public:
NonMergeJoinedBlockInputStream(const MergeJoin & parent_,
const Block & result_sample_block_,
const Names & key_names_right_,
const Block & result_sample_block,
size_t left_columns_count,
UInt64 max_block_size_)
: NotJoined(*parent_.table_join,
parent_.modifyRightBlock(parent_.right_sample_block),
parent_.right_sample_block,
result_sample_block_,
{}, key_names_right_)
: NotJoined(parent_.modifyRightBlock(parent_.right_sample_block),
result_sample_block,
left_columns_count,
parent_.table_join->leftToRightKeyRemap())
, parent(parent_)
, max_block_size(max_block_size_)
{}
String getName() const override { return "NonMergeJoined"; }
Block getHeader() const override { return result_sample_block; }
protected:
Block readImpl() override
{
if (parent.getRightBlocksCount())
return createBlock();
return {};
}
private:
const MergeJoin & parent;
size_t max_block_size;
size_t block_number = 0;
Block createBlock()
{
MutableColumns columns_right = saved_block_sample.cloneEmptyColumns();
size_t rows_added = fillColumns(columns_right);
if (!rows_added)
return {};
Block res = result_sample_block.cloneEmpty();
addLeftColumns(res, rows_added);
addRightColumns(res, columns_right);
copySameKeys(res);
correctLowcardAndNullability(res);
return res;
}
size_t fillColumns(MutableColumns & columns_right)
size_t fillColumns(MutableColumns & columns_right) override
{
const RowBitmaps & bitmaps = *parent.used_rows_bitmap;
size_t rows_added = 0;
@ -1127,13 +1090,19 @@ private:
return rows_added;
}
private:
const MergeJoin & parent;
size_t max_block_size;
size_t block_number = 0;
};
BlockInputStreamPtr MergeJoin::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const
{
size_t left_columns_count = result_sample_block.columns() - right_columns_to_add.columns();
if (table_join->strictness() == ASTTableJoin::Strictness::All && (is_right || is_full))
return std::make_shared<NonMergeJoinedBlockInputStream>(*this, result_sample_block, key_names_right, max_block_size);
return std::make_shared<NonMergeJoinedBlockInputStream>(*this, result_sample_block, left_columns_count, max_block_size);
return {};
}

View File

@ -78,6 +78,7 @@ private:
SortDescription right_merge_description;
Block right_sample_block;
Block right_table_keys;
/// Columns from right side of join, both key and additional
Block right_columns_to_add;
SortedBlocksWriter::Blocks right_blocks;

View File

@ -472,6 +472,24 @@ void TableJoin::addJoinCondition(const ASTPtr & ast, bool is_left)
on_filter_condition_asts_right.push_back(ast);
}
std::unordered_map<String, String> TableJoin::leftToRightKeyRemap() const
{
std::unordered_map<String, String> left_to_right_key_remap;
if (hasUsing())
{
const auto & required_right_keys = requiredRightKeys();
for (size_t i = 0; i < key_names_left.size(); ++i)
{
const String & left_key_name = key_names_left[i];
const String & right_key_name = key_names_right[i];
if (!required_right_keys.contains(right_key_name))
left_to_right_key_remap[left_key_name] = right_key_name;
}
}
return left_to_right_key_remap;
}
/// Returns all conditions related to one table joined with 'and' function
static ASTPtr buildJoinConditionColumn(const ASTs & on_filter_condition_asts)
{

View File

@ -230,6 +230,7 @@ public:
Block getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const;
String renamedRightColumnName(const String & name) const;
std::unordered_map<String, String> leftToRightKeyRemap() const;
};
}

View File

@ -314,8 +314,16 @@ void removeLowCardinalityInplace(Block & block, const Names & names, bool change
}
}
void restoreLowCardinalityInplace(Block & block)
void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys)
{
for (const auto & column_name : lowcard_keys)
{
if (!block.has(column_name))
continue;
if (auto & col = block.getByName(column_name); !col.type->lowCardinality())
JoinCommon::changeLowCardinalityInplace(col);
}
for (size_t i = 0; i < block.columns(); ++i)
{
auto & col = block.getByPosition(i);
@ -484,49 +492,21 @@ void splitAdditionalColumns(const Names & key_names, const Block & sample_block,
}
NotJoined::NotJoined(const TableJoin & table_join, const Block & saved_block_sample_, const Block & right_sample_block,
const Block & result_sample_block_, const Names & key_names_left_, const Names & key_names_right_)
NotJoined::NotJoined(const Block & saved_block_sample_,
const Block & result_sample_block_,
size_t left_columns_count,
const LeftToRightKeyRemap & left_to_right_key_remap)
: saved_block_sample(saved_block_sample_)
, result_sample_block(materializeBlock(result_sample_block_))
, key_names_left(key_names_left_.empty() ? table_join.keyNamesLeft() : key_names_left_)
, key_names_right(key_names_right_.empty() ? table_join.keyNamesRight() : key_names_right_)
{
std::vector<String> tmp;
Block right_table_keys;
Block sample_block_with_columns_to_add;
JoinCommon::splitAdditionalColumns(key_names_right, right_sample_block, right_table_keys,
sample_block_with_columns_to_add);
Block required_right_keys = table_join.getRequiredRightKeys(right_table_keys, tmp);
std::unordered_map<size_t, size_t> left_to_right_key_remap;
if (table_join.hasUsing())
{
for (size_t i = 0; i < key_names_left.size(); ++i)
{
const String & left_key_name = key_names_left[i];
const String & right_key_name = key_names_right[i];
size_t left_key_pos = result_sample_block.getPositionByName(left_key_name);
size_t right_key_pos = saved_block_sample.getPositionByName(right_key_name);
if (!required_right_keys.has(right_key_name))
left_to_right_key_remap[left_key_pos] = right_key_pos;
}
}
/// result_sample_block: left_sample_block + left expressions, right not key columns, required right keys
size_t left_columns_count = result_sample_block.columns() -
sample_block_with_columns_to_add.columns() - required_right_keys.columns();
for (size_t left_pos = 0; left_pos < left_columns_count; ++left_pos)
{
/// We need right 'x' for 'RIGHT JOIN ... USING(x)'.
if (left_to_right_key_remap.count(left_pos))
/// We need right 'x' for 'RIGHT JOIN ... USING(x)'
auto left_name = result_sample_block.getByPosition(left_pos).name;
const auto & right_key = left_to_right_key_remap.find(left_name);
if (right_key != left_to_right_key_remap.end())
{
size_t right_key_pos = left_to_right_key_remap[left_pos];
size_t right_key_pos = saved_block_sample.getPositionByName(right_key->second);
setRightIndex(right_key_pos, left_pos);
}
else
@ -558,7 +538,7 @@ NotJoined::NotJoined(const TableJoin & table_join, const Block & saved_block_sam
void NotJoined::setRightIndex(size_t right_pos, size_t result_position)
{
if (!column_indices_right.count(right_pos))
if (!column_indices_right.contains(right_pos))
{
column_indices_right[right_pos] = result_position;
extractColumnChanges(right_pos, result_position);

View File

@ -5,6 +5,7 @@
#include <Interpreters/IJoin.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ExpressionActions.h>
#include <DataStreams/IBlockInputStream.h>
namespace DB
{
@ -30,7 +31,7 @@ ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names);
ColumnRawPtrs getRawPointers(const Columns & columns);
void removeLowCardinalityInplace(Block & block);
void removeLowCardinalityInplace(Block & block, const Names & names, bool change_type = true);
void restoreLowCardinalityInplace(Block & block);
void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys);
ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right);
@ -64,40 +65,69 @@ void changeLowCardinalityInplace(ColumnWithTypeAndName & column);
}
/// Creates result from right table data in RIGHT and FULL JOIN when keys are not present in left table.
class NotJoined
class NotJoined : public IBlockInputStream
{
public:
NotJoined(const TableJoin & table_join, const Block & saved_block_sample_, const Block & right_sample_block,
const Block & result_sample_block_, const Names & key_names_left_ = {}, const Names & key_names_right_ = {});
using LeftToRightKeyRemap = std::unordered_map<String, String>;
NotJoined(const Block & saved_block_sample_,
const Block & result_sample_block_,
size_t left_columns_count,
const LeftToRightKeyRemap & left_to_right_key_remap);
String getName() const override { return "NonJoined"; }
Block getHeader() const override { return result_sample_block; }
protected:
/// Builds one output block from rows supplied by the derived class.
///
/// Steps: create empty mutable columns shaped like saved_block_sample, let
/// fillColumns() append rows to them, then assemble a block with the structure
/// of result_sample_block from those columns.
/// Returns an empty Block when fillColumns() reports that no rows were added.
Block readImpl() override final
{
    /// Empty columns with the same structure as the right block saved in the join.
    Block right_template = saved_block_sample.cloneEmpty();
    MutableColumns right_columns = right_template.mutateColumns();

    /// The derived stream chooses the rows and returns how many it appended.
    const size_t added_rows = fillColumns(right_columns);
    if (!added_rows)
        return {};

    /// Order matters: the helpers below operate on the same block in sequence.
    Block output = result_sample_block.cloneEmpty();
    addLeftColumns(output, added_rows);
    addRightColumns(output, right_columns);
    copySameKeys(output);
    correctLowcardAndNullability(output);

#ifndef NDEBUG
    /// Debug-only check that the produced block matches the declared header.
    assertBlocksHaveEqualStructure(output, result_sample_block, getName());
#endif
    return output;
}
virtual size_t fillColumns(MutableColumns & columns_right) = 0;
private:
void extractColumnChanges(size_t right_pos, size_t result_pos);
void correctLowcardAndNullability(Block & block);
void addLeftColumns(Block & block, size_t rows_added) const;
void addRightColumns(Block & block, MutableColumns & columns_right) const;
void copySameKeys(Block & block) const;
protected:
/// Right block saved in Join
Block saved_block_sample;
/// Output of join
Block result_sample_block;
Names key_names_left;
Names key_names_right;
~NotJoined() = default;
private:
/// Indices of columns in result_sample_block that should be generated
std::vector<size_t> column_indices_left;
/// Indices of columns that come from the right-side table: right_pos -> result_pos
std::unordered_map<size_t, size_t> column_indices_right;
///
std::unordered_map<size_t, size_t> same_result_keys;
/// Which right columns (saved in parent) need nullability change before placing them in result block
/// Which right columns (saved in parent) need Nullability/LowCardinality change
/// before placing them in result block
std::vector<std::pair<size_t, bool>> right_nullability_changes;
/// Which right columns (saved in parent) need LowCardinality change before placing them in result block
std::vector<std::pair<size_t, bool>> right_lowcard_changes;
void setRightIndex(size_t right_pos, size_t result_position);
void extractColumnChanges(size_t right_pos, size_t result_pos);
};
}

View File

@ -1,7 +1,6 @@
#include <Processors/Transforms/JoiningTransform.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/join_common.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataStreams/IBlockInputStream.h>