Refactor NotJoined pt1

This commit is contained in:
vdimir 2021-08-06 17:15:11 +03:00
parent 98eb619b4b
commit afa748c717
No known key found for this signature in database
GPG Key ID: F57B3E10A21DBB31
8 changed files with 125 additions and 148 deletions

View File

@ -1468,40 +1468,23 @@ struct AdderNonJoined
/// Stream from not joined earlier rows of the right table. /// Stream from not joined earlier rows of the right table.
class NonJoinedBlockInputStream : private NotJoined, public IBlockInputStream class NonJoinedBlockInputStream final : public NotJoined
{ {
public: public:
NonJoinedBlockInputStream(const HashJoin & parent_, const Block & result_sample_block_, UInt64 max_block_size_) NonJoinedBlockInputStream(
: NotJoined(*parent_.table_join, const HashJoin & parent_,
parent_.savedBlockSample(), const Block & result_sample_block_,
parent_.right_sample_block, size_t left_columns_count,
result_sample_block_) UInt64 max_block_size_)
: NotJoined(parent_.savedBlockSample(), result_sample_block_,
left_columns_count, parent_.table_join->leftToRightKeyRemap())
, parent(parent_) , parent(parent_)
, max_block_size(max_block_size_) , max_block_size(max_block_size_)
{} {}
String getName() const override { return "NonJoined"; }
Block getHeader() const override { return result_sample_block; }
protected: protected:
Block readImpl() override size_t fillColumns(MutableColumns & columns_right) override
{ {
if (parent.data->blocks.empty())
return Block();
return createBlock();
}
private:
const HashJoin & parent;
UInt64 max_block_size;
std::any position;
std::optional<HashJoin::BlockNullmapList::const_iterator> nulls_position;
Block createBlock()
{
MutableColumns columns_right = saved_block_sample.cloneEmptyColumns();
size_t rows_added = 0; size_t rows_added = 0;
auto fill_callback = [&](auto, auto strictness, auto & map) auto fill_callback = [&](auto, auto strictness, auto & map)
@ -1513,22 +1496,16 @@ private:
throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR); throw Exception("Logical error: unknown JOIN strictness (must be on of: ANY, ALL, ASOF)", ErrorCodes::LOGICAL_ERROR);
fillNullsFromBlocks(columns_right, rows_added); fillNullsFromBlocks(columns_right, rows_added);
if (!rows_added) return rows_added;
return {};
Block res = result_sample_block.cloneEmpty();
addLeftColumns(res, rows_added);
addRightColumns(res, columns_right);
copySameKeys(res);
correctLowcardAndNullability(res);
#ifndef NDEBUG
assertBlocksHaveEqualStructure(res, result_sample_block, getName());
#endif
return res;
} }
private:
const HashJoin & parent;
UInt64 max_block_size;
std::any position;
std::optional<HashJoin::BlockNullmapList::const_iterator> nulls_position;
template <ASTTableJoin::Strictness STRICTNESS, typename Maps> template <ASTTableJoin::Strictness STRICTNESS, typename Maps>
size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right) size_t fillColumnsFromMap(const Maps & maps, MutableColumns & columns_keys_and_right)
{ {
@ -1610,12 +1587,14 @@ private:
BlockInputStreamPtr HashJoin::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const BlockInputStreamPtr HashJoin::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const
{ {
if (table_join->strictness() == ASTTableJoin::Strictness::Asof || if (table_join->strictness() == ASTTableJoin::Strictness::Asof ||
table_join->strictness() == ASTTableJoin::Strictness::Semi) table_join->strictness() == ASTTableJoin::Strictness::Semi ||
!isRightOrFull(table_join->kind()))
{
return {}; return {};
}
if (isRightOrFull(table_join->kind())) size_t left_columns_count = result_sample_block.columns() - required_right_keys.columns() - sample_block_with_columns_to_add.columns();
return std::make_shared<NonJoinedBlockInputStream>(*this, result_sample_block, max_block_size); return std::make_shared<NonJoinedBlockInputStream>(*this, result_sample_block, left_columns_count, max_block_size);
return {};
} }
void HashJoin::reuseJoinedData(const HashJoin & join) void HashJoin::reuseJoinedData(const HashJoin & join)

View File

@ -1,7 +1,8 @@
#include <limits> #include <limits>
#include <Columns/ColumnNullable.h> #include <Columns/ColumnNullable.h>
#include <Core/NamesAndTypes.h> #include <Columns/ColumnLowCardinality.h>
#include <Core/SortCursor.h> #include <Core/SortCursor.h>
#include <DataStreams/TemporaryFileStream.h> #include <DataStreams/TemporaryFileStream.h>
#include <DataStreams/materializeBlock.h> #include <DataStreams/materializeBlock.h>
@ -723,15 +724,7 @@ void MergeJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed)
if (needConditionJoinColumn()) if (needConditionJoinColumn())
block.erase(deriveTempName(mask_column_name_left)); block.erase(deriveTempName(mask_column_name_left));
for (const auto & column_name : lowcard_keys) JoinCommon::restoreLowCardinalityInplace(block, lowcard_keys);
{
if (!block.has(column_name))
continue;
if (auto & col = block.getByName(column_name); !col.type->lowCardinality())
JoinCommon::changeLowCardinalityInplace(col);
}
JoinCommon::restoreLowCardinalityInplace(block);
} }
template <bool in_memory, bool is_all> template <bool in_memory, bool is_all>
@ -1035,55 +1028,25 @@ void MergeJoin::initRightTableWriter()
} }
/// Stream from not joined earlier rows of the right table. /// Stream from not joined earlier rows of the right table.
class NonMergeJoinedBlockInputStream : private NotJoined, public IBlockInputStream class NonMergeJoinedBlockInputStream final : public NotJoined
{ {
public: public:
NonMergeJoinedBlockInputStream(const MergeJoin & parent_, NonMergeJoinedBlockInputStream(const MergeJoin & parent_,
const Block & result_sample_block_, const Block & result_sample_block,
const Names & key_names_right_, size_t left_columns_count,
UInt64 max_block_size_) UInt64 max_block_size_)
: NotJoined(*parent_.table_join, : NotJoined(parent_.modifyRightBlock(parent_.right_sample_block),
parent_.modifyRightBlock(parent_.right_sample_block), result_sample_block,
parent_.right_sample_block, left_columns_count,
result_sample_block_, parent_.table_join->leftToRightKeyRemap())
{}, key_names_right_)
, parent(parent_) , parent(parent_)
, max_block_size(max_block_size_) , max_block_size(max_block_size_)
{} {}
String getName() const override { return "NonMergeJoined"; } String getName() const override { return "NonMergeJoined"; }
Block getHeader() const override { return result_sample_block; }
protected: protected:
Block readImpl() override size_t fillColumns(MutableColumns & columns_right) override
{
if (parent.getRightBlocksCount())
return createBlock();
return {};
}
private:
const MergeJoin & parent;
size_t max_block_size;
size_t block_number = 0;
Block createBlock()
{
MutableColumns columns_right = saved_block_sample.cloneEmptyColumns();
size_t rows_added = fillColumns(columns_right);
if (!rows_added)
return {};
Block res = result_sample_block.cloneEmpty();
addLeftColumns(res, rows_added);
addRightColumns(res, columns_right);
copySameKeys(res);
correctLowcardAndNullability(res);
return res;
}
size_t fillColumns(MutableColumns & columns_right)
{ {
const RowBitmaps & bitmaps = *parent.used_rows_bitmap; const RowBitmaps & bitmaps = *parent.used_rows_bitmap;
size_t rows_added = 0; size_t rows_added = 0;
@ -1127,13 +1090,19 @@ private:
return rows_added; return rows_added;
} }
private:
const MergeJoin & parent;
size_t max_block_size;
size_t block_number = 0;
}; };
BlockInputStreamPtr MergeJoin::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const BlockInputStreamPtr MergeJoin::createStreamWithNonJoinedRows(const Block & result_sample_block, UInt64 max_block_size) const
{ {
size_t left_columns_count = result_sample_block.columns() - right_columns_to_add.columns();
if (table_join->strictness() == ASTTableJoin::Strictness::All && (is_right || is_full)) if (table_join->strictness() == ASTTableJoin::Strictness::All && (is_right || is_full))
return std::make_shared<NonMergeJoinedBlockInputStream>(*this, result_sample_block, key_names_right, max_block_size); return std::make_shared<NonMergeJoinedBlockInputStream>(*this, result_sample_block, left_columns_count, max_block_size);
return {}; return {};
} }

View File

@ -78,6 +78,7 @@ private:
SortDescription right_merge_description; SortDescription right_merge_description;
Block right_sample_block; Block right_sample_block;
Block right_table_keys; Block right_table_keys;
/// Columns from right side of join, both key and additional
Block right_columns_to_add; Block right_columns_to_add;
SortedBlocksWriter::Blocks right_blocks; SortedBlocksWriter::Blocks right_blocks;

View File

@ -472,6 +472,24 @@ void TableJoin::addJoinCondition(const ASTPtr & ast, bool is_left)
on_filter_condition_asts_right.push_back(ast); on_filter_condition_asts_right.push_back(ast);
} }
std::unordered_map<String, String> TableJoin::leftToRightKeyRemap() const
{
std::unordered_map<String, String> left_to_right_key_remap;
if (hasUsing())
{
const auto & required_right_keys = requiredRightKeys();
for (size_t i = 0; i < key_names_left.size(); ++i)
{
const String & left_key_name = key_names_left[i];
const String & right_key_name = key_names_right[i];
if (!required_right_keys.contains(right_key_name))
left_to_right_key_remap[left_key_name] = right_key_name;
}
}
return left_to_right_key_remap;
}
/// Returns all conditions related to one table joined with 'and' function /// Returns all conditions related to one table joined with 'and' function
static ASTPtr buildJoinConditionColumn(const ASTs & on_filter_condition_asts) static ASTPtr buildJoinConditionColumn(const ASTs & on_filter_condition_asts)
{ {

View File

@ -230,6 +230,7 @@ public:
Block getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const; Block getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const;
String renamedRightColumnName(const String & name) const; String renamedRightColumnName(const String & name) const;
std::unordered_map<String, String> leftToRightKeyRemap() const;
}; };
} }

View File

@ -314,8 +314,16 @@ void removeLowCardinalityInplace(Block & block, const Names & names, bool change
} }
} }
void restoreLowCardinalityInplace(Block & block) void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys)
{ {
for (const auto & column_name : lowcard_keys)
{
if (!block.has(column_name))
continue;
if (auto & col = block.getByName(column_name); !col.type->lowCardinality())
JoinCommon::changeLowCardinalityInplace(col);
}
for (size_t i = 0; i < block.columns(); ++i) for (size_t i = 0; i < block.columns(); ++i)
{ {
auto & col = block.getByPosition(i); auto & col = block.getByPosition(i);
@ -484,49 +492,21 @@ void splitAdditionalColumns(const Names & key_names, const Block & sample_block,
} }
NotJoined::NotJoined(const Block & saved_block_sample_,
NotJoined::NotJoined(const TableJoin & table_join, const Block & saved_block_sample_, const Block & right_sample_block, const Block & result_sample_block_,
const Block & result_sample_block_, const Names & key_names_left_, const Names & key_names_right_) size_t left_columns_count,
const LeftToRightKeyRemap & left_to_right_key_remap)
: saved_block_sample(saved_block_sample_) : saved_block_sample(saved_block_sample_)
, result_sample_block(materializeBlock(result_sample_block_)) , result_sample_block(materializeBlock(result_sample_block_))
, key_names_left(key_names_left_.empty() ? table_join.keyNamesLeft() : key_names_left_)
, key_names_right(key_names_right_.empty() ? table_join.keyNamesRight() : key_names_right_)
{ {
std::vector<String> tmp;
Block right_table_keys;
Block sample_block_with_columns_to_add;
JoinCommon::splitAdditionalColumns(key_names_right, right_sample_block, right_table_keys,
sample_block_with_columns_to_add);
Block required_right_keys = table_join.getRequiredRightKeys(right_table_keys, tmp);
std::unordered_map<size_t, size_t> left_to_right_key_remap;
if (table_join.hasUsing())
{
for (size_t i = 0; i < key_names_left.size(); ++i)
{
const String & left_key_name = key_names_left[i];
const String & right_key_name = key_names_right[i];
size_t left_key_pos = result_sample_block.getPositionByName(left_key_name);
size_t right_key_pos = saved_block_sample.getPositionByName(right_key_name);
if (!required_right_keys.has(right_key_name))
left_to_right_key_remap[left_key_pos] = right_key_pos;
}
}
/// result_sample_block: left_sample_block + left expressions, right not key columns, required right keys
size_t left_columns_count = result_sample_block.columns() -
sample_block_with_columns_to_add.columns() - required_right_keys.columns();
for (size_t left_pos = 0; left_pos < left_columns_count; ++left_pos) for (size_t left_pos = 0; left_pos < left_columns_count; ++left_pos)
{ {
/// We need right 'x' for 'RIGHT JOIN ... USING(x)'. /// We need right 'x' for 'RIGHT JOIN ... USING(x)'
if (left_to_right_key_remap.count(left_pos)) auto left_name = result_sample_block.getByPosition(left_pos).name;
const auto & right_key = left_to_right_key_remap.find(left_name);
if (right_key != left_to_right_key_remap.end())
{ {
size_t right_key_pos = left_to_right_key_remap[left_pos]; size_t right_key_pos = saved_block_sample.getPositionByName(right_key->second);
setRightIndex(right_key_pos, left_pos); setRightIndex(right_key_pos, left_pos);
} }
else else
@ -558,7 +538,7 @@ NotJoined::NotJoined(const TableJoin & table_join, const Block & saved_block_sam
void NotJoined::setRightIndex(size_t right_pos, size_t result_position) void NotJoined::setRightIndex(size_t right_pos, size_t result_position)
{ {
if (!column_indices_right.count(right_pos)) if (!column_indices_right.contains(right_pos))
{ {
column_indices_right[right_pos] = result_position; column_indices_right[right_pos] = result_position;
extractColumnChanges(right_pos, result_position); extractColumnChanges(right_pos, result_position);

View File

@ -5,6 +5,7 @@
#include <Interpreters/IJoin.h> #include <Interpreters/IJoin.h>
#include <Interpreters/ActionsDAG.h> #include <Interpreters/ActionsDAG.h>
#include <Interpreters/ExpressionActions.h> #include <Interpreters/ExpressionActions.h>
#include <DataStreams/IBlockInputStream.h>
namespace DB namespace DB
{ {
@ -30,7 +31,7 @@ ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names);
ColumnRawPtrs getRawPointers(const Columns & columns); ColumnRawPtrs getRawPointers(const Columns & columns);
void removeLowCardinalityInplace(Block & block); void removeLowCardinalityInplace(Block & block);
void removeLowCardinalityInplace(Block & block, const Names & names, bool change_type = true); void removeLowCardinalityInplace(Block & block, const Names & names, bool change_type = true);
void restoreLowCardinalityInplace(Block & block); void restoreLowCardinalityInplace(Block & block, const Names & lowcard_keys);
ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right); ColumnRawPtrs extractKeysForJoin(const Block & block_keys, const Names & key_names_right);
@ -64,40 +65,69 @@ void changeLowCardinalityInplace(ColumnWithTypeAndName & column);
} }
/// Creates result from right table data in RIGHT and FULL JOIN when keys are not present in left table. /// Creates result from right table data in RIGHT and FULL JOIN when keys are not present in left table.
class NotJoined class NotJoined : public IBlockInputStream
{ {
public: public:
NotJoined(const TableJoin & table_join, const Block & saved_block_sample_, const Block & right_sample_block, using LeftToRightKeyRemap = std::unordered_map<String, String>;
const Block & result_sample_block_, const Names & key_names_left_ = {}, const Names & key_names_right_ = {});
NotJoined(const Block & saved_block_sample_,
const Block & result_sample_block_,
size_t left_columns_count,
const LeftToRightKeyRemap & left_to_right_key_remap);
String getName() const override { return "NonJoined"; }
Block getHeader() const override { return result_sample_block; }
protected:
/// Produces the next block of the stream.
/// fillColumns() supplies the right-side columns (starting from empty columns cloned
/// from saved_block_sample); they are then placed into an empty clone of result_sample_block.
/// Returns an empty Block when fillColumns() reports zero rows.
Block readImpl() override final
{
    Block right_sample = saved_block_sample.cloneEmpty();
    MutableColumns right_columns = right_sample.mutateColumns();

    const size_t rows_count = fillColumns(right_columns);
    if (!rows_count)
        return {};

    Block output = result_sample_block.cloneEmpty();
    addLeftColumns(output, rows_count);
    addRightColumns(output, right_columns);
    copySameKeys(output);
    correctLowcardAndNullability(output);

#ifndef NDEBUG
    /// Debug-only check that the produced block matches the declared header.
    assertBlocksHaveEqualStructure(output, result_sample_block, getName());
#endif
    return output;
}
/// Implemented by subclasses: fills `columns_right` and returns the number of rows added.
/// readImpl() passes in empty columns cloned from saved_block_sample and
/// returns an empty Block when this returns 0.
virtual size_t fillColumns(MutableColumns & columns_right) = 0;
private:
void extractColumnChanges(size_t right_pos, size_t result_pos);
void correctLowcardAndNullability(Block & block); void correctLowcardAndNullability(Block & block);
void addLeftColumns(Block & block, size_t rows_added) const; void addLeftColumns(Block & block, size_t rows_added) const;
void addRightColumns(Block & block, MutableColumns & columns_right) const; void addRightColumns(Block & block, MutableColumns & columns_right) const;
void copySameKeys(Block & block) const; void copySameKeys(Block & block) const;
protected: /// Right block saved in Join
Block saved_block_sample; Block saved_block_sample;
/// Output of join
Block result_sample_block; Block result_sample_block;
Names key_names_left;
Names key_names_right;
~NotJoined() = default;
private:
/// Indices of columns in result_sample_block that should be generated /// Indices of columns in result_sample_block that should be generated
std::vector<size_t> column_indices_left; std::vector<size_t> column_indices_left;
/// Indices of columns that come from the right-side table: right_pos -> result_pos /// Indices of columns that come from the right-side table: right_pos -> result_pos
std::unordered_map<size_t, size_t> column_indices_right; std::unordered_map<size_t, size_t> column_indices_right;
///
std::unordered_map<size_t, size_t> same_result_keys; std::unordered_map<size_t, size_t> same_result_keys;
/// Which right columns (saved in parent) need nullability change before placing them in result block
/// Which right columns (saved in parent) need Nullability/LowCardinality change
/// before placing them in result block
std::vector<std::pair<size_t, bool>> right_nullability_changes; std::vector<std::pair<size_t, bool>> right_nullability_changes;
/// Which right columns (saved in parent) need LowCardinality change before placing them in result block
std::vector<std::pair<size_t, bool>> right_lowcard_changes; std::vector<std::pair<size_t, bool>> right_lowcard_changes;
void setRightIndex(size_t right_pos, size_t result_position); void setRightIndex(size_t right_pos, size_t result_position);
void extractColumnChanges(size_t right_pos, size_t result_pos);
}; };
} }

View File

@ -1,7 +1,6 @@
#include <Processors/Transforms/JoiningTransform.h> #include <Processors/Transforms/JoiningTransform.h>
#include <Interpreters/ExpressionAnalyzer.h> #include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/join_common.h> #include <Interpreters/join_common.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataStreams/IBlockInputStream.h> #include <DataStreams/IBlockInputStream.h>