better nullability flags for JOINs

This commit is contained in:
chertus 2019-09-12 17:09:05 +03:00
parent 441faba0e3
commit f7f2cface9
5 changed files with 31 additions and 29 deletions

View File

@ -77,7 +77,9 @@ public:
/// Join kind, read from table_join.
ASTTableJoin::Kind kind() const { return table_join.kind; }
/// Join strictness, read from table_join.
ASTTableJoin::Strictness strictness() const { return table_join.strictness; }
/// Size limits held by this object (returned by const reference).
const SizeLimits & sizeLimits() const { return size_limits; }
/// Raw value of the join_use_nulls flag; the join kind is not taken into account here.
bool joinUseNulls() const { return join_use_nulls; }
/// Tells whether right-side columns have to be made Nullable:
/// requires join_use_nulls to be set and the join kind to satisfy isLeftOrFull().
/// NOTE(review): "Nullabel" looks like a misspelling of "Nullable"; the name is kept because callers use it.
bool forceNullabelRight() const
{
    if (!join_use_nulls)
        return false;
    return isLeftOrFull(table_join.kind);
}
/// Tells whether left-side columns have to be made Nullable:
/// requires join_use_nulls to be set and the join kind to satisfy isRightOrFull().
/// NOTE(review): "Nullabel" looks like a misspelling of "Nullable"; the name is kept because callers use it.
bool forceNullabelLeft() const
{
    if (!join_use_nulls)
        return false;
    return isRightOrFull(table_join.kind);
}
void addUsingKey(const ASTPtr & ast);
void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast);

View File

@ -66,7 +66,8 @@ Join::Join(const AnalyzedJoin & join_options_, const Block & right_sample_block,
, strictness(join_options_.strictness())
, key_names_right(join_options_.keyNamesRight())
, required_right_keys(join_options_.requiredRightKeys())
, use_nulls(join_options_.joinUseNulls())
, nullable_right_side(join_options_.forceNullabelRight())
, nullable_left_side(join_options_.forceNullabelLeft())
, any_take_last_row(any_take_last_row_)
, log(&Logger::get("Join"))
{
@ -295,8 +296,7 @@ void Join::setSampleBlock(const Block & block)
JoinCommon::createMissedColumns(sample_block_with_columns_to_add);
/// In case of LEFT and FULL joins, if use_nulls, convert joined columns to Nullable.
if (use_nulls && isLeftOrFull(kind))
if (nullable_right_side)
JoinCommon::convertColumnsToNullable(sample_block_with_columns_to_add);
}
@ -471,8 +471,7 @@ bool Join::addJoinedBlock(const Block & block)
/// Rare case, when joined columns are constant. To avoid code bloat, simply materialize them.
materializeBlockInplace(*stored_block);
/// In case of LEFT and FULL joins, if use_nulls, convert joined columns to Nullable.
if (use_nulls && isLeftOrFull(kind))
if (nullable_right_side)
JoinCommon::convertColumnsToNullable(*stored_block, (isFull(kind) ? key_names_right.size() : 0));
if (kind != ASTTableJoin::Kind::Cross)
@ -729,7 +728,7 @@ void Join::joinBlockImpl(
{
materializeBlockInplace(block);
if (use_nulls)
if (nullable_left_side)
JoinCommon::convertColumnsToNullable(block);
}
@ -754,7 +753,6 @@ void Join::joinBlockImpl(
/// Filter & insert missing rows
constexpr bool is_all_join = STRICTNESS == ASTTableJoin::Strictness::All;
constexpr bool inner_or_right = static_in_v<KIND, ASTTableJoin::Kind::Inner, ASTTableJoin::Kind::Right>;
constexpr bool left_or_full = static_in_v<KIND, ASTTableJoin::Kind::Left, ASTTableJoin::Kind::Full>;
std::vector<size_t> right_keys_to_replicate [[maybe_unused]];
@ -773,7 +771,7 @@ void Join::joinBlockImpl(
if (required_right_keys.count(right_key.name) && !block.has(right_key.name))
{
const auto & col = block.getByName(left_name);
bool is_nullable = (use_nulls && left_or_full) || right_key.type->isNullable();
bool is_nullable = nullable_right_side || right_key.type->isNullable();
block.insert(correctNullability({col.column, col.type, right_key.name}, is_nullable));
}
}
@ -807,7 +805,7 @@ void Join::joinBlockImpl(
mut_column->insertDefault();
}
bool is_nullable = (use_nulls && left_or_full) || right_key.type->isNullable();
bool is_nullable = nullable_right_side || right_key.type->isNullable();
block.insert(correctNullability({std::move(mut_column), col.type, right_key.name}, is_nullable, null_map_filter));
if constexpr (is_all_join)
@ -1137,9 +1135,7 @@ private:
std::unordered_map<size_t, size_t> & left_to_right_key_map)
{
result_sample_block = materializeBlock(left_sample_block);
/// Convert left columns to Nullable if allowed
if (parent.use_nulls)
if (parent.nullable_left_side)
JoinCommon::convertColumnsToNullable(result_sample_block);
/// Add columns from the right-side table to the block.
@ -1159,7 +1155,7 @@ private:
if (parent.required_right_keys.count(right_key.name) && !result_sample_block.has(right_key.name))
{
const auto & col = result_sample_block.getByPosition(left_key_pos);
bool is_nullable = (parent.use_nulls && isFull(parent.kind)) || right_key.type->isNullable();
bool is_nullable = (parent.nullable_right_side && isFull(parent.kind)) || right_key.type->isNullable();
result_sample_block.insert(correctNullability({col.column, col.type, right_key.name}, is_nullable));
size_t right_key_pos = result_sample_block.getPositionByName(right_key.name);

View File

@ -128,8 +128,6 @@ public:
bool empty() { return type == Type::EMPTY; }
bool isNullUsedAsDefault() const { return use_nulls; }
/** Add block of data from right hand of JOIN to the map.
* Returns false, if some limit was exceeded and you should not insert more data.
*/
@ -285,8 +283,10 @@ private:
/// Names of right-side table keys that are needed in result (would be attached after joined columns).
const NameSet required_right_keys;
/// Substitute NULLs for non-JOINed rows.
bool use_nulls;
/// In case of LEFT and FULL joins, if use_nulls, convert right-side columns to Nullable.
bool nullable_right_side;
/// In case of RIGHT and FULL joins, if use_nulls, convert left-side columns to Nullable.
bool nullable_left_side;
/// Overwrite existing values when encountering the same key again
bool any_take_last_row;

View File

@ -14,10 +14,19 @@ namespace ErrorCodes
/// Builds a MergeJoin from the analyzed join description and a sample block of the right table.
/// Stores a reference to the join description and caches its forceNullabelRight() answer,
/// then prepares the block of right-side columns that will be appended to joined blocks.
/// NOTE(review): this span appears to come from a diff rendered without +/- markers, so
/// pre-change and post-change statements may both be present below — confirm which ones
/// belong to the final revision before relying on the exact statement sequence.
MergeJoin::MergeJoin(const AnalyzedJoin & table_join_, const Block & right_sample_block)
: table_join(table_join_)
/// NOTE(review): a local with the same name is declared inside the body; verify whether this member still exists.
, required_right_keys(table_join.requiredRightKeys())
/// Cached once at construction; later changes to the join description are not observed through this flag.
, nullable_right_side(table_join_.forceNullabelRight())
{
/// NOTE(review): the next two calls operate on sample_block_with_columns_to_add, while the
/// statements after them repeat the same steps on right_columns_to_add — presumably the
/// old and new variants of the same code; verify.
JoinCommon::extractKeysForJoin(table_join.keyNamesRight(), right_sample_block, right_table_keys, sample_block_with_columns_to_add);
JoinCommon::createMissedColumns(sample_block_with_columns_to_add);
/// Split the right sample block using the right key names; outputs go to right_table_keys and right_columns_to_add.
JoinCommon::extractKeysForJoin(table_join.keyNamesRight(), right_sample_block, right_table_keys, right_columns_to_add);
/// Local copy of the set of right keys that must appear in the result.
const NameSet required_right_keys = table_join.requiredRightKeys();
/// Every required right key is appended as a type+name entry with a null column pointer.
for (const auto & column : right_table_keys)
if (required_right_keys.count(column.name))
right_columns_to_add.insert(ColumnWithTypeAndName{nullptr, column.type, column.name});
/// Presumably materializes the entries whose column pointer is still null — TODO confirm against JoinCommon.
JoinCommon::createMissedColumns(right_columns_to_add);
/// When the right side is forced to be nullable, convert the columns to be added to Nullable.
if (nullable_right_side)
JoinCommon::convertColumnsToNullable(right_columns_to_add);
}
/// TODO: sort
@ -47,13 +56,8 @@ void MergeJoin::joinBlock(Block & block)
/// Appends right-side columns to `block`: each source column is cloned and resized to the
/// current row count of `block`, keeping its type and name.
/// NOTE(review): this span appears to come from a diff rendered without +/- markers; the two
/// consecutive `for` headers and the trailing key loop are presumably old/new variants of the
/// same code rather than intentionally nested loops — confirm against the real file.
void MergeJoin::addRightColumns(Block & block)
{
/// Row count is read once, before any column is inserted.
size_t rows = block.rows();
for (const auto & column : sample_block_with_columns_to_add)
for (const auto & column : right_columns_to_add)
/// Dereferences column.column, so every entry is assumed to hold a non-null column here — TODO confirm.
block.insert(ColumnWithTypeAndName{column.column->cloneResized(rows), column.type, column.name});
/// Adds the right key columns whose names are in required_right_keys.
for (const auto & column : right_table_keys)
if (required_right_keys.count(column.name))
block.insert(ColumnWithTypeAndName{column.column->cloneResized(rows), column.type, column.name});
}
void MergeJoin::mergeJoin(Block & /*block*/, const Block & /*right_block*/)

View File

@ -26,10 +26,10 @@ public:
private:
mutable std::shared_mutex rwlock;
const AnalyzedJoin & table_join;
const NameSet required_right_keys;
Block right_table_keys;
Block sample_block_with_columns_to_add;
Block right_columns_to_add;
BlocksList right_blocks;
bool nullable_right_side;
size_t right_blocks_row_count = 0;
size_t right_blocks_bytes = 0;