fix UB in join keys order

This commit is contained in:
chertus 2019-08-01 20:27:51 +03:00
parent e5d3f17225
commit 4072214b5d
6 changed files with 27 additions and 47 deletions

View File

@ -126,14 +126,14 @@ NameSet AnalyzedJoin::getOriginalColumnsSet() const
return out;
}
std::unordered_map<String, String> AnalyzedJoin::getOriginalColumnsMap(const NameSet & required_columns) const
Names AnalyzedJoin::getOriginalColumnNames(const Names & required_columns) const
{
std::unordered_map<String, String> out;
Names out;
for (const auto & column : required_columns)
{
auto it = original_names.find(column);
if (it != original_names.end())
out.insert(*it);
out.push_back(it->second);
}
return out;
}

View File

@ -61,7 +61,7 @@ public:
NameSet getQualifiedColumnsSet() const;
NameSet getOriginalColumnsSet() const;
std::unordered_map<String, String> getOriginalColumnsMap(const NameSet & required_columns) const;
Names getOriginalColumnNames(const Names & required_columns) const;
void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix);
void calculateAvailableJoinedColumns(bool make_nullable);

View File

@ -516,15 +516,15 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only
}
static void appendRequiredColumns(
NameSet & required_columns, const Block & sample, const Names & key_names_right, const NamesAndTypesList & columns_added_by_join)
Names & required_columns, const Block & sample, const Names & key_names_right, const NamesAndTypesList & columns_added_by_join)
{
for (auto & column : key_names_right)
if (!sample.has(column))
required_columns.insert(column);
required_columns.push_back(column);
for (auto & column : columns_added_by_join)
if (!sample.has(column.name))
required_columns.insert(column.name);
required_columns.push_back(column.name);
}
bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
@ -606,20 +606,15 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
else if (table_to_join.database_and_table_name)
table = table_to_join.database_and_table_name;
Names action_columns = joined_block_actions->getRequiredColumns();
NameSet required_columns(action_columns.begin(), action_columns.end());
Names required_columns = joined_block_actions->getRequiredColumns();
appendRequiredColumns(
required_columns, joined_block_actions->getSampleBlock(), analyzed_join.key_names_right, columns_added_by_join);
auto original_map = analyzed_join.getOriginalColumnsMap(required_columns);
Names original_columns;
for (auto & pr : original_map)
original_columns.push_back(pr.second);
Names original_columns = analyzed_join.getOriginalColumnNames(required_columns);
auto interpreter = interpretSubquery(table, context, subquery_depth, original_columns);
subquery_for_set.makeSource(interpreter, original_map);
subquery_for_set.makeSource(interpreter, original_columns, required_columns);
}
Block sample_block = subquery_for_set.renamedSampleBlock();

View File

@ -275,18 +275,21 @@ void Join::setSampleBlock(const Block & block)
size_t keys_size = key_names_right.size();
ColumnRawPtrs key_columns(keys_size);
Columns materialized_columns;
sample_block_with_columns_to_add = materializeBlock(block);
for (size_t i = 0; i < keys_size; ++i)
{
auto & column = block.getByName(key_names_right[i]).column;
key_columns[i] = column.get();
auto column_no_lc = recursiveRemoveLowCardinality(column);
if (column.get() != column_no_lc.get())
{
materialized_columns.emplace_back(std::move(column_no_lc));
key_columns[i] = materialized_columns.back().get();
}
const String & column_name = key_names_right[i];
auto & col = sample_block_with_columns_to_add.getByName(column_name);
col.column = recursiveRemoveLowCardinality(col.column);
col.type = recursiveRemoveLowCardinality(col.type);
/// Extract right keys with correct keys order.
sample_block_with_keys.insert(col);
sample_block_with_columns_to_add.erase(column_name);
key_columns[i] = sample_block_with_keys.getColumns().back().get();
/// We will join only keys, where all components are not NULL.
if (auto * nullable = checkAndGetColumn<ColumnNullable>(*key_columns[i]))
@ -327,27 +330,9 @@ void Join::setSampleBlock(const Block & block)
init(chooseMethod(key_columns, key_sizes));
}
sample_block_with_columns_to_add = materializeBlock(block);
blocklist_sample = Block(block.getColumnsWithTypeAndName());
prepareBlockListStructure(blocklist_sample);
/// Move from `sample_block_with_columns_to_add` key columns to `sample_block_with_keys`, keeping the order.
for (size_t pos = 0; pos < sample_block_with_columns_to_add.columns();)
{
auto & col = sample_block_with_columns_to_add.getByPosition(pos);
if (key_names_right.end() != std::find(key_names_right.begin(), key_names_right.end(), col.name))
{
col.column = recursiveRemoveLowCardinality(col.column);
col.type = recursiveRemoveLowCardinality(col.type);
sample_block_with_keys.insert(col);
sample_block_with_columns_to_add.erase(pos);
}
else
++pos;
}
size_t num_columns_to_add = sample_block_with_columns_to_add.columns();
for (size_t i = 0; i < num_columns_to_add; ++i)

View File

@ -7,13 +7,13 @@ namespace DB
{
void SubqueryForSet::makeSource(std::shared_ptr<InterpreterSelectWithUnionQuery> & interpreter,
const std::unordered_map<String, String> & name_to_origin)
const Names & original_names, const Names & required_names)
{
source = std::make_shared<LazyBlockInputStream>(interpreter->getSampleBlock(),
[interpreter]() mutable { return interpreter->execute().in; });
for (const auto & names : name_to_origin)
joined_block_aliases.emplace_back(names.second, names.first);
for (size_t i = 0; i < original_names.size(); ++i)
joined_block_aliases.emplace_back(original_names[i], required_names[i]);
sample_block = source->getHeader();
for (const auto & name_with_alias : joined_block_aliases)

View File

@ -1,5 +1,6 @@
#pragma once
#include <Core/Names.h>
#include <Parsers/IAST.h>
#include <Interpreters/PreparedSets.h>
#include <Interpreters/ExpressionActions.h>
@ -12,7 +13,6 @@ class Join;
using JoinPtr = std::shared_ptr<Join>;
class InterpreterSelectWithUnionQuery;
struct AnalyzedJoin;
/// Information on what to do when executing a subquery in the [GLOBAL] IN/JOIN section.
@ -32,7 +32,7 @@ struct SubqueryForSet
StoragePtr table;
void makeSource(std::shared_ptr<InterpreterSelectWithUnionQuery> & interpreter,
const std::unordered_map<String, String> & name_to_origin);
const Names & original_names, const Names & required_names);
Block renamedSampleBlock() const { return sample_block; }
void renameColumns(Block & block);