2020-04-07 09:48:47 +00:00
|
|
|
#include <Interpreters/TableJoin.h>
|
2018-11-02 18:53:23 +00:00
|
|
|
|
2021-06-29 09:22:53 +00:00
|
|
|
|
2021-07-21 17:03:33 +00:00
|
|
|
#include <Common/StringUtils/StringUtils.h>
|
2018-11-02 18:53:23 +00:00
|
|
|
|
2019-09-02 19:58:45 +00:00
|
|
|
#include <Core/Block.h>
|
2020-09-08 10:40:53 +00:00
|
|
|
#include <Core/ColumnsWithTypeAndName.h>
|
2021-07-21 17:03:33 +00:00
|
|
|
#include <Core/Settings.h>
|
2020-04-12 03:20:15 +00:00
|
|
|
|
2019-09-02 19:58:45 +00:00
|
|
|
#include <DataTypes/DataTypeNullable.h>
|
2021-06-26 13:59:07 +00:00
|
|
|
|
2021-06-29 09:22:53 +00:00
|
|
|
#include <Dictionaries/DictionaryStructure.h>
|
|
|
|
|
|
|
|
#include <Interpreters/DictionaryReader.h>
|
|
|
|
#include <Interpreters/ExternalDictionariesLoader.h>
|
|
|
|
|
2021-07-21 17:03:33 +00:00
|
|
|
#include <Parsers/ASTExpressionList.h>
|
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/queryToString.h>
|
|
|
|
|
2021-06-28 14:12:15 +00:00
|
|
|
#include <Storages/IStorage.h>
|
2021-06-29 09:22:53 +00:00
|
|
|
#include <Storages/StorageDictionary.h>
|
|
|
|
#include <Storages/StorageJoin.h>
|
|
|
|
|
|
|
|
#include <common/logger_useful.h>
|
|
|
|
|
2021-03-05 14:34:43 +00:00
|
|
|
|
2018-11-02 18:53:23 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-02-09 15:28:06 +00:00
|
|
|
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int TYPE_MISMATCH;
|
2021-06-29 09:22:53 +00:00
|
|
|
extern const int LOGICAL_ERROR;
|
2021-02-09 15:28:06 +00:00
|
|
|
}
|
|
|
|
|
2021-06-26 13:59:07 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
|
|
|
/// Renders every entry of `target` as "name : source_type -> target_type" and joins the entries with ", ".
/// The source type part is left empty when `source` has no entry for the name.
std::string formatTypeMap(const TableJoin::NameToTypeMap & target, const TableJoin::NameToTypeMap & source)
{
    std::vector<std::string> entries;
    for (const auto & [column_name, target_type] : target)
    {
        std::string source_type_name;
        if (const auto found = source.find(column_name); found != source.end())
            source_type_name = found->second->getName();

        entries.push_back(fmt::format("{} : {} -> {}", column_name, source_type_name, target_type->getName()));
    }
    return fmt::format("{}", fmt::join(entries, ", "));
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2021-03-05 14:34:43 +00:00
|
|
|
/// Builds the join description from query settings.
/// @param settings     source of the size limits, join algorithm and partial-merge-join tuning values copied below.
/// @param tmp_volume_  volume handle stored in `tmp_volume`; taken by value and moved into the member.
/// Both clause lists start with exactly one default-constructed clause.
TableJoin::TableJoin(const Settings & settings, VolumePtr tmp_volume_)
    : size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode})
    , default_max_bytes(settings.default_max_bytes_in_join)
    , join_use_nulls(settings.join_use_nulls)
    , max_joined_block_rows(settings.max_joined_block_size_rows)
    , join_algorithm(settings.join_algorithm)
    , partial_merge_join_optimizations(settings.partial_merge_join_optimizations)
    , partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks)
    , partial_merge_join_left_table_buffer_bytes(settings.partial_merge_join_left_table_buffer_bytes)
    , max_files_to_merge(settings.join_on_disk_max_files_to_merge)
    , temporary_files_codec(settings.temporary_files_codec)
    , left_clauses(1)
    , right_clauses(1)
    , tmp_volume(std::move(tmp_volume_)) /// sink parameter: move instead of making a second copy
{
}
|
2019-09-09 19:43:37 +00:00
|
|
|
|
2021-03-05 14:34:43 +00:00
|
|
|
void TableJoin::resetCollected()
|
|
|
|
{
|
2021-09-02 11:40:04 +00:00
|
|
|
left_clauses = std::vector<JoinOnClause>(1);
|
|
|
|
right_clauses = std::vector<JoinOnClause>(1);
|
|
|
|
|
2021-03-05 14:34:43 +00:00
|
|
|
columns_from_joined_table.clear();
|
|
|
|
columns_added_by_join.clear();
|
|
|
|
original_names.clear();
|
|
|
|
renames.clear();
|
2021-03-05 14:41:39 +00:00
|
|
|
left_type_map.clear();
|
|
|
|
right_type_map.clear();
|
2021-06-25 12:03:10 +00:00
|
|
|
key_names_left.resize(1);
|
|
|
|
key_names_right.resize(1);
|
|
|
|
on_filter_condition_asts_left.resize(1);
|
|
|
|
on_filter_condition_asts_right.resize(1);
|
2021-03-05 14:34:43 +00:00
|
|
|
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
void TableJoin::addUsingKey(const ASTPtr & ast)
|
2019-02-13 15:18:02 +00:00
|
|
|
{
|
2021-09-02 11:40:04 +00:00
|
|
|
left_clauses.back().addKey(ast->getColumnName(), ast);
|
|
|
|
right_clauses.back().addKey(renamedRightColumnName(ast->getAliasOrColumnName()), ast);
|
2019-02-13 15:18:02 +00:00
|
|
|
}
|
|
|
|
|
2021-09-02 11:40:04 +00:00
|
|
|
/// create new disjunct when see a direct child of a previously discovered OR
|
2021-07-23 19:55:36 +00:00
|
|
|
void TableJoin::addDisjunct(const ASTPtr & ast)
|
2021-06-25 12:03:10 +00:00
|
|
|
{
|
2021-07-23 19:55:36 +00:00
|
|
|
const IAST * addr = ast.get();
|
|
|
|
|
|
|
|
if (std::find_if(disjuncts.begin(), disjuncts.end(), [addr](const ASTPtr & ast_){return ast_.get() == addr;}) != disjuncts.end())
|
2021-06-25 12:03:10 +00:00
|
|
|
{
|
2021-09-02 11:40:04 +00:00
|
|
|
if (!left_clauses.back().key_names.empty() || !left_clauses.back().on_filter_conditions.empty() || !right_clauses.back().on_filter_conditions.empty())
|
2021-06-25 12:03:10 +00:00
|
|
|
{
|
2021-09-02 11:40:04 +00:00
|
|
|
left_clauses.emplace_back();
|
|
|
|
right_clauses.emplace_back();
|
2021-06-25 12:03:10 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-07-23 19:55:36 +00:00
|
|
|
/// remember OR's children
|
2021-08-05 11:35:16 +00:00
|
|
|
void TableJoin::setDisjuncts(Disjuncts&& disjuncts_)
|
2021-06-25 12:03:10 +00:00
|
|
|
{
|
2021-07-23 19:55:36 +00:00
|
|
|
disjuncts = std::move(disjuncts_);
|
2021-06-25 12:03:10 +00:00
|
|
|
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Adds one pair of ON keys to the last clause of each side.
/// The left key is registered under its column name, the right key under its alias-or-column name.
void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast)
{
    auto & last_left_clause = left_clauses.back();
    last_left_clause.addKey(left_table_ast->getColumnName(), left_table_ast);

    auto & last_right_clause = right_clauses.back();
    last_right_clause.addKey(right_table_ast->getAliasOrColumnName(), right_table_ast);
}
|
|
|
|
|
2019-05-13 18:58:15 +00:00
|
|
|
/// @return how many times right key appears in ON section.
|
2020-04-07 09:48:47 +00:00
|
|
|
size_t TableJoin::rightKeyInclusion(const String & name) const
|
2019-05-13 18:58:15 +00:00
|
|
|
{
|
2021-03-05 14:34:43 +00:00
|
|
|
if (hasUsing())
|
2019-05-13 18:58:15 +00:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
size_t count = 0;
|
2021-09-02 11:40:04 +00:00
|
|
|
for (const auto & clause : right_clauses)
|
|
|
|
count += std::count(clause.key_names.begin(), clause.key_names.end(), name);
|
2019-05-13 18:58:15 +00:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
void TableJoin::deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix)
|
2019-07-30 18:39:37 +00:00
|
|
|
{
|
|
|
|
NameSet joined_columns;
|
|
|
|
NamesAndTypesList dedup_columns;
|
|
|
|
|
|
|
|
for (auto & column : columns_from_joined_table)
|
|
|
|
{
|
|
|
|
if (joined_columns.count(column.name))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
joined_columns.insert(column.name);
|
|
|
|
|
|
|
|
dedup_columns.push_back(column);
|
|
|
|
auto & inserted = dedup_columns.back();
|
|
|
|
|
2020-04-12 03:20:15 +00:00
|
|
|
/// Also qualify unusual column names - that does not look like identifiers.
|
|
|
|
|
|
|
|
if (left_table_columns.count(column.name) || !isValidIdentifierBegin(column.name.at(0)))
|
2019-07-30 18:39:37 +00:00
|
|
|
inserted.name = right_table_prefix + column.name;
|
|
|
|
|
|
|
|
original_names[inserted.name] = column.name;
|
|
|
|
if (inserted.name != column.name)
|
|
|
|
renames[column.name] = inserted.name;
|
|
|
|
}
|
|
|
|
|
|
|
|
columns_from_joined_table.swap(dedup_columns);
|
|
|
|
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// For every required column that has an entry in `original_names`, returns the pair {original_name, name}.
/// Required columns without such an entry are skipped.
NamesWithAliases TableJoin::getNamesWithAliases(const NameSet & required_columns) const
{
    NamesWithAliases names_with_aliases;
    for (const auto & required_name : required_columns)
    {
        const auto found = original_names.find(required_name);
        if (found == original_names.end())
            continue;

        names_with_aliases.emplace_back(found->second, found->first); /// {original_name, name}
    }
    return names_with_aliases;
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Builds an expression list with the key ASTs of every left clause,
/// each clause followed by its combined filter condition when it has one.
ASTPtr TableJoin::leftKeysList() const
{
    ASTPtr keys_list = std::make_shared<ASTExpressionList>();
    auto & children = keys_list->children;

    for (size_t clause_idx = 0; clause_idx != left_clauses.size(); ++clause_idx)
    {
        const auto & key_asts = left_clauses[clause_idx].key_asts;
        children.insert(children.end(), key_asts.begin(), key_asts.end());

        ASTPtr extra_cond = joinConditionColumn(JoinTableSide::Left, clause_idx);
        if (extra_cond)
            children.push_back(extra_cond);
    }
    return keys_list;
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Builds an expression list for the right side: key ASTs are added only for JOIN ON,
/// the combined filter condition of each clause is added whenever it exists.
ASTPtr TableJoin::rightKeysList() const
{
    ASTPtr keys_list = std::make_shared<ASTExpressionList>();
    auto & children = keys_list->children;

    for (size_t clause_idx = 0; clause_idx != right_clauses.size(); ++clause_idx)
    {
        if (hasOn())
        {
            const auto & key_asts = right_clauses[clause_idx].key_asts;
            children.insert(children.end(), key_asts.begin(), key_asts.end());
        }

        ASTPtr extra_cond = joinConditionColumn(JoinTableSide::Right, clause_idx);
        if (extra_cond)
            children.push_back(extra_cond);
    }
    return keys_list;
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Returns the distinct names of all right-side keys plus all columns added by the join.
Names TableJoin::requiredJoinedNames() const
{
    NameSet unique_names;

    for (const auto & clause : right_clauses)
    {
        const auto & key_names = clause.key_names;
        unique_names.insert(key_names.begin(), key_names.end());
    }

    for (const auto & added_column : columns_added_by_join)
        unique_names.insert(added_column.name);

    Names required_names(unique_names.begin(), unique_names.end());
    return required_names;
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Returns the right key names whose renamed form matches a column added by the join.
NameSet TableJoin::requiredRightKeys() const
{
    NameSet required;
    for (const auto & clause : right_clauses)
    {
        for (const auto & key_name : clause.key_names)
        {
            const auto renamed = renamedRightColumnName(key_name);
            const bool is_added_by_join = std::any_of(
                columns_added_by_join.begin(),
                columns_added_by_join.end(),
                [&renamed](const auto & column) { return renamed == column.name; });

            /// The key is stored under its un-renamed name.
            if (is_added_by_join)
                required.insert(key_name);
        }
    }
    return required;
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Combines `action_required_columns` with the joined names missing from `sample`
/// and maps the result through `getNamesWithAliases`.
NamesWithAliases TableJoin::getRequiredColumns(const Block & sample, const Names & action_required_columns) const
{
    NameSet required_columns(action_required_columns.begin(), action_required_columns.end());

    const Names joined_names = requiredJoinedNames();
    for (const auto & joined_name : joined_names)
    {
        if (sample.has(joined_name))
            continue;
        required_columns.insert(joined_name);
    }

    return getNamesWithAliases(required_columns);
}
|
|
|
|
|
2021-03-05 14:34:43 +00:00
|
|
|
/// Collects the required right key columns from `right_table_keys`, each at most once.
/// For every collected column, the left key name at the same clause/position is appended to `keys_sources`.
Block TableJoin::getRequiredRightKeys(const Block & right_table_keys, std::vector<String> & keys_sources) const
{
    const auto & left_keys = keyNamesLeft();
    const auto & right_keys = keyNamesRight();
    const NameSet required_keys = requiredRightKeys();

    Block required_right_keys;
    for (size_t clause_idx = 0; clause_idx < right_keys.size(); ++clause_idx)
    {
        const auto & right_clause_keys = right_keys[clause_idx];
        for (size_t key_idx = 0; key_idx < right_clause_keys.size(); ++key_idx)
        {
            const String & right_key_name = right_clause_keys[key_idx];

            /// Skip keys that are not required or were already collected.
            if (!required_keys.count(right_key_name) || required_right_keys.has(right_key_name))
                continue;

            required_right_keys.insert(right_table_keys.getByName(right_key_name));
            keys_sources.push_back(left_keys[clause_idx][key_idx]);
        }
    }

    return required_right_keys;
}
|
|
|
|
|
|
|
|
bool TableJoin::leftBecomeNullable(const DataTypePtr & column_type) const
|
|
|
|
{
|
2021-04-17 19:03:32 +00:00
|
|
|
return forceNullableLeft() && JoinCommon::canBecomeNullable(column_type);
|
2021-03-05 14:34:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool TableJoin::rightBecomeNullable(const DataTypePtr & column_type) const
|
|
|
|
{
|
2021-04-17 19:03:32 +00:00
|
|
|
return forceNullableRight() && JoinCommon::canBecomeNullable(column_type);
|
2021-03-05 14:34:43 +00:00
|
|
|
}
|
2020-04-13 17:03:11 +00:00
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column)
|
2019-09-02 19:58:45 +00:00
|
|
|
{
|
2021-06-28 13:44:19 +00:00
|
|
|
columns_added_by_join.emplace_back(joined_column);
|
|
|
|
}
|
2021-02-09 13:17:42 +00:00
|
|
|
|
2021-06-28 13:44:19 +00:00
|
|
|
/// Returns the columns added by the join with corrected types: for USING the type from
/// `right_type_map` replaces the original one, then the type is made Nullable when
/// `rightBecomeNullable` says so.
NamesAndTypesList TableJoin::correctedColumnsAddedByJoin() const
{
    NamesAndTypesList corrected;
    for (const auto & added : columns_added_by_join)
    {
        DataTypePtr result_type = added.type;

        if (hasUsing())
        {
            const auto converted = right_type_map.find(added.name);
            if (converted != right_type_map.end())
                result_type = converted->second;
        }

        if (rightBecomeNullable(result_type))
            result_type = JoinCommon::convertTypeToNullable(result_type);

        corrected.emplace_back(added.name, result_type);
    }
    return corrected;
}
|
|
|
|
|
2021-06-28 13:44:19 +00:00
|
|
|
/// Corrects the types of `left_columns` in place and appends the columns added by the join.
/// @param left_columns         columns of the left table; modified and extended.
/// @param correct_nullability  when true, left column types are converted to Nullable if `leftBecomeNullable` allows it.
void TableJoin::addJoinedColumnsAndCorrectTypes(NamesAndTypesList & left_columns, bool correct_nullability)
{
    const bool using_join = hasUsing();

    /*
     * Join with `USING` semantic allows to have columns with changed types in result table.
     * But `JOIN ON` should preserve types from original table.
     * So we need to know changed types in result tables before further analysis (e.g. analyzeAggregation)
     * For `JOIN ON expr1 == expr2` we will infer common type later in makeTableJoin,
     * when part of plan built and types of expression will be known.
     */
    /// The inference does not depend on the column being processed, so run it once up front
    /// instead of once per left column. It is skipped for an empty list, as the per-column loop did.
    if (using_join && !left_columns.empty())
        inferJoinKeyCommonType(left_columns, columns_from_joined_table, !isSpecialStorage());

    for (auto & col : left_columns)
    {
        if (using_join)
        {
            if (auto it = left_type_map.find(col.name); it != left_type_map.end())
                col.type = it->second;
        }

        if (correct_nullability && leftBecomeNullable(col.type))
            col.type = JoinCommon::convertTypeToNullable(col.type);
    }

    for (const auto & col : correctedColumnsAddedByJoin())
        left_columns.emplace_back(col.name, col.type);
}
|
|
|
|
|
2021-03-05 14:34:43 +00:00
|
|
|
/// Checks whether the given strictness/kind pair matches this join,
/// treating RightAny INNER and SEMI LEFT as equivalent in both directions.
bool TableJoin::sameStrictnessAndKind(ASTTableJoin::Strictness strictness_, ASTTableJoin::Kind kind_) const
{
    const auto own_strictness = strictness();
    const auto own_kind = kind();

    if (strictness_ == own_strictness && kind_ == own_kind)
        return true;

    /// Compatibility: old ANY INNER == new SEMI LEFT
    const auto is_semi_left = [](ASTTableJoin::Strictness s, ASTTableJoin::Kind k)
    {
        return s == ASTTableJoin::Strictness::Semi && isLeft(k);
    };
    const auto is_any_inner = [](ASTTableJoin::Strictness s, ASTTableJoin::Kind k)
    {
        return s == ASTTableJoin::Strictness::RightAny && isInner(k);
    };

    return (is_semi_left(strictness_, kind_) && is_any_inner(own_strictness, own_kind))
        || (is_semi_left(own_strictness, own_kind) && is_any_inner(strictness_, kind_));
}
|
|
|
|
|
|
|
|
bool TableJoin::allowMergeJoin() const
|
|
|
|
{
|
|
|
|
bool is_any = (strictness() == ASTTableJoin::Strictness::Any);
|
|
|
|
bool is_all = (strictness() == ASTTableJoin::Strictness::All);
|
|
|
|
bool is_semi = (strictness() == ASTTableJoin::Strictness::Semi);
|
|
|
|
|
|
|
|
bool all_join = is_all && (isInner(kind()) || isLeft(kind()) || isRight(kind()) || isFull(kind()));
|
|
|
|
bool special_left = isLeft(kind()) && (is_any || is_semi);
|
2021-06-25 12:03:10 +00:00
|
|
|
|
2021-09-02 11:40:04 +00:00
|
|
|
bool no_ors = (left_clauses.size() == 1);
|
2021-06-25 12:03:10 +00:00
|
|
|
|
|
|
|
return (all_join || special_left) && no_ors;
|
2021-03-05 14:34:43 +00:00
|
|
|
}
|
2020-07-10 18:10:06 +00:00
|
|
|
|
|
|
|
bool TableJoin::needStreamWithNonJoinedRows() const
|
|
|
|
{
|
2021-03-05 14:34:43 +00:00
|
|
|
if (strictness() == ASTTableJoin::Strictness::Asof ||
|
|
|
|
strictness() == ASTTableJoin::Strictness::Semi)
|
2020-07-10 18:10:06 +00:00
|
|
|
return false;
|
2021-03-05 14:34:43 +00:00
|
|
|
return isRightOrFull(kind());
|
2019-09-25 19:17:32 +00:00
|
|
|
}
|
|
|
|
|
2021-06-29 09:22:53 +00:00
|
|
|
static std::optional<String> getDictKeyName(const String & dict_name , ContextPtr context)
|
|
|
|
{
|
|
|
|
auto dictionary = context->getExternalDictionariesLoader().getDictionary(dict_name, context);
|
|
|
|
if (!dictionary)
|
|
|
|
return {};
|
|
|
|
|
|
|
|
if (const auto & structure = dictionary->getStructure(); structure.id)
|
|
|
|
return structure.id->name;
|
|
|
|
return {};
|
|
|
|
}
|
|
|
|
|
|
|
|
bool TableJoin::tryInitDictJoin(const Block & sample_block, ContextPtr context)
|
2020-04-09 20:00:57 +00:00
|
|
|
{
|
2021-09-14 12:04:45 +00:00
|
|
|
using Strictness = ASTTableJoin::Strictness;
|
|
|
|
|
|
|
|
bool allowed_inner = isInner(kind()) && strictness() == Strictness::All;
|
|
|
|
bool allowed_left = isLeft(kind()) && (strictness() == Strictness::Any ||
|
|
|
|
strictness() == Strictness::All ||
|
|
|
|
strictness() == Strictness::Semi ||
|
|
|
|
strictness() == Strictness::Anti);
|
|
|
|
|
2020-04-13 17:03:11 +00:00
|
|
|
/// Support ALL INNER, [ANY | ALL | SEMI | ANTI] LEFT
|
2021-09-14 12:04:45 +00:00
|
|
|
if (!allowed_inner && !allowed_left)
|
2020-04-13 17:03:11 +00:00
|
|
|
return false;
|
|
|
|
|
2021-09-02 11:40:04 +00:00
|
|
|
const Names & right_keys = right_clauses.front().key_names;
|
2021-03-05 14:34:43 +00:00
|
|
|
if (right_keys.size() != 1)
|
2020-04-09 20:00:57 +00:00
|
|
|
return false;
|
|
|
|
|
2020-07-14 14:06:11 +00:00
|
|
|
/// TODO: support 'JOIN ... ON expr(dict_key) = table_key'
|
2021-03-05 14:34:43 +00:00
|
|
|
auto it_key = original_names.find(right_keys[0]);
|
2020-07-14 14:06:11 +00:00
|
|
|
if (it_key == original_names.end())
|
|
|
|
return false;
|
|
|
|
|
2021-06-29 09:22:53 +00:00
|
|
|
if (!right_storage_dictionary)
|
|
|
|
return false;
|
|
|
|
|
2021-06-29 10:11:01 +00:00
|
|
|
auto dict_name = right_storage_dictionary->getDictionaryName();
|
2021-06-29 09:22:53 +00:00
|
|
|
|
|
|
|
auto dict_key = getDictKeyName(dict_name, context);
|
|
|
|
if (!dict_key.has_value() || *dict_key != it_key->second)
|
2020-07-14 14:06:11 +00:00
|
|
|
return false; /// JOIN key != Dictionary key
|
|
|
|
|
2021-06-29 09:22:53 +00:00
|
|
|
Names src_names;
|
|
|
|
NamesAndTypesList dst_columns;
|
2020-04-22 06:01:33 +00:00
|
|
|
for (const auto & col : sample_block)
|
2020-04-09 20:00:57 +00:00
|
|
|
{
|
2021-03-05 14:34:43 +00:00
|
|
|
if (col.name == right_keys[0])
|
2020-04-09 20:00:57 +00:00
|
|
|
continue; /// do not extract key column
|
|
|
|
|
2020-07-14 14:06:11 +00:00
|
|
|
auto it = original_names.find(col.name);
|
|
|
|
if (it != original_names.end())
|
|
|
|
{
|
|
|
|
String original = it->second;
|
|
|
|
src_names.push_back(original);
|
|
|
|
dst_columns.push_back({col.name, col.type});
|
|
|
|
}
|
2020-04-09 20:00:57 +00:00
|
|
|
}
|
2021-06-29 09:22:53 +00:00
|
|
|
dictionary_reader = std::make_shared<DictionaryReader>(dict_name, src_names, dst_columns, context);
|
2020-04-09 20:00:57 +00:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-06-29 09:52:19 +00:00
|
|
|
std::pair<ActionsDAGPtr, ActionsDAGPtr>
|
|
|
|
TableJoin::createConvertingActions(const ColumnsWithTypeAndName & left_sample_columns, const ColumnsWithTypeAndName & right_sample_columns)
|
2021-02-18 11:49:32 +00:00
|
|
|
{
|
2021-06-29 09:52:19 +00:00
|
|
|
inferJoinKeyCommonType(left_sample_columns, right_sample_columns, !isSpecialStorage());
|
2021-06-26 13:59:07 +00:00
|
|
|
|
2021-06-29 09:52:19 +00:00
|
|
|
auto left_converting_actions = applyKeyConvertToTable(left_sample_columns, left_type_map, key_names_left);
|
|
|
|
auto right_converting_actions = applyKeyConvertToTable(right_sample_columns, right_type_map, key_names_right);
|
2021-02-18 11:49:32 +00:00
|
|
|
|
2021-06-29 09:52:19 +00:00
|
|
|
return {left_converting_actions, right_converting_actions};
|
2021-02-18 11:49:32 +00:00
|
|
|
}
|
|
|
|
|
2021-06-28 13:44:19 +00:00
|
|
|
template <typename LeftNamesAndTypes, typename RightNamesAndTypes>
|
|
|
|
bool TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const RightNamesAndTypes & right, bool allow_right)
|
2021-02-09 13:17:42 +00:00
|
|
|
{
|
2021-06-28 13:44:19 +00:00
|
|
|
if (!left_type_map.empty() || !right_type_map.empty())
|
|
|
|
return true;
|
|
|
|
|
2021-06-26 13:59:07 +00:00
|
|
|
NameToTypeMap left_types;
|
2021-02-18 11:49:32 +00:00
|
|
|
for (const auto & col : left)
|
|
|
|
left_types[col.name] = col.type;
|
2021-02-09 13:17:42 +00:00
|
|
|
|
2021-06-26 13:59:07 +00:00
|
|
|
NameToTypeMap right_types;
|
2021-02-18 11:49:32 +00:00
|
|
|
for (const auto & col : right)
|
2021-06-28 13:44:19 +00:00
|
|
|
right_types[renamedRightColumnName(col.name)] = col.type;
|
2021-02-09 13:17:42 +00:00
|
|
|
|
2021-09-02 11:40:04 +00:00
|
|
|
for (size_t d = 0; d < left_clauses.size(); ++d)
|
2021-02-09 13:17:42 +00:00
|
|
|
{
|
2021-09-02 11:40:04 +00:00
|
|
|
auto & key_names_left = left_clauses[d].key_names;
|
|
|
|
auto & key_names_right = right_clauses[d].key_names;
|
|
|
|
for (size_t i = 0; i < key_names_left.size(); ++i)
|
2021-02-09 13:17:42 +00:00
|
|
|
{
|
2021-09-02 11:40:04 +00:00
|
|
|
auto ltype = left_types.find(key_names_left[i]);
|
|
|
|
auto rtype = right_types.find(key_names_right[i]);
|
2021-06-25 12:03:10 +00:00
|
|
|
if (ltype == left_types.end() || rtype == right_types.end())
|
|
|
|
{
|
|
|
|
/// Name mismatch, give up
|
|
|
|
left_type_map.clear();
|
|
|
|
right_type_map.clear();
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (JoinCommon::typesEqualUpToNullability(ltype->second, rtype->second))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
DataTypePtr supertype;
|
|
|
|
try
|
|
|
|
{
|
|
|
|
/// TODO(vdimir): use getMostSubtype if possible
|
|
|
|
common_type = DB::getLeastSupertype({ltype->second, rtype->second});
|
|
|
|
}
|
|
|
|
catch (DB::Exception & ex)
|
|
|
|
{
|
2021-09-02 11:40:04 +00:00
|
|
|
throw Exception(
|
|
|
|
"Type mismatch of columns to JOIN by: " +
|
|
|
|
key_names_left[d][i] + ": " + ltype->second->getName() + " at left, " +
|
|
|
|
key_names_right[d][i] + ": " + rtype->second->getName() + " at right. " +
|
|
|
|
"Can't get supertype: " + ex.message(),
|
|
|
|
ErrorCodes::TYPE_MISMATCH);
|
2021-06-25 12:03:10 +00:00
|
|
|
}
|
2021-09-02 11:40:04 +00:00
|
|
|
left_type_map[key_names_left[i]] = right_type_map[key_names_right[i]] = supertype;
|
2021-02-09 13:17:42 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-08 14:21:50 +00:00
|
|
|
if (!left_type_map.empty() || !right_type_map.empty())
|
|
|
|
{
|
|
|
|
LOG_TRACE(
|
|
|
|
&Poco::Logger::get("TableJoin"),
|
|
|
|
"Infer supertype for joined columns. Left: [{}], Right: [{}]",
|
2021-06-26 13:59:07 +00:00
|
|
|
formatTypeMap(left_type_map, left_types),
|
|
|
|
formatTypeMap(right_type_map, right_types));
|
2021-03-08 14:21:50 +00:00
|
|
|
}
|
|
|
|
|
2021-02-09 13:17:42 +00:00
|
|
|
return !left_type_map.empty();
|
|
|
|
}
|
|
|
|
|
2021-03-05 14:34:43 +00:00
|
|
|
/// Builds the actions converting `cols_src` to the types listed in `type_mapping` and renames
/// the key names stored in `join_clause` according to the renames reported by the conversion.
/// Returns nullptr (and leaves `join_clause` untouched) when no source column appears in `type_mapping`.
ActionsDAGPtr TableJoin::applyKeyConvertToTable(
    const ColumnsWithTypeAndName & cols_src, const NameToTypeMap & type_mapping, std::vector<JoinOnClause> & join_clause) const
{
    ColumnsWithTypeAndName cols_dst = cols_src;

    bool needs_conversion = false;
    for (auto & col : cols_dst)
    {
        const auto mapped = type_mapping.find(col.name);
        if (mapped == type_mapping.end())
            continue;

        needs_conversion = true;
        col.type = mapped->second;
        col.column = nullptr;
    }

    if (!needs_conversion)
        return nullptr;

    /// Returns converting actions for tables that need to be performed before join
    NameToNameMap key_column_rename;
    auto dag = ActionsDAG::makeConvertingActions(
        cols_src, cols_dst, ActionsDAG::MatchColumnsMode::Name, true, !hasUsing(), &key_column_rename);

    for (auto & clause : join_clause)
    {
        for (auto & key_name : clause.key_names)
        {
            const auto renamed = key_column_rename.find(key_name);
            if (renamed == key_column_rename.end())
                continue;
            key_name = renamed->second;
        }
    }

    return dag;
}
|
|
|
|
|
2021-06-29 09:22:53 +00:00
|
|
|
void TableJoin::setStorageJoin(std::shared_ptr<StorageJoin> storage)
|
|
|
|
{
|
|
|
|
if (right_storage_dictionary)
|
|
|
|
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "StorageJoin and Dictionary join are mutually exclusive");
|
|
|
|
right_storage_join = storage;
|
|
|
|
}
|
|
|
|
|
|
|
|
void TableJoin::setStorageJoin(std::shared_ptr<StorageDictionary> storage)
|
|
|
|
{
|
|
|
|
if (right_storage_join)
|
|
|
|
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "StorageJoin and Dictionary join are mutually exclusive");
|
|
|
|
right_storage_dictionary = storage;
|
|
|
|
}
|
|
|
|
|
2021-04-29 14:30:02 +00:00
|
|
|
/// Returns the name recorded in `renames` for `name`, or `name` itself when there is no entry.
String TableJoin::renamedRightColumnName(const String & name) const
{
    const auto renamed = renames.find(name);
    if (renamed == renames.end())
        return name;
    return renamed->second;
}
|
|
|
|
|
2021-07-21 17:03:33 +00:00
|
|
|
void TableJoin::addJoinCondition(const ASTPtr & ast, bool is_left)
|
|
|
|
{
|
|
|
|
if (is_left)
|
2021-09-02 11:40:04 +00:00
|
|
|
left_clauses.back().on_filter_conditions.push_back(ast);
|
2021-07-21 17:03:33 +00:00
|
|
|
else
|
2021-09-02 11:40:04 +00:00
|
|
|
right_clauses.back().on_filter_conditions.push_back(ast);
|
2021-07-21 17:03:33 +00:00
|
|
|
}
|
|
|
|
|
2021-08-20 14:32:17 +00:00
|
|
|
void TableJoin::leftToRightKeyRemap(
|
|
|
|
const Names & left_keys,
|
|
|
|
const Names & right_keys,
|
|
|
|
const NameSet & required_right_keys,
|
|
|
|
std::unordered_map<String, String> & key_map) const
|
2021-08-06 14:15:11 +00:00
|
|
|
{
|
|
|
|
if (hasUsing())
|
|
|
|
{
|
2021-08-20 14:32:17 +00:00
|
|
|
for (size_t i = 0; i < left_keys.size(); ++i)
|
2021-08-06 14:15:11 +00:00
|
|
|
{
|
2021-08-20 14:32:17 +00:00
|
|
|
const String & left_key_name = left_keys[i];
|
|
|
|
const String & right_key_name = right_keys[i];
|
2021-08-06 14:15:11 +00:00
|
|
|
|
|
|
|
if (!required_right_keys.contains(right_key_name))
|
2021-08-20 14:32:17 +00:00
|
|
|
key_map[left_key_name] = right_key_name;
|
2021-08-06 14:15:11 +00:00
|
|
|
}
|
|
|
|
}
|
2021-08-20 14:32:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
std::unordered_map<String, String> TableJoin::leftToRightKeyRemap() const
|
|
|
|
{
|
|
|
|
std::unordered_map<String, String> left_to_right_key_remap;
|
|
|
|
if (hasUsing())
|
|
|
|
{
|
|
|
|
const auto & required_right_keys = requiredRightKeys();
|
2021-09-02 11:40:04 +00:00
|
|
|
for (size_t i = 0; i < left_clauses.size(); ++i)
|
|
|
|
TableJoin::leftToRightKeyRemap(left_clauses[i].key_names, right_clauses[i].key_names, required_right_keys, left_to_right_key_remap);
|
2021-08-20 14:32:17 +00:00
|
|
|
}
|
2021-08-06 14:15:11 +00:00
|
|
|
return left_to_right_key_remap;
|
|
|
|
}
|
|
|
|
|
2021-07-21 17:03:33 +00:00
|
|
|
/// Returns all conditions related to one table joined with 'and' function
|
2021-09-02 11:40:04 +00:00
|
|
|
static ASTPtr buildJoinConditionColumn(const ASTs & on_filter_condition_asts)
|
2021-07-21 17:03:33 +00:00
|
|
|
{
|
2021-09-02 11:40:04 +00:00
|
|
|
if (on_filter_condition_asts.empty())
|
2021-07-21 17:03:33 +00:00
|
|
|
return nullptr;
|
2021-09-02 11:40:04 +00:00
|
|
|
|
|
|
|
if (on_filter_condition_asts.size() == 1)
|
|
|
|
return on_filter_condition_asts[0];
|
2021-07-21 17:03:33 +00:00
|
|
|
|
|
|
|
auto function = std::make_shared<ASTFunction>();
|
|
|
|
function->name = "and";
|
|
|
|
function->arguments = std::make_shared<ASTExpressionList>();
|
|
|
|
function->children.push_back(function->arguments);
|
2021-09-02 11:40:04 +00:00
|
|
|
function->arguments->children = on_filter_condition_asts;
|
2021-07-21 17:03:33 +00:00
|
|
|
return function;
|
|
|
|
}
|
|
|
|
|
2021-06-25 12:03:10 +00:00
|
|
|
/// Returns the combined filter condition of clause number `disjunct` for the given side
/// (nullptr when that clause has no filter conditions). `disjunct` is not range-checked here.
ASTPtr TableJoin::joinConditionColumn(JoinTableSide side, size_t disjunct) const
{
    const auto & clauses = (side == JoinTableSide::Left) ? left_clauses : right_clauses;
    return buildJoinConditionColumn(clauses[disjunct].on_filter_conditions);
}
|
|
|
|
|
2021-06-25 12:03:10 +00:00
|
|
|
std::pair<String, String> TableJoin::joinConditionColumnNames(size_t disjunct) const
|
2021-07-21 17:03:33 +00:00
|
|
|
{
|
|
|
|
std::pair<String, String> res;
|
2021-06-25 12:03:10 +00:00
|
|
|
if (auto cond_ast = joinConditionColumn(JoinTableSide::Left, disjunct))
|
2021-07-21 17:03:33 +00:00
|
|
|
res.first = cond_ast->getColumnName();
|
2021-06-25 12:03:10 +00:00
|
|
|
if (auto cond_ast = joinConditionColumn(JoinTableSide::Right, disjunct))
|
2021-07-21 17:03:33 +00:00
|
|
|
res.second = cond_ast->getColumnName();
|
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
2018-11-02 18:53:23 +00:00
|
|
|
}
|