2020-04-07 09:48:47 +00:00
|
|
|
#include <Interpreters/TableJoin.h>
|
2018-11-02 18:53:23 +00:00
|
|
|
|
|
|
|
#include <Parsers/ASTExpressionList.h>
|
|
|
|
|
2019-09-09 19:43:37 +00:00
|
|
|
#include <Core/Settings.h>
|
2019-09-02 19:58:45 +00:00
|
|
|
#include <Core/Block.h>
|
2020-09-08 10:40:53 +00:00
|
|
|
#include <Core/ColumnsWithTypeAndName.h>
|
2018-11-02 18:53:23 +00:00
|
|
|
|
2020-04-12 03:20:15 +00:00
|
|
|
#include <Common/StringUtils/StringUtils.h>
|
|
|
|
|
2019-09-02 19:58:45 +00:00
|
|
|
#include <DataTypes/DataTypeNullable.h>
|
|
|
|
|
2018-11-02 18:53:23 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2021-02-09 15:28:06 +00:00
|
|
|
namespace ErrorCodes
{
    /// Thrown in this file when JOIN key columns have no common supertype.
    extern const int TYPE_MISMATCH;
}
|
|
|
|
|
2021-03-05 13:38:49 +00:00
|
|
|
/// Builds the join description from the JOIN AST node and the query settings.
/// `join_info` is constructed from both arguments, the codec for temporary files
/// is read from `settings`, and the volume for temporary data is stored as passed.
TableJoin::TableJoin(const ASTTableJoin & table_join_ast, const Settings & settings, VolumePtr tmp_volume_)
    : join_info(table_join_ast, settings)
    , temporary_files_codec(settings.temporary_files_codec)
    , tmp_volume(tmp_volume_)
{
}
|
2019-09-09 19:43:37 +00:00
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
void TableJoin::addUsingKey(const ASTPtr & ast)
|
2019-02-13 15:18:02 +00:00
|
|
|
{
|
|
|
|
key_names_left.push_back(ast->getColumnName());
|
|
|
|
key_names_right.push_back(ast->getAliasOrColumnName());
|
|
|
|
|
|
|
|
key_asts_left.push_back(ast);
|
|
|
|
key_asts_right.push_back(ast);
|
2019-07-30 18:39:37 +00:00
|
|
|
|
|
|
|
auto & right_key = key_names_right.back();
|
|
|
|
if (renames.count(right_key))
|
|
|
|
right_key = renames[right_key];
|
2019-02-13 15:18:02 +00:00
|
|
|
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Registers one pair of keys of the ON section.
/// The left key is named by its column name, the right key by its alias (or column name);
/// both AST nodes are remembered alongside the names.
void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast)
{
    key_names_left.emplace_back(left_table_ast->getColumnName());
    key_names_right.emplace_back(right_table_ast->getAliasOrColumnName());

    key_asts_left.emplace_back(left_table_ast);
    key_asts_right.emplace_back(right_table_ast);
}
|
|
|
|
|
2019-05-13 18:58:15 +00:00
|
|
|
/// @return how many times right key appears in ON section.
|
2020-04-07 09:48:47 +00:00
|
|
|
size_t TableJoin::rightKeyInclusion(const String & name) const
|
2019-05-13 18:58:15 +00:00
|
|
|
{
|
2021-03-05 13:38:49 +00:00
|
|
|
if (!hasOn())
|
2019-05-13 18:58:15 +00:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
size_t count = 0;
|
|
|
|
for (const auto & key_name : key_names_right)
|
|
|
|
if (name == key_name)
|
|
|
|
++count;
|
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
void TableJoin::deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix)
|
2019-07-30 18:39:37 +00:00
|
|
|
{
|
|
|
|
NameSet joined_columns;
|
|
|
|
NamesAndTypesList dedup_columns;
|
|
|
|
|
|
|
|
for (auto & column : columns_from_joined_table)
|
|
|
|
{
|
|
|
|
if (joined_columns.count(column.name))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
joined_columns.insert(column.name);
|
|
|
|
|
|
|
|
dedup_columns.push_back(column);
|
|
|
|
auto & inserted = dedup_columns.back();
|
|
|
|
|
2020-04-12 03:20:15 +00:00
|
|
|
/// Also qualify unusual column names - that does not look like identifiers.
|
|
|
|
|
|
|
|
if (left_table_columns.count(column.name) || !isValidIdentifierBegin(column.name.at(0)))
|
2019-07-30 18:39:37 +00:00
|
|
|
inserted.name = right_table_prefix + column.name;
|
|
|
|
|
|
|
|
original_names[inserted.name] = column.name;
|
|
|
|
if (inserted.name != column.name)
|
|
|
|
renames[column.name] = inserted.name;
|
|
|
|
}
|
|
|
|
|
|
|
|
columns_from_joined_table.swap(dedup_columns);
|
|
|
|
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// For every required column known to `original_names`, returns the pair {original_name, name}.
/// Required columns that have no entry in `original_names` are skipped.
NamesWithAliases TableJoin::getNamesWithAliases(const NameSet & required_columns) const
{
    NamesWithAliases result;

    for (const auto & required_name : required_columns)
    {
        if (const auto found = original_names.find(required_name); found != original_names.end())
            result.emplace_back(found->second, found->first); /// {original_name, name}
    }

    return result;
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Returns a new expression list whose children are the left key ASTs.
ASTPtr TableJoin::leftKeysList() const
{
    auto expression_list = std::make_shared<ASTExpressionList>();
    expression_list->children = key_asts_left;
    return expression_list;
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Returns a new expression list with the right key ASTs.
/// The list is left empty when the join has no ON section.
ASTPtr TableJoin::rightKeysList() const
{
    auto expression_list = std::make_shared<ASTExpressionList>();

    if (hasOn())
    {
        expression_list->children = key_asts_right;
    }

    return expression_list;
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Returns the distinct names of the right keys together with the columns added by the join.
/// The order of the result is the iteration order of the intermediate name set.
Names TableJoin::requiredJoinedNames() const
{
    NameSet unique_names(key_names_right.begin(), key_names_right.end());

    for (const auto & added_column : columns_added_by_join)
    {
        unique_names.insert(added_column.name);
    }

    Names result(unique_names.begin(), unique_names.end());
    return result;
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Returns the right key names that are also present among the columns added by the join.
NameSet TableJoin::requiredRightKeys() const
{
    NameSet matched_keys;

    for (const auto & key_name : key_names_right)
    {
        for (const auto & added_column : columns_added_by_join)
        {
            if (added_column.name == key_name)
            {
                matched_keys.insert(key_name);
                /// Further matches would insert the same name again, which changes nothing.
                break;
            }
        }
    }

    return matched_keys;
}
|
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
/// Collects the columns requested by actions plus every required joined name that is
/// missing from `sample`, and returns them as {original_name, name} pairs.
NamesWithAliases TableJoin::getRequiredColumns(const Block & sample, const Names & action_required_columns) const
{
    NameSet wanted_columns(action_required_columns.begin(), action_required_columns.end());

    const Names joined_names = requiredJoinedNames();
    for (const auto & joined_name : joined_names)
    {
        if (sample.has(joined_name))
            continue;
        wanted_columns.insert(joined_name);
    }

    return getNamesWithAliases(wanted_columns);
}
|
|
|
|
|
2020-04-13 17:03:11 +00:00
|
|
|
|
2020-04-07 09:48:47 +00:00
|
|
|
void TableJoin::addJoinedColumn(const NameAndTypePair & joined_column)
|
2019-09-02 19:58:45 +00:00
|
|
|
{
|
2021-02-09 13:17:42 +00:00
|
|
|
DataTypePtr type = joined_column.type;
|
|
|
|
|
2021-03-05 13:38:49 +00:00
|
|
|
if (!hasOn())
|
2021-02-18 11:49:32 +00:00
|
|
|
{
|
|
|
|
if (auto it = right_type_map.find(joined_column.name); it != right_type_map.end())
|
|
|
|
type = it->second;
|
|
|
|
}
|
2021-02-09 13:17:42 +00:00
|
|
|
|
2021-03-05 13:38:49 +00:00
|
|
|
if (join_info.forceNullableRight() && type->canBeInsideNullable())
|
2021-02-09 13:17:42 +00:00
|
|
|
type = makeNullable(joined_column.type);
|
|
|
|
|
|
|
|
columns_added_by_join.emplace_back(joined_column.name, type);
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Overload for a names-and-types list: moves the entries into a temporary
/// ColumnsWithTypeAndName (with null column pointers), delegates to the
/// ColumnsWithTypeAndName overload, and moves the result back into `names_and_types`.
void TableJoin::addJoinedColumnsAndCorrectTypes(NamesAndTypesList & names_and_types, bool correct_nullability) const
{
    ColumnsWithTypeAndName as_columns;
    for (auto & entry : names_and_types)
    {
        as_columns.emplace_back(nullptr, std::move(entry.type), std::move(entry.name));
    }
    /// The entries were moved from; drop them before refilling the list.
    names_and_types.clear();

    addJoinedColumnsAndCorrectTypes(as_columns, correct_nullability);

    for (auto & column : as_columns)
    {
        names_and_types.emplace_back(std::move(column.name), std::move(column.type));
    }
}
|
|
|
|
|
2021-02-09 13:17:42 +00:00
|
|
|
/// Adjusts the types of the given (left-side) columns and then appends the columns added by the join.
///
/// For each existing column:
///  - when the join has no ON section, a type recorded in `left_type_map` for its name replaces its type;
///  - when `correct_nullability` is set, `join_info.forceNullableLeft()` holds and the type can be
///    inside Nullable, the type becomes Nullable unless the column is a constant.
void TableJoin::addJoinedColumnsAndCorrectTypes(ColumnsWithTypeAndName & columns, bool correct_nullability) const
{
    for (auto & column : columns)
    {
        if (!hasOn())
        {
            const auto converted = left_type_map.find(column.name);
            if (converted != left_type_map.end())
                column.type = converted->second;
        }

        if (!correct_nullability || !join_info.forceNullableLeft() || !column.type->canBeInsideNullable())
            continue;

        /// No need to nullify constants
        if (column.column && isColumnConst(*column.column))
            continue;

        column.type = makeNullable(column.type);
    }

    /// Types in columns_added_by_join already converted and set nullable if needed
    for (const auto & added_column : columns_added_by_join)
    {
        columns.emplace_back(nullptr, added_column.type, added_column.name);
    }
}
|
|
|
|
|
2020-07-10 18:10:06 +00:00
|
|
|
|
|
|
|
bool TableJoin::needStreamWithNonJoinedRows() const
|
|
|
|
{
|
2021-03-05 13:38:49 +00:00
|
|
|
if (join_info.strictness == ASTTableJoin::Strictness::Asof ||
|
|
|
|
join_info.strictness == ASTTableJoin::Strictness::Semi)
|
2020-07-10 18:10:06 +00:00
|
|
|
return false;
|
2021-03-05 13:38:49 +00:00
|
|
|
return isRightOrFull(join_info.kind);
|
2019-09-25 19:17:32 +00:00
|
|
|
}
|
|
|
|
|
2020-07-14 14:06:11 +00:00
|
|
|
bool TableJoin::allowDictJoin(const String & dict_key, const Block & sample_block, Names & src_names, NamesAndTypesList & dst_columns) const
|
2020-04-09 20:00:57 +00:00
|
|
|
{
|
2020-04-13 17:03:11 +00:00
|
|
|
/// Support ALL INNER, [ANY | ALL | SEMI | ANTI] LEFT
|
2021-03-05 13:38:49 +00:00
|
|
|
if (!isLeft(join_info.kind) && !(isInner(join_info.kind) && join_info.strictness == ASTTableJoin::Strictness::All))
|
2020-04-13 17:03:11 +00:00
|
|
|
return false;
|
|
|
|
|
2021-03-05 13:38:49 +00:00
|
|
|
if (key_names_right.size() != 1)
|
2020-04-09 20:00:57 +00:00
|
|
|
return false;
|
|
|
|
|
2020-07-14 14:06:11 +00:00
|
|
|
/// TODO: support 'JOIN ... ON expr(dict_key) = table_key'
|
2021-03-05 13:38:49 +00:00
|
|
|
auto it_key = original_names.find(key_names_right[0]);
|
2020-07-14 14:06:11 +00:00
|
|
|
if (it_key == original_names.end())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (dict_key != it_key->second)
|
|
|
|
return false; /// JOIN key != Dictionary key
|
|
|
|
|
2020-04-22 06:01:33 +00:00
|
|
|
for (const auto & col : sample_block)
|
2020-04-09 20:00:57 +00:00
|
|
|
{
|
2021-03-05 13:38:49 +00:00
|
|
|
if (col.name == key_names_right[0])
|
2020-04-09 20:00:57 +00:00
|
|
|
continue; /// do not extract key column
|
|
|
|
|
2020-07-14 14:06:11 +00:00
|
|
|
auto it = original_names.find(col.name);
|
|
|
|
if (it != original_names.end())
|
|
|
|
{
|
|
|
|
String original = it->second;
|
|
|
|
src_names.push_back(original);
|
|
|
|
dst_columns.push_back({col.name, col.type});
|
|
|
|
}
|
2020-04-09 20:00:57 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-03-05 13:38:49 +00:00
|
|
|
bool TableJoin::applyJoinKeyConvert(const ColumnsWithTypeAndName & left_sample_columns,
|
|
|
|
const ColumnsWithTypeAndName & right_sample_columns,
|
|
|
|
ActionsDAGPtr & left_converting_actions,
|
|
|
|
ActionsDAGPtr & right_converting_actions)
|
2021-02-18 11:49:32 +00:00
|
|
|
{
|
2021-03-05 13:38:49 +00:00
|
|
|
bool need_convert = !left_type_map.empty();
|
|
|
|
if (hasOn() && !need_convert)
|
2021-02-18 15:51:38 +00:00
|
|
|
{
|
|
|
|
/// For `USING` we already inferred common type an syntax analyzer stage
|
|
|
|
NamesAndTypesList left_list;
|
|
|
|
NamesAndTypesList right_list;
|
|
|
|
for (const auto & col : left_sample_columns)
|
|
|
|
left_list.emplace_back(col.name, col.type);
|
|
|
|
for (const auto & col : right_sample_columns)
|
|
|
|
right_list.emplace_back(col.name, col.type);
|
|
|
|
|
|
|
|
need_convert = inferJoinKeyCommonType(left_list, right_list);
|
|
|
|
}
|
2021-02-18 11:49:32 +00:00
|
|
|
|
2021-02-18 15:51:38 +00:00
|
|
|
if (need_convert)
|
|
|
|
{
|
2021-03-05 13:38:49 +00:00
|
|
|
left_converting_actions = JoinCommon::applyKeyConvertToTable(left_sample_columns, left_type_map, hasOn(), key_names_left);
|
|
|
|
right_converting_actions = JoinCommon::applyKeyConvertToTable(right_sample_columns, right_type_map, hasOn(), key_names_right);
|
2021-02-18 15:51:38 +00:00
|
|
|
}
|
2021-02-18 11:49:32 +00:00
|
|
|
|
2021-02-18 15:51:38 +00:00
|
|
|
return need_convert;
|
2021-02-18 11:49:32 +00:00
|
|
|
}
|
|
|
|
|
2021-02-09 13:17:42 +00:00
|
|
|
bool TableJoin::inferJoinKeyCommonType(const NamesAndTypesList & left, const NamesAndTypesList & right)
|
|
|
|
{
|
|
|
|
std::unordered_map<String, DataTypePtr> left_types;
|
2021-02-18 11:49:32 +00:00
|
|
|
for (const auto & col : left)
|
2021-02-09 13:17:42 +00:00
|
|
|
{
|
2021-02-18 11:49:32 +00:00
|
|
|
left_types[col.name] = col.type;
|
2021-02-09 13:17:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
std::unordered_map<String, DataTypePtr> right_types;
|
2021-02-18 11:49:32 +00:00
|
|
|
for (const auto & col : right)
|
2021-02-09 13:17:42 +00:00
|
|
|
{
|
2021-02-18 11:49:32 +00:00
|
|
|
if (auto it = renames.find(col.name); it != renames.end())
|
|
|
|
right_types[it->second] = col.type;
|
2021-02-09 13:17:42 +00:00
|
|
|
else
|
2021-02-18 11:49:32 +00:00
|
|
|
right_types[col.name] = col.type;
|
2021-02-09 13:17:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < key_names_left.size(); ++i)
|
|
|
|
{
|
2021-02-09 15:28:06 +00:00
|
|
|
auto ltype = left_types.find(key_names_left[i]);
|
|
|
|
auto rtype = right_types.find(key_names_right[i]);
|
|
|
|
if (ltype == left_types.end() || rtype == right_types.end())
|
|
|
|
{
|
|
|
|
/// Name mismatch, give up
|
|
|
|
left_type_map.clear();
|
|
|
|
right_type_map.clear();
|
|
|
|
return false;
|
|
|
|
}
|
2021-02-09 13:17:42 +00:00
|
|
|
|
2021-02-09 15:28:06 +00:00
|
|
|
if (JoinCommon::typesEqualUpToNullability(ltype->second, rtype->second))
|
2021-02-09 13:17:42 +00:00
|
|
|
continue;
|
|
|
|
|
|
|
|
DataTypePtr supertype;
|
|
|
|
try
|
|
|
|
{
|
2021-02-09 15:28:06 +00:00
|
|
|
supertype = DB::getLeastSupertype({ltype->second, rtype->second});
|
2021-02-09 13:17:42 +00:00
|
|
|
}
|
2021-02-18 16:43:41 +00:00
|
|
|
catch (DB::Exception & ex)
|
2021-02-09 13:17:42 +00:00
|
|
|
{
|
|
|
|
throw Exception(
|
|
|
|
"Type mismatch of columns to JOIN by: " +
|
2021-02-09 15:28:06 +00:00
|
|
|
key_names_left[i] + ": " + ltype->second->getName() + " at left, " +
|
2021-02-18 16:43:41 +00:00
|
|
|
key_names_right[i] + ": " + rtype->second->getName() + " at right. " +
|
|
|
|
"Can't get supertype: " + ex.message(),
|
2021-02-09 13:17:42 +00:00
|
|
|
ErrorCodes::TYPE_MISMATCH);
|
|
|
|
}
|
|
|
|
left_type_map[key_names_left[i]] = right_type_map[key_names_right[i]] = supertype;
|
|
|
|
}
|
|
|
|
|
|
|
|
return !left_type_map.empty();
|
|
|
|
}
|
|
|
|
|
2021-03-05 13:38:49 +00:00
|
|
|
/// Returns a copy of `join_info` completed with the current key names of both sides
/// and the set of required right keys.
JoinInfo TableJoin::getJoinInfo() const
{
    JoinInfo info = join_info;

    info.key_names_right = key_names_right;
    info.key_names_left = key_names_left;
    info.required_right_keys = requiredRightKeys();

    return info;
}
|
2021-02-18 15:51:38 +00:00
|
|
|
|
|
|
|
|
2021-03-05 13:38:49 +00:00
|
|
|
|
|
|
|
/// Captures the join-related settings and the kind/strictness of the JOIN AST node.
///
/// `match_expression` becomes JoinOn when the node has an ON expression, otherwise JoinUsing when it
/// has a USING list; with neither it keeps its previous value.
/// When no size limits are configured, `max_bytes` falls back to `default_max_bytes_in_join`.
JoinInfo::JoinInfo(const ASTTableJoin & table_join_ast, const Settings & settings)
    : join_use_nulls(settings.join_use_nulls)
    , join_algorithm(settings.join_algorithm)
    , max_joined_block_rows(settings.max_joined_block_size_rows)
    , partial_merge_join_optimizations(settings.partial_merge_join_optimizations)
    , partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks)
    , partial_merge_join_left_table_buffer_bytes(settings.partial_merge_join_left_table_buffer_bytes)
    , max_files_to_merge(settings.join_on_disk_max_files_to_merge)
    , size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode})
{
    kind = table_join_ast.kind;
    strictness = table_join_ast.strictness;

    /// ON takes precedence over USING when both are present.
    if (table_join_ast.on_expression)
    {
        match_expression = JoinInfo::MatchExpressionType::JoinOn;
    }
    else if (table_join_ast.using_expression_list)
    {
        match_expression = JoinInfo::MatchExpressionType::JoinUsing;
    }

    const bool limits_configured = size_limits.hasLimits();
    if (!limits_configured)
        size_limits.max_bytes = settings.default_max_bytes_in_join;
}
|
|
|
|
|
2018-11-02 18:53:23 +00:00
|
|
|
}
|