mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
dbms: added CROSS JOIN [#METR-16893].
This commit is contained in:
parent
6f70e8e05c
commit
763fe6fb93
@ -18,8 +18,9 @@ namespace DB
|
||||
|
||||
/** Структура данных для реализации JOIN-а.
|
||||
* По сути, хэш-таблица: ключи -> строки присоединяемой таблицы.
|
||||
* Исключение - CROSS JOIN, где вместо хэш-таблицы просто набор блоков без ключей.
|
||||
*
|
||||
* JOIN-ы бывают восьми типов: ANY/ALL x LEFT/INNER/RIGHT/FULL.
|
||||
* JOIN-ы бывают девяти типов: ANY/ALL × LEFT/INNER/RIGHT/FULL, а также CROSS.
|
||||
*
|
||||
* Если указано ANY - выбрать из "правой" таблицы только одну, первую попавшуюся строку, даже если там более одной соответствующей строки.
|
||||
* Если указано ALL - обычный вариант JOIN-а, при котором строки могут размножаться по числу соответствующих строк "правой" таблицы.
|
||||
@ -213,6 +214,7 @@ private:
|
||||
KEY_64,
|
||||
KEY_STRING,
|
||||
HASHED,
|
||||
CROSS,
|
||||
};
|
||||
|
||||
Type type = Type::EMPTY;
|
||||
@ -249,6 +251,8 @@ private:
|
||||
template <ASTJoin::Kind KIND, ASTJoin::Strictness STRICTNESS, typename Maps>
|
||||
void joinBlockImpl(Block & block, const Maps & maps) const;
|
||||
|
||||
void joinBlockImplCross(Block & block) const;
|
||||
|
||||
/// Проверить не превышены ли допустимые размеры множества
|
||||
bool checkSizeLimits() const;
|
||||
|
||||
|
@ -32,7 +32,8 @@ public:
|
||||
Inner, /// Оставить только записи, для которых в "правой" таблице есть соответствующая.
|
||||
Left, /// Если в "правой" таблице нет соответствующих записей, заполнить столбцы значениями "по-умолчанию".
|
||||
Right,
|
||||
Full
|
||||
Full,
|
||||
Cross /// Прямое произведение. strictness и using_expr_list не используются.
|
||||
};
|
||||
|
||||
Locality locality = Local;
|
||||
@ -61,7 +62,8 @@ public:
|
||||
kind == Inner ? "Inner"
|
||||
: (kind == Left ? "Left"
|
||||
: (kind == Right ? "Right"
|
||||
: "Full")), wb);
|
||||
: (kind == Full ? "Full"
|
||||
: "Cross"))), wb);
|
||||
|
||||
writeString("Join", wb);
|
||||
}
|
||||
|
@ -146,7 +146,10 @@ void ExpressionAnalyzer::analyzeAggregation()
|
||||
|
||||
if (select_query && select_query->join)
|
||||
{
|
||||
getRootActions(typeid_cast<ASTJoin &>(*select_query->join).using_expr_list, true, false, temp_actions);
|
||||
auto join = typeid_cast<ASTJoin &>(*select_query->join);
|
||||
if (join.using_expr_list)
|
||||
getRootActions(join.using_expr_list, true, false, temp_actions);
|
||||
|
||||
addJoinAction(temp_actions, true);
|
||||
}
|
||||
|
||||
@ -1548,7 +1551,8 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
|
||||
ExpressionActionsChain::Step & step = chain.steps.back();
|
||||
|
||||
ASTJoin & ast_join = typeid_cast<ASTJoin &>(*select_query->join);
|
||||
getRootActions(ast_join.using_expr_list, only_types, false, step.actions);
|
||||
if (ast_join.using_expr_list)
|
||||
getRootActions(ast_join.using_expr_list, only_types, false, step.actions);
|
||||
|
||||
/// Не поддерживается два JOIN-а с одинаковым подзапросом, но разными USING-ами.
|
||||
String join_id = ast_join.table->getColumnName();
|
||||
@ -1888,7 +1892,7 @@ void ExpressionAnalyzer::collectUsedColumns()
|
||||
}
|
||||
|
||||
/* for (const auto & name_type : columns_added_by_join)
|
||||
std::cerr << "JOINed column (required, not key): " << name_type.first << std::endl;
|
||||
std::cerr << "JOINed column (required, not key): " << name_type.name << std::endl;
|
||||
std::cerr << std::endl;*/
|
||||
|
||||
/// Вставляем в список требуемых столбцов столбцы, нужные для вычисления ARRAY JOIN.
|
||||
@ -1968,14 +1972,17 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd
|
||||
nested_result_sample = InterpreterSelectQuery::getSampleBlock(subquery, context);
|
||||
}
|
||||
|
||||
auto & keys = typeid_cast<ASTExpressionList &>(*node.using_expr_list);
|
||||
for (const auto & key : keys.children)
|
||||
if (node.using_expr_list)
|
||||
{
|
||||
if (!join_key_names_left_set.insert(key->getColumnName()).second)
|
||||
throw Exception("Duplicate column in USING list", ErrorCodes::DUPLICATE_COLUMN);
|
||||
auto & keys = typeid_cast<ASTExpressionList &>(*node.using_expr_list);
|
||||
for (const auto & key : keys.children)
|
||||
{
|
||||
if (!join_key_names_left_set.insert(key->getColumnName()).second)
|
||||
throw Exception("Duplicate column in USING list", ErrorCodes::DUPLICATE_COLUMN);
|
||||
|
||||
if (!join_key_names_right_set.insert(key->getAliasOrColumnName()).second)
|
||||
throw Exception("Duplicate column in USING list", ErrorCodes::DUPLICATE_COLUMN);
|
||||
if (!join_key_names_right_set.insert(key->getAliasOrColumnName()).second)
|
||||
throw Exception("Duplicate column in USING list", ErrorCodes::DUPLICATE_COLUMN);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto i : ext::range(0, nested_result_sample.columns()))
|
||||
|
@ -22,6 +22,9 @@ Join::Type Join::chooseMethod(const ConstColumnPlainPtrs & key_columns, bool & k
|
||||
size_t keys_bytes = 0;
|
||||
key_sizes.resize(keys_size);
|
||||
|
||||
if (keys_size == 0)
|
||||
return Type::CROSS;
|
||||
|
||||
for (size_t j = 0; j < keys_size; ++j)
|
||||
{
|
||||
if (!key_columns[j]->isFixed())
|
||||
@ -61,6 +64,7 @@ static void initImpl(Maps & maps, Join::Type type)
|
||||
case Join::Type::KEY_64: maps.key64 .reset(new typename Maps::MapUInt64); break;
|
||||
case Join::Type::KEY_STRING: maps.key_string .reset(new typename Maps::MapString); break;
|
||||
case Join::Type::HASHED: maps.hashed .reset(new typename Maps::MapHashed); break;
|
||||
case Join::Type::CROSS: break;
|
||||
|
||||
default:
|
||||
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
|
||||
@ -105,6 +109,9 @@ void Join::init(Type type_)
|
||||
{
|
||||
type = type_;
|
||||
|
||||
if (kind == ASTJoin::Cross)
|
||||
return;
|
||||
|
||||
if (!getFullness(kind))
|
||||
{
|
||||
if (strictness == ASTJoin::Any)
|
||||
@ -124,21 +131,41 @@ void Join::init(Type type_)
|
||||
size_t Join::getTotalRowCount() const
|
||||
{
|
||||
size_t res = 0;
|
||||
res += getTotalRowCountImpl(maps_any);
|
||||
res += getTotalRowCountImpl(maps_all);
|
||||
res += getTotalRowCountImpl(maps_any_full);
|
||||
res += getTotalRowCountImpl(maps_all_full);
|
||||
|
||||
if (type == Type::CROSS)
|
||||
{
|
||||
for (const auto & block : blocks)
|
||||
res += block.rowsInFirstColumn();
|
||||
}
|
||||
else
|
||||
{
|
||||
res += getTotalRowCountImpl(maps_any);
|
||||
res += getTotalRowCountImpl(maps_all);
|
||||
res += getTotalRowCountImpl(maps_any_full);
|
||||
res += getTotalRowCountImpl(maps_all_full);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
size_t Join::getTotalByteCount() const
|
||||
{
|
||||
size_t res = 0;
|
||||
res += getTotalByteCountImpl(maps_any);
|
||||
res += getTotalByteCountImpl(maps_all);
|
||||
res += getTotalByteCountImpl(maps_any_full);
|
||||
res += getTotalByteCountImpl(maps_all_full);
|
||||
res += pool.size();
|
||||
|
||||
if (type == Type::CROSS)
|
||||
{
|
||||
for (const auto & block : blocks)
|
||||
res += block.bytes();
|
||||
}
|
||||
else
|
||||
{
|
||||
res += getTotalByteCountImpl(maps_any);
|
||||
res += getTotalByteCountImpl(maps_all);
|
||||
res += getTotalByteCountImpl(maps_any_full);
|
||||
res += getTotalByteCountImpl(maps_all_full);
|
||||
res += pool.size();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -258,7 +285,11 @@ template <> struct Inserter<ASTJoin::All, Join::MapsAllFull::MapString> : Insert
|
||||
template <ASTJoin::Strictness STRICTNESS, typename Maps>
|
||||
void Join::insertFromBlockImpl(Maps & maps, size_t rows, const ConstColumnPlainPtrs & key_columns, size_t keys_size, Block * stored_block)
|
||||
{
|
||||
if (type == Type::KEY_64)
|
||||
if (type == Type::CROSS)
|
||||
{
|
||||
/// Ничего не делаем. Уже сохранили блок, и этого достаточно.
|
||||
}
|
||||
else if (type == Type::KEY_64)
|
||||
{
|
||||
typedef typename Maps::MapUInt64 Map;
|
||||
Map & res = *maps.key64;
|
||||
@ -409,19 +440,23 @@ bool Join::insertFromBlock(const Block & block)
|
||||
stored_block->getByPosition(i).column = dynamic_cast<IColumnConst &>(*col).convertToFullColumn();
|
||||
}
|
||||
|
||||
if (!getFullness(kind))
|
||||
if (kind != ASTJoin::Cross)
|
||||
{
|
||||
if (strictness == ASTJoin::Any)
|
||||
insertFromBlockImpl<ASTJoin::Any>(maps_any, rows, key_columns, keys_size, stored_block);
|
||||
/// Заполняем нужную хэш-таблицу.
|
||||
if (!getFullness(kind))
|
||||
{
|
||||
if (strictness == ASTJoin::Any)
|
||||
insertFromBlockImpl<ASTJoin::Any>(maps_any, rows, key_columns, keys_size, stored_block);
|
||||
else
|
||||
insertFromBlockImpl<ASTJoin::All>(maps_all, rows, key_columns, keys_size, stored_block);
|
||||
}
|
||||
else
|
||||
insertFromBlockImpl<ASTJoin::All>(maps_all, rows, key_columns, keys_size, stored_block);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (strictness == ASTJoin::Any)
|
||||
insertFromBlockImpl<ASTJoin::Any>(maps_any_full, rows, key_columns, keys_size, stored_block);
|
||||
else
|
||||
insertFromBlockImpl<ASTJoin::All>(maps_all_full, rows, key_columns, keys_size, stored_block);
|
||||
{
|
||||
if (strictness == ASTJoin::Any)
|
||||
insertFromBlockImpl<ASTJoin::Any>(maps_any_full, rows, key_columns, keys_size, stored_block);
|
||||
else
|
||||
insertFromBlockImpl<ASTJoin::All>(maps_all_full, rows, key_columns, keys_size, stored_block);
|
||||
}
|
||||
}
|
||||
|
||||
if (!checkSizeLimits())
|
||||
@ -677,6 +712,60 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const
|
||||
}
|
||||
|
||||
|
||||
/** CROSS JOIN: replaces `block` with the Cartesian product of its rows and the rows
  * of every block in `blocks`.
  *
  * The result has the columns of the incoming block followed by one column per entry
  * of sample_block_with_columns_to_add. Rows are produced left row by left row; for
  * each left row the stored blocks are visited in order.
  */
void Join::joinBlockImplCross(Block & block) const
{
    /// The output is assembled in a separate block that starts as an empty clone of the input.
    Block result = block.cloneEmpty();

    const size_t left_column_count = result.columns();
    const size_t right_column_count = sample_block_with_columns_to_add.columns();

    ColumnPlainPtrs left_sources(left_column_count);
    ColumnPlainPtrs left_sinks(left_column_count);
    ColumnPlainPtrs right_sinks(right_column_count);

    for (size_t pos = 0; pos != left_column_count; ++pos)
    {
        left_sources[pos] = block.unsafeGetByPosition(pos).column;
        left_sinks[pos] = result.unsafeGetByPosition(pos).column;
    }

    /// Append the new (initially empty) columns to the result block.
    for (size_t pos = 0; pos != right_column_count; ++pos)
    {
        ColumnWithTypeAndName added = sample_block_with_columns_to_add.unsafeGetByPosition(pos).cloneEmpty();
        result.insert(added);
        right_sinks[pos] = added.column;
    }

    const size_t left_row_count = block.rowsInFirstColumn();

    /// NOTE It would be more efficient to use reserve, and the replicate methods for multiplying the left block's values.

    for (size_t left_row = 0; left_row != left_row_count; ++left_row)
    {
        for (const Block & stored : blocks)
        {
            const size_t right_row_count = stored.rowsInFirstColumn();

            /// Repeat the current left row once for every row of this right-hand block.
            for (size_t pos = 0; pos != left_column_count; ++pos)
            {
                const IColumn & source = *left_sources[pos];

                for (size_t repeat = right_row_count; repeat != 0; --repeat)
                    left_sinks[pos]->insertFrom(source, left_row);
            }

            /// Copy every row of this right-hand block, column by column.
            for (size_t pos = 0; pos != right_column_count; ++pos)
            {
                const IColumn * source = stored.unsafeGetByPosition(pos).column;

                for (size_t right_row = 0; right_row != right_row_count; ++right_row)
                    right_sinks[pos]->insertFrom(*source, right_row);
            }
        }
    }

    block = result;
}
|
||||
|
||||
|
||||
void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) const
|
||||
{
|
||||
size_t keys_size = key_names_left.size();
|
||||
@ -712,6 +801,10 @@ void Join::joinBlock(Block & block) const
|
||||
joinBlockImpl<ASTJoin::Left, ASTJoin::All>(block, maps_all_full);
|
||||
else if (kind == ASTJoin::Right && strictness == ASTJoin::All)
|
||||
joinBlockImpl<ASTJoin::Inner, ASTJoin::All>(block, maps_all_full);
|
||||
else if (kind == ASTJoin::Cross)
|
||||
joinBlockImplCross(block);
|
||||
else
|
||||
throw Exception("Logical error: unknown combination of JOIN", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
|
||||
|
@ -24,6 +24,7 @@ bool ParserJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_p
|
||||
ParserString s_left("LEFT", true, true);
|
||||
ParserString s_right("RIGHT", true, true);
|
||||
ParserString s_full("FULL", true, true);
|
||||
ParserString s_cross("CROSS", true, true);
|
||||
ParserString s_outer("OUTER", true, true);
|
||||
ParserString s_join("JOIN", true, true);
|
||||
ParserString s_using("USING", true, true);
|
||||
@ -41,15 +42,13 @@ bool ParserJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_p
|
||||
|
||||
ws.ignore(pos, end);
|
||||
|
||||
bool has_strictness = true;
|
||||
if (s_any.ignore(pos, end))
|
||||
join->strictness = ASTJoin::Any;
|
||||
else if (s_all.ignore(pos, end))
|
||||
join->strictness = ASTJoin::All;
|
||||
else
|
||||
{
|
||||
expected = "ANY|ALL";
|
||||
return false;
|
||||
}
|
||||
has_strictness = false;
|
||||
|
||||
ws.ignore(pos, end);
|
||||
|
||||
@ -61,16 +60,24 @@ bool ParserJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_p
|
||||
join->kind = ASTJoin::Right;
|
||||
else if (s_full.ignore(pos, end))
|
||||
join->kind = ASTJoin::Full;
|
||||
else if (s_cross.ignore(pos, end))
|
||||
join->kind = ASTJoin::Cross;
|
||||
else
|
||||
{
|
||||
expected = "INNER|LEFT|RIGHT|FULL";
|
||||
expected = "INNER|LEFT|RIGHT|FULL|CROSS";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!has_strictness && join->kind != ASTJoin::Cross)
|
||||
throw Exception("You must specify ANY or ALL for JOIN, before INNER or LEFT or RIGHT or FULL.", ErrorCodes::SYNTAX_ERROR);
|
||||
|
||||
if (has_strictness && join->kind == ASTJoin::Cross)
|
||||
throw Exception("You must not specify ANY or ALL for CROSS JOIN.", ErrorCodes::SYNTAX_ERROR);
|
||||
|
||||
ws.ignore(pos, end);
|
||||
|
||||
/// Для всех JOIN-ов кроме INNER может присутствовать не обязательное слово "OUTER".
|
||||
if (join->kind != ASTJoin::Inner && s_outer.ignore(pos, end))
|
||||
/// Для всех JOIN-ов кроме INNER и CROSS может присутствовать не обязательное слово "OUTER".
|
||||
if (join->kind != ASTJoin::Inner && join->kind != ASTJoin::Cross && s_outer.ignore(pos, end))
|
||||
ws.ignore(pos, end);
|
||||
|
||||
if (!s_join.ignore(pos, end, max_parsed_pos, expected))
|
||||
@ -88,18 +95,23 @@ bool ParserJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_p
|
||||
ParserAlias().ignore(pos, end);
|
||||
ws.ignore(pos, end);
|
||||
|
||||
if (!s_using.ignore(pos, end, max_parsed_pos, expected))
|
||||
return false;
|
||||
if (join->kind != ASTJoin::Cross)
|
||||
{
|
||||
if (!s_using.ignore(pos, end, max_parsed_pos, expected))
|
||||
return false;
|
||||
|
||||
ws.ignore(pos, end);
|
||||
ws.ignore(pos, end);
|
||||
|
||||
if (!exp_list.parse(pos, end, join->using_expr_list, max_parsed_pos, expected))
|
||||
return false;
|
||||
if (!exp_list.parse(pos, end, join->using_expr_list, max_parsed_pos, expected))
|
||||
return false;
|
||||
|
||||
ws.ignore(pos, end);
|
||||
ws.ignore(pos, end);
|
||||
}
|
||||
|
||||
join->children.push_back(join->table);
|
||||
join->children.push_back(join->using_expr_list);
|
||||
|
||||
if (join->using_expr_list)
|
||||
join->children.push_back(join->using_expr_list);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -829,21 +829,30 @@ void formatAST(const ASTSet & ast, std::ostream & s, size_t indent, bool hilite,
|
||||
|
||||
void formatAST(const ASTJoin & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)
|
||||
{
|
||||
s << (hilite ? hilite_keyword : "")
|
||||
<< (ast.locality == ASTJoin::Global ? "GLOBAL " : "")
|
||||
<< (ast.strictness == ASTJoin::Any ? "ANY " : "ALL ")
|
||||
<< (ast.kind == ASTJoin::Inner ? "INNER "
|
||||
: (ast.kind == ASTJoin::Left ? "LEFT "
|
||||
: (ast.kind == ASTJoin::Right ? "RIGHT "
|
||||
: "FULL OUTER ")))
|
||||
<< "JOIN "
|
||||
s << (hilite ? hilite_keyword : "");
|
||||
|
||||
if (ast.locality == ASTJoin::Global)
|
||||
s << "GLOBAL ";
|
||||
|
||||
if (ast.kind != ASTJoin::Cross)
|
||||
s << (ast.strictness == ASTJoin::Any ? "ANY " : "ALL ");
|
||||
|
||||
s << (ast.kind == ASTJoin::Inner ? "INNER "
|
||||
: (ast.kind == ASTJoin::Left ? "LEFT "
|
||||
: (ast.kind == ASTJoin::Right ? "RIGHT "
|
||||
: (ast.kind == ASTJoin::Cross ? "CROSS "
|
||||
: "FULL OUTER "))));
|
||||
|
||||
s << "JOIN "
|
||||
<< (hilite ? hilite_none : "");
|
||||
|
||||
formatAST(*ast.table, s, indent, hilite, one_line, need_parens);
|
||||
|
||||
s << (hilite ? hilite_keyword : "") << " USING " << (hilite ? hilite_none : "");
|
||||
|
||||
formatAST(*ast.using_expr_list, s, indent, hilite, one_line, need_parens);
|
||||
if (ast.kind != ASTJoin::Cross)
|
||||
{
|
||||
s << (hilite ? hilite_keyword : "") << " USING " << (hilite ? hilite_none : "");
|
||||
formatAST(*ast.using_expr_list, s, indent, hilite, one_line, need_parens);
|
||||
}
|
||||
}
|
||||
|
||||
void formatAST(const ASTCheckQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)
|
||||
|
15
dbms/tests/queries/0_stateless/00202_cross_join.reference
Normal file
15
dbms/tests/queries/0_stateless/00202_cross_join.reference
Normal file
@ -0,0 +1,15 @@
|
||||
0 0
|
||||
0 1
|
||||
0 2
|
||||
0 3
|
||||
0 4
|
||||
1 0
|
||||
1 1
|
||||
1 2
|
||||
1 3
|
||||
1 4
|
||||
2 0
|
||||
2 1
|
||||
2 2
|
||||
2 3
|
||||
2 4
|
1
dbms/tests/queries/0_stateless/00202_cross_join.sql
Normal file
1
dbms/tests/queries/0_stateless/00202_cross_join.sql
Normal file
@ -0,0 +1 @@
|
||||
SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5);
|
Loading…
Reference in New Issue
Block a user