dbms: added CROSS JOIN [#METR-16893].

This commit is contained in:
Alexey Milovidov 2015-07-23 23:23:24 +03:00
parent 6f70e8e05c
commit 763fe6fb93
8 changed files with 201 additions and 58 deletions

View File

@ -18,8 +18,9 @@ namespace DB
/** Структура данных для реализации JOIN-а.
* По сути, хэш-таблица: ключи -> строки присоединяемой таблицы.
* Исключение - CROSS JOIN, где вместо хэш-таблицы просто набор блоков без ключей.
*
* JOIN-ы бывают восьми типов: ANY/ALL x LEFT/INNER/RIGHT/FULL.
* JOIN-ы бывают девяти типов: ANY/ALL × LEFT/INNER/RIGHT/FULL, а также CROSS.
*
* Если указано ANY - выбрать из "правой" таблицы только одну, первую попавшуюся строку, даже если там более одной соответствующей строки.
* Если указано ALL - обычный вариант JOIN-а, при котором строки могут размножаться по числу соответствующих строк "правой" таблицы.
@ -213,6 +214,7 @@ private:
KEY_64,
KEY_STRING,
HASHED,
CROSS,
};
Type type = Type::EMPTY;
@ -249,6 +251,8 @@ private:
template <ASTJoin::Kind KIND, ASTJoin::Strictness STRICTNESS, typename Maps>
void joinBlockImpl(Block & block, const Maps & maps) const;
void joinBlockImplCross(Block & block) const;
/// Проверить, не превышены ли допустимые размеры множества
bool checkSizeLimits() const;

View File

@ -32,7 +32,8 @@ public:
Inner, /// Оставить только записи, для которых в "правой" таблице есть соответствующая.
Left, /// Если в "правой" таблице нет соответствующих записей, заполнить столбцы значениями "по-умолчанию".
Right,
Full
Full,
Cross /// Прямое произведение. strictness и using_expr_list не используются.
};
Locality locality = Local;
@ -61,7 +62,8 @@ public:
kind == Inner ? "Inner"
: (kind == Left ? "Left"
: (kind == Right ? "Right"
: "Full")), wb);
: (kind == Full ? "Full"
: "Cross"))), wb);
writeString("Join", wb);
}

View File

@ -146,7 +146,10 @@ void ExpressionAnalyzer::analyzeAggregation()
if (select_query && select_query->join)
{
getRootActions(typeid_cast<ASTJoin &>(*select_query->join).using_expr_list, true, false, temp_actions);
/// Bind by reference: a plain `auto` would deduce ASTJoin by value and copy the whole AST node.
auto & join = typeid_cast<ASTJoin &>(*select_query->join);
/// The USING list is absent for CROSS JOIN, so analyze it only when it is present.
if (join.using_expr_list)
    getRootActions(join.using_expr_list, true, false, temp_actions);
addJoinAction(temp_actions, true);
}
@ -1548,7 +1551,8 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
ExpressionActionsChain::Step & step = chain.steps.back();
ASTJoin & ast_join = typeid_cast<ASTJoin &>(*select_query->join);
getRootActions(ast_join.using_expr_list, only_types, false, step.actions);
if (ast_join.using_expr_list)
getRootActions(ast_join.using_expr_list, only_types, false, step.actions);
/// Не поддерживается два JOIN-а с одинаковым подзапросом, но разными USING-ами.
String join_id = ast_join.table->getColumnName();
@ -1888,7 +1892,7 @@ void ExpressionAnalyzer::collectUsedColumns()
}
/* for (const auto & name_type : columns_added_by_join)
std::cerr << "JOINed column (required, not key): " << name_type.first << std::endl;
std::cerr << "JOINed column (required, not key): " << name_type.name << std::endl;
std::cerr << std::endl;*/
/// Вставляем в список требуемых столбцов столбцы, нужные для вычисления ARRAY JOIN.
@ -1968,14 +1972,17 @@ void ExpressionAnalyzer::collectJoinedColumns(NameSet & joined_columns, NamesAnd
nested_result_sample = InterpreterSelectQuery::getSampleBlock(subquery, context);
}
auto & keys = typeid_cast<ASTExpressionList &>(*node.using_expr_list);
for (const auto & key : keys.children)
if (node.using_expr_list)
{
if (!join_key_names_left_set.insert(key->getColumnName()).second)
throw Exception("Duplicate column in USING list", ErrorCodes::DUPLICATE_COLUMN);
auto & keys = typeid_cast<ASTExpressionList &>(*node.using_expr_list);
for (const auto & key : keys.children)
{
if (!join_key_names_left_set.insert(key->getColumnName()).second)
throw Exception("Duplicate column in USING list", ErrorCodes::DUPLICATE_COLUMN);
if (!join_key_names_right_set.insert(key->getAliasOrColumnName()).second)
throw Exception("Duplicate column in USING list", ErrorCodes::DUPLICATE_COLUMN);
if (!join_key_names_right_set.insert(key->getAliasOrColumnName()).second)
throw Exception("Duplicate column in USING list", ErrorCodes::DUPLICATE_COLUMN);
}
}
for (const auto i : ext::range(0, nested_result_sample.columns()))

View File

@ -22,6 +22,9 @@ Join::Type Join::chooseMethod(const ConstColumnPlainPtrs & key_columns, bool & k
size_t keys_bytes = 0;
key_sizes.resize(keys_size);
if (keys_size == 0)
return Type::CROSS;
for (size_t j = 0; j < keys_size; ++j)
{
if (!key_columns[j]->isFixed())
@ -61,6 +64,7 @@ static void initImpl(Maps & maps, Join::Type type)
case Join::Type::KEY_64: maps.key64 .reset(new typename Maps::MapUInt64); break;
case Join::Type::KEY_STRING: maps.key_string .reset(new typename Maps::MapString); break;
case Join::Type::HASHED: maps.hashed .reset(new typename Maps::MapHashed); break;
case Join::Type::CROSS: break;
default:
throw Exception("Unknown JOIN keys variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT);
@ -105,6 +109,9 @@ void Join::init(Type type_)
{
type = type_;
if (kind == ASTJoin::Cross)
return;
if (!getFullness(kind))
{
if (strictness == ASTJoin::Any)
@ -124,21 +131,41 @@ void Join::init(Type type_)
size_t Join::getTotalRowCount() const
{
size_t res = 0;
res += getTotalRowCountImpl(maps_any);
res += getTotalRowCountImpl(maps_all);
res += getTotalRowCountImpl(maps_any_full);
res += getTotalRowCountImpl(maps_all_full);
if (type == Type::CROSS)
{
for (const auto & block : blocks)
res += block.rowsInFirstColumn();
}
else
{
res += getTotalRowCountImpl(maps_any);
res += getTotalRowCountImpl(maps_all);
res += getTotalRowCountImpl(maps_any_full);
res += getTotalRowCountImpl(maps_all_full);
}
return res;
}
size_t Join::getTotalByteCount() const
{
size_t res = 0;
res += getTotalByteCountImpl(maps_any);
res += getTotalByteCountImpl(maps_all);
res += getTotalByteCountImpl(maps_any_full);
res += getTotalByteCountImpl(maps_all_full);
res += pool.size();
if (type == Type::CROSS)
{
for (const auto & block : blocks)
res += block.bytes();
}
else
{
res += getTotalByteCountImpl(maps_any);
res += getTotalByteCountImpl(maps_all);
res += getTotalByteCountImpl(maps_any_full);
res += getTotalByteCountImpl(maps_all_full);
res += pool.size();
}
return res;
}
@ -258,7 +285,11 @@ template <> struct Inserter<ASTJoin::All, Join::MapsAllFull::MapString> : Insert
template <ASTJoin::Strictness STRICTNESS, typename Maps>
void Join::insertFromBlockImpl(Maps & maps, size_t rows, const ConstColumnPlainPtrs & key_columns, size_t keys_size, Block * stored_block)
{
if (type == Type::KEY_64)
if (type == Type::CROSS)
{
/// Ничего не делаем. Уже сохранили блок, и этого достаточно.
}
else if (type == Type::KEY_64)
{
typedef typename Maps::MapUInt64 Map;
Map & res = *maps.key64;
@ -409,19 +440,23 @@ bool Join::insertFromBlock(const Block & block)
stored_block->getByPosition(i).column = dynamic_cast<IColumnConst &>(*col).convertToFullColumn();
}
if (!getFullness(kind))
if (kind != ASTJoin::Cross)
{
if (strictness == ASTJoin::Any)
insertFromBlockImpl<ASTJoin::Any>(maps_any, rows, key_columns, keys_size, stored_block);
/// Заполняем нужную хэш-таблицу.
if (!getFullness(kind))
{
if (strictness == ASTJoin::Any)
insertFromBlockImpl<ASTJoin::Any>(maps_any, rows, key_columns, keys_size, stored_block);
else
insertFromBlockImpl<ASTJoin::All>(maps_all, rows, key_columns, keys_size, stored_block);
}
else
insertFromBlockImpl<ASTJoin::All>(maps_all, rows, key_columns, keys_size, stored_block);
}
else
{
if (strictness == ASTJoin::Any)
insertFromBlockImpl<ASTJoin::Any>(maps_any_full, rows, key_columns, keys_size, stored_block);
else
insertFromBlockImpl<ASTJoin::All>(maps_all_full, rows, key_columns, keys_size, stored_block);
{
if (strictness == ASTJoin::Any)
insertFromBlockImpl<ASTJoin::Any>(maps_any_full, rows, key_columns, keys_size, stored_block);
else
insertFromBlockImpl<ASTJoin::All>(maps_all_full, rows, key_columns, keys_size, stored_block);
}
}
if (!checkSizeLimits())
@ -677,6 +712,60 @@ void Join::joinBlockImpl(Block & block, const Maps & maps) const
}
/** CROSS JOIN of one left block against all stored right blocks.
  * `block` is replaced by a block holding its original columns followed by the columns of
  * sample_block_with_columns_to_add. For every left row, in order, and for every block in `blocks`,
  * in order, the left row is repeated once per right row next to each row of that right block.
  * NOTE(review): right columns are read from the stored blocks by the same position they have in
  * sample_block_with_columns_to_add - presumably both share one column layout; confirm.
  */
void Join::joinBlockImplCross(Block & block) const
{
    /// The result is built in a cloneEmpty() copy of the left block.
    Block result = block.cloneEmpty();

    const size_t left_column_count = result.columns();
    const size_t right_column_count = sample_block_with_columns_to_add.columns();

    /// Pointers to the left source columns and to their counterparts in the result.
    ColumnPlainPtrs left_sources(left_column_count);
    ColumnPlainPtrs left_targets(left_column_count);
    for (size_t pos = 0; pos < left_column_count; ++pos)
    {
        left_sources[pos] = block.unsafeGetByPosition(pos).column;
        left_targets[pos] = result.unsafeGetByPosition(pos).column;
    }

    /// Add the new columns to the result and remember where to write their values.
    ColumnPlainPtrs right_targets(right_column_count);
    for (size_t pos = 0; pos < right_column_count; ++pos)
    {
        ColumnWithTypeAndName added = sample_block_with_columns_to_add.unsafeGetByPosition(pos).cloneEmpty();
        result.insert(added);
        right_targets[pos] = added.column;
    }

    const size_t left_rows = block.rowsInFirstColumn();

    /// NOTE It would be more efficient to use reserve, and the replicate methods to multiply the left block's values.
    for (size_t left_row = 0; left_row < left_rows; ++left_row)
    {
        for (const Block & right_block : blocks)
        {
            const size_t right_rows = right_block.rowsInFirstColumn();

            /// Repeat the current left row once per row of this right block.
            for (size_t pos = 0; pos < left_column_count; ++pos)
            {
                for (size_t repeat = 0; repeat < right_rows; ++repeat)
                    left_targets[pos]->insertFrom(*left_sources[pos], left_row);
            }

            /// Copy every row of this right block.
            for (size_t pos = 0; pos < right_column_count; ++pos)
            {
                const IColumn * right_source = right_block.unsafeGetByPosition(pos).column;
                for (size_t right_row = 0; right_row < right_rows; ++right_row)
                    right_targets[pos]->insertFrom(*right_source, right_row);
            }
        }
    }

    block = result;
}
void Join::checkTypesOfKeys(const Block & block_left, const Block & block_right) const
{
size_t keys_size = key_names_left.size();
@ -712,6 +801,10 @@ void Join::joinBlock(Block & block) const
joinBlockImpl<ASTJoin::Left, ASTJoin::All>(block, maps_all_full);
else if (kind == ASTJoin::Right && strictness == ASTJoin::All)
joinBlockImpl<ASTJoin::Inner, ASTJoin::All>(block, maps_all_full);
else if (kind == ASTJoin::Cross)
joinBlockImplCross(block);
else
throw Exception("Logical error: unknown combination of JOIN", ErrorCodes::LOGICAL_ERROR);
}

View File

@ -24,6 +24,7 @@ bool ParserJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_p
ParserString s_left("LEFT", true, true);
ParserString s_right("RIGHT", true, true);
ParserString s_full("FULL", true, true);
ParserString s_cross("CROSS", true, true);
ParserString s_outer("OUTER", true, true);
ParserString s_join("JOIN", true, true);
ParserString s_using("USING", true, true);
@ -41,15 +42,13 @@ bool ParserJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_p
ws.ignore(pos, end);
bool has_strictness = true;
if (s_any.ignore(pos, end))
join->strictness = ASTJoin::Any;
else if (s_all.ignore(pos, end))
join->strictness = ASTJoin::All;
else
{
expected = "ANY|ALL";
return false;
}
has_strictness = false;
ws.ignore(pos, end);
@ -61,16 +60,24 @@ bool ParserJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_p
join->kind = ASTJoin::Right;
else if (s_full.ignore(pos, end))
join->kind = ASTJoin::Full;
else if (s_cross.ignore(pos, end))
join->kind = ASTJoin::Cross;
else
{
expected = "INNER|LEFT|RIGHT|FULL";
expected = "INNER|LEFT|RIGHT|FULL|CROSS";
return false;
}
if (!has_strictness && join->kind != ASTJoin::Cross)
throw Exception("You must specify ANY or ALL for JOIN, before INNER or LEFT or RIGHT or FULL.", ErrorCodes::SYNTAX_ERROR);
if (has_strictness && join->kind == ASTJoin::Cross)
throw Exception("You must not specify ANY or ALL for CROSS JOIN.", ErrorCodes::SYNTAX_ERROR);
ws.ignore(pos, end);
/// Для всех JOIN-ов кроме INNER может присутствовать не обязательное слово "OUTER".
if (join->kind != ASTJoin::Inner && s_outer.ignore(pos, end))
/// Для всех JOIN-ов кроме INNER и CROSS может присутствовать необязательное слово "OUTER".
if (join->kind != ASTJoin::Inner && join->kind != ASTJoin::Cross && s_outer.ignore(pos, end))
ws.ignore(pos, end);
if (!s_join.ignore(pos, end, max_parsed_pos, expected))
@ -88,18 +95,23 @@ bool ParserJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_p
ParserAlias().ignore(pos, end);
ws.ignore(pos, end);
if (!s_using.ignore(pos, end, max_parsed_pos, expected))
return false;
if (join->kind != ASTJoin::Cross)
{
if (!s_using.ignore(pos, end, max_parsed_pos, expected))
return false;
ws.ignore(pos, end);
ws.ignore(pos, end);
if (!exp_list.parse(pos, end, join->using_expr_list, max_parsed_pos, expected))
return false;
if (!exp_list.parse(pos, end, join->using_expr_list, max_parsed_pos, expected))
return false;
ws.ignore(pos, end);
ws.ignore(pos, end);
}
join->children.push_back(join->table);
join->children.push_back(join->using_expr_list);
if (join->using_expr_list)
join->children.push_back(join->using_expr_list);
return true;
}

View File

@ -829,21 +829,30 @@ void formatAST(const ASTSet & ast, std::ostream & s, size_t indent, bool hilite,
void formatAST(const ASTJoin & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)
{
s << (hilite ? hilite_keyword : "")
<< (ast.locality == ASTJoin::Global ? "GLOBAL " : "")
<< (ast.strictness == ASTJoin::Any ? "ANY " : "ALL ")
<< (ast.kind == ASTJoin::Inner ? "INNER "
: (ast.kind == ASTJoin::Left ? "LEFT "
: (ast.kind == ASTJoin::Right ? "RIGHT "
: "FULL OUTER ")))
<< "JOIN "
s << (hilite ? hilite_keyword : "");
if (ast.locality == ASTJoin::Global)
s << "GLOBAL ";
if (ast.kind != ASTJoin::Cross)
s << (ast.strictness == ASTJoin::Any ? "ANY " : "ALL ");
s << (ast.kind == ASTJoin::Inner ? "INNER "
: (ast.kind == ASTJoin::Left ? "LEFT "
: (ast.kind == ASTJoin::Right ? "RIGHT "
: (ast.kind == ASTJoin::Cross ? "CROSS "
: "FULL OUTER "))));
s << "JOIN "
<< (hilite ? hilite_none : "");
formatAST(*ast.table, s, indent, hilite, one_line, need_parens);
s << (hilite ? hilite_keyword : "") << " USING " << (hilite ? hilite_none : "");
formatAST(*ast.using_expr_list, s, indent, hilite, one_line, need_parens);
if (ast.kind != ASTJoin::Cross)
{
s << (hilite ? hilite_keyword : "") << " USING " << (hilite ? hilite_none : "");
formatAST(*ast.using_expr_list, s, indent, hilite, one_line, need_parens);
}
}
void formatAST(const ASTCheckQuery & ast, std::ostream & s, size_t indent, bool hilite, bool one_line, bool need_parens)

View File

@ -0,0 +1,15 @@
0 0
0 1
0 2
0 3
0 4
1 0
1 1
1 2
1 3
1 4
2 0
2 1
2 2
2 3
2 4

View File

@ -0,0 +1 @@
SELECT x, y FROM (SELECT number AS x FROM system.numbers LIMIT 3) CROSS JOIN (SELECT number AS y FROM system.numbers LIMIT 5);