dbms: implemented LEFT ARRAY JOIN [#METR-17474].

This commit is contained in:
Alexey Milovidov 2015-07-26 13:54:45 +03:00
parent 5c2b5ffb10
commit 0439ef5f7f
8 changed files with 75 additions and 10 deletions

View File

@ -66,6 +66,7 @@ public:
/// Для ARRAY_JOIN
NameSet array_joined_columns;
bool array_join_is_left;
/// Для JOIN
const Join * join = nullptr;
@ -122,13 +123,14 @@ public:
return a;
}
static ExpressionAction arrayJoin(const NameSet & array_joined_columns)
static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left)
{
if (array_joined_columns.empty())
throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR);
ExpressionAction a;
a.type = ARRAY_JOIN;
a.array_joined_columns = array_joined_columns;
a.array_join_is_left = array_join_is_left;
return a;
}

View File

@ -50,6 +50,7 @@ public:
ASTPtr select_expression_list;
ASTPtr database;
ASTPtr table; /// Идентификатор, табличная функция или подзапрос (рекурсивно ASTSelectQuery)
bool array_join_is_left = false; /// LEFT ARRAY JOIN
ASTPtr array_join_expression_list; /// ARRAY JOIN
ASTPtr join; /// Обычный (не ARRAY) JOIN.
bool final = false;

View File

@ -6,6 +6,7 @@
#include <DB/DataTypes/DataTypeNested.h>
#include <DB/DataTypes/DataTypeArray.h>
#include <DB/Functions/IFunction.h>
#include <DB/Functions/FunctionsArray.h>
#include <set>
@ -268,6 +269,24 @@ void ExpressionAction::execute(Block & block) const
if (!any_array)
throw Exception("ARRAY JOIN of not array: " + *array_joined_columns.begin(), ErrorCodes::TYPE_MISMATCH);
/// Если LEFT ARRAY JOIN, то создаём столбцы, в которых пустые массивы заменены на массивы с одним элементом - значением по-умолчанию.
std::map<String, ColumnPtr> non_empty_array_columns;
if (array_join_is_left)
{
for (const auto & name : array_joined_columns)
{
auto src_col = block.getByName(name);
Block tmp_block{src_col, {{}, src_col.type, {}}};
FunctionEmptyArrayToSingle().execute(tmp_block, {0}, 1);
non_empty_array_columns[name] = tmp_block.getByPosition(1).column;
}
any_array_ptr = non_empty_array_columns.begin()->second;
any_array = typeid_cast<const ColumnArray *>(&*any_array_ptr);
}
size_t columns = block.columns();
for (size_t i = 0; i < columns; ++i)
{
@ -278,7 +297,8 @@ void ExpressionAction::execute(Block & block) const
if (!typeid_cast<const DataTypeArray *>(&*current.type))
throw Exception("ARRAY JOIN of not array: " + current.name, ErrorCodes::TYPE_MISMATCH);
ColumnPtr array_ptr = current.column;
ColumnPtr array_ptr = array_join_is_left ? non_empty_array_columns[current.name] : current.column;
if (array_ptr->isConst())
array_ptr = dynamic_cast<const IColumnConst &>(*array_ptr).convertToFullColumn();
@ -379,7 +399,7 @@ std::string ExpressionAction::toString() const
break;
case ARRAY_JOIN:
ss << "ARRAY JOIN ";
ss << (array_join_is_left ? "LEFT " : "") << "ARRAY JOIN ";
for (NameSet::const_iterator it = array_joined_columns.begin(); it != array_joined_columns.end(); ++it)
{
if (it != array_joined_columns.begin())
@ -761,7 +781,7 @@ std::string ExpressionActions::getID() const
ss << actions[i].result_name;
if (actions[i].type == ExpressionAction::ARRAY_JOIN)
{
ss << "{";
ss << (actions[i].array_join_is_left ? "LEFT ARRAY JOIN" : "ARRAY JOIN") << "{";
for (NameSet::const_iterator it = actions[i].array_joined_columns.begin();
it != actions[i].array_joined_columns.end(); ++it)
{

View File

@ -1369,7 +1369,7 @@ void ExpressionAnalyzer::getActionsImpl(ASTPtr ast, bool no_subqueries, bool onl
actions_stack.addAction(ExpressionAction::copyColumn(arg->getColumnName(), result_name));
NameSet joined_columns;
joined_columns.insert(result_name);
actions_stack.addAction(ExpressionAction::arrayJoin(joined_columns));
actions_stack.addAction(ExpressionAction::arrayJoin(joined_columns, false));
}
return;
@ -1666,7 +1666,7 @@ void ExpressionAnalyzer::addMultipleArrayJoinAction(ExpressionActionsPtr & actio
result_columns.insert(result_source.first);
}
actions->add(ExpressionAction::arrayJoin(result_columns));
actions->add(ExpressionAction::arrayJoin(result_columns, select_query->array_join_is_left));
}
bool ExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & chain, bool only_types)

View File

@ -23,6 +23,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p
ParserString s_select("SELECT", true, true);
ParserString s_distinct("DISTINCT", true, true);
ParserString s_from("FROM", true, true);
ParserString s_left("LEFT", true, true);
ParserString s_array("ARRAY", true, true);
ParserString s_join("JOIN", true, true);
ParserString s_using("USING", true, true);
@ -166,8 +167,22 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p
if (!parse_final_and_sample())
return false;
/// ARRAY JOIN expr list
if (s_array.ignore(pos, end, max_parsed_pos, expected))
/// [LEFT] ARRAY JOIN expr list
Pos saved_pos = pos;
bool has_array_join = false;
if (s_left.ignore(pos, end, max_parsed_pos, expected) && ws.ignore(pos, end) && s_array.ignore(pos, end, max_parsed_pos, expected))
{
select_query->array_join_is_left = true;
has_array_join = true;
}
else
{
pos = saved_pos;
if (s_array.ignore(pos, end, max_parsed_pos, expected))
has_array_join = true;
}
if (has_array_join)
{
ws.ignore(pos, end);
@ -182,7 +197,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p
ws.ignore(pos, end);
}
/// [GLOBAL] ANY|ALL INNER|LEFT JOIN (subquery) USING tuple
/// [GLOBAL] [ANY|ALL] INNER|LEFT|RIGHT|FULL|CROSS [OUTER] JOIN (subquery)|table_name USING tuple
join.parse(pos, end, select_query->join, max_parsed_pos, expected);
if (!parse_final_and_sample())

View File

@ -163,7 +163,9 @@ void formatAST(const ASTSelectQuery & ast, std::ostream & s, size_t indent, bo
if (ast.array_join_expression_list)
{
s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str << "ARRAY JOIN " << (hilite ? hilite_none : "");
s << (hilite ? hilite_keyword : "") << nl_or_ws << indent_str
<< (ast.array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (hilite ? hilite_none : "");
one_line
? formatAST(*ast.array_join_expression_list, s, indent, hilite, one_line)
: formatExpressionListMultiline(typeid_cast<const ASTExpressionList &>(*ast.array_join_expression_list), s, indent, hilite);

View File

@ -0,0 +1,23 @@
0
1
2
2
3
4
5
5
6
7
0 [] 0
1 [0] 0
2 [0,1] 0
2 [0,1] 1
3 [] 0
4 [0] 0
5 [0,1] 0
5 [0,1] 1
6 [] 0
7 [0] 0
8 [0,1] 0
8 [0,1] 1
9 [] 0

View File

@ -0,0 +1,2 @@
SELECT number FROM system.numbers LEFT ARRAY JOIN range(number % 3) AS arr LIMIT 10;
SELECT number, arr, x FROM (SELECT number, range(number % 3) AS arr FROM system.numbers LIMIT 10) LEFT ARRAY JOIN arr AS x;