Rectifying JOINs [#METR-2944].

This commit is contained in:
Alexey Milovidov 2016-07-18 03:14:24 +03:00
parent 3d4aaf1a33
commit 40ae51e7b3
9 changed files with 369 additions and 272 deletions

View File

@ -82,7 +82,6 @@ add_library (dbms
include/DB/Parsers/ASTExpressionList.h
include/DB/Parsers/ASTQueryWithOutput.h
include/DB/Parsers/ParserSelectQuery.h
include/DB/Parsers/ParserTableExpression.h
include/DB/Parsers/ParserUseQuery.h
include/DB/Parsers/ASTShowTablesQuery.h
include/DB/Parsers/ASTFunction.h
@ -133,6 +132,7 @@ add_library (dbms
include/DB/Parsers/ParserSampleRatio.h
include/DB/Parsers/ParserCase.h
include/DB/Parsers/ASTTablesInSelectQuery.h
include/DB/Parsers/ParserTablesInSelectQuery.h
include/DB/AggregateFunctions/AggregateFunctionMerge.h
include/DB/AggregateFunctions/AggregateFunctionUniqUpTo.h
include/DB/AggregateFunctions/AggregateFunctionIf.h
@ -790,7 +790,6 @@ add_library (dbms
src/Parsers/ParserQueryWithOutput.cpp
src/Parsers/ParserCreateQuery.cpp
src/Parsers/ParserSelectQuery.cpp
src/Parsers/ParserTableExpression.cpp
src/Parsers/ParserJoin.cpp
src/Parsers/ParserInsertQuery.cpp
src/Parsers/ParserDropQuery.cpp
@ -804,6 +803,7 @@ add_library (dbms
src/Parsers/ParserCheckQuery.cpp
src/Parsers/ParserSampleRatio.cpp
src/Parsers/ParserCase.cpp
src/Parsers/ParserTablesInSelectQuery.cpp
src/Parsers/formatAST.cpp
src/Parsers/parseQuery.cpp
src/Parsers/queryToString.cpp

View File

@ -48,14 +48,7 @@ private:
public:
bool distinct = false;
ASTPtr select_expression_list;
ASTPtr database;
ASTPtr table; /// Name of table, table function or subquery (recursivelly ASTSelectQuery)
bool array_join_is_left = false; /// LEFT ARRAY JOIN
ASTPtr array_join_expression_list; /// ARRAY JOIN
ASTPtr join; /// Ordinary (not ARRAY) JOIN.
bool final = false;
ASTPtr sample_size;
ASTPtr sample_offset;
ASTPtr tables;
ASTPtr prewhere_expression;
ASTPtr where_expression;
ASTPtr group_expression_list;
@ -66,6 +59,16 @@ public:
ASTPtr limit_length;
ASTPtr settings;
/// TODO remove
/// Members scheduled for removal (see the TODO above). The two flags are given explicit
/// defaults so that a newly constructed query never carries an indeterminate bool.
ASTPtr database;
ASTPtr table;
ASTPtr sample_size;
ASTPtr sample_offset;
ASTPtr array_join_expression_list;
ASTPtr join;
bool array_join_is_left = false;
bool final = false;
/// Doubly-linked list of SELECT queries inside a UNION ALL query.
/// The next SELECT query in the UNION ALL chain, if there is one

View File

@ -1,20 +0,0 @@
#pragma once
#include <DB/Parsers/IParserBase.h>
namespace DB
{
/** Table name (with or without a database name), table function, or subquery.
* Without modifiers such as FINAL, SAMPLE, etc.
* Without an alias.
*/
class ParserTableExpression : public IParserBase
{
protected:
/// Returns a fixed textual description of what this parser accepts.
const char * getName() const { return "table or subquery or table function"; }
/// Parsing routine; only declared here, the definition lives out of line.
bool parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected);
};
}

View File

@ -0,0 +1,49 @@
#pragma once
#include <DB/Parsers/IParserBase.h>
namespace DB
{
/** Parser for the list of one or more JOIN-ed tables or subqueries in a SELECT query,
* including ARRAY JOINs and the SAMPLE and FINAL modifiers.
*/
class ParserTablesInSelectQuery : public IParserBase
{
protected:
/// Returns a fixed textual description of what this parser accepts.
const char * getName() const { return "table, table function, subquery or list of joined tables"; }
/// Parsing routine; only declared here, the definition lives out of line.
bool parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected);
};
/** Parser for a single element of the tables list of a SELECT query.
  * The `is_first` flag given at construction is stored and is available to parseImpl;
  * it tells the parser whether it is handling the first element of the list.
  */
class ParserTablesInSelectQueryElement : public IParserBase
{
public:
    /// `explicit` prevents an accidental implicit conversion from bool to a parser object.
    explicit ParserTablesInSelectQueryElement(bool is_first_) : is_first(is_first_) {}

protected:
    /// Returns a fixed textual description of what this parser accepts.
    const char * getName() const { return "table, table function, subquery or list of joined tables"; }

    /// Parsing routine; only declared here, the definition lives out of line.
    bool parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected);

private:
    /// True when this parser handles the first element of the list.
    bool is_first;
};
/// Parser for a single table expression: a table, a subquery or a table function
/// (as described by getName below).
class ParserTableExpression : public IParserBase
{
protected:
/// Returns a fixed textual description of what this parser accepts.
const char * getName() const { return "table or subquery or table function"; }
/// Parsing routine; only declared here, the definition lives out of line.
bool parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected);
};
/// Parser for an ARRAY JOIN clause (as described by getName below).
class ParserArrayJoin : public IParserBase
{
protected:
/// Returns a fixed textual description of what this parser accepts.
const char * getName() const { return "array join"; }
/// Parsing routine; only declared here, the definition lives out of line.
bool parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected);
};
}

View File

@ -191,12 +191,7 @@ ASTPtr ASTSelectQuery::cloneImpl(bool traverse_union_all) const
* то на разных серверах получатся разные идентификаторы.
*/
CLONE(select_expression_list)
CLONE(database)
CLONE(table)
CLONE(array_join_expression_list)
CLONE(join)
CLONE(sample_size)
CLONE(sample_offset)
CLONE(tables)
CLONE(prewhere_expression)
CLONE(where_expression)
CLONE(group_expression_list)
@ -244,69 +239,10 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
? select_expression_list->formatImpl(s, state, frame)
: typeid_cast<const ASTExpressionList &>(*select_expression_list).formatImplMultiline(s, state, frame);
if (table)
if (tables)
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "FROM " << (s.hilite ? hilite_none : "");
if (typeid_cast<const ASTSelectQuery *>(&*table))
{
if (s.one_line)
s.ostr << " (";
else
s.ostr << "\n" << indent_str << "(\n";
FormatStateStacked frame_with_indent = frame;
++frame_with_indent.indent;
table->formatImpl(s, state, frame_with_indent);
if (s.one_line)
s.ostr << ")";
else
s.ostr << "\n" << indent_str << ")";
}
else
{
if (database)
{
database->formatImpl(s, state, frame);
s.ostr << ".";
}
table->formatImpl(s, state, frame);
}
}
if (final)
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "FINAL" << (s.hilite ? hilite_none : "");
}
if (sample_size)
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "SAMPLE " << (s.hilite ? hilite_none : "");
sample_size->formatImpl(s, state, frame);
if (sample_offset)
{
s.ostr << (s.hilite ? hilite_keyword : "") << ' ' << "OFFSET " << (s.hilite ? hilite_none : "");
sample_offset->formatImpl(s, state, frame);
}
}
if (array_join_expression_list)
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str
<< (array_join_is_left ? "LEFT " : "") << "ARRAY JOIN " << (s.hilite ? hilite_none : "");
s.one_line
? array_join_expression_list->formatImpl(s, state, frame)
: typeid_cast<const ASTExpressionList &>(*array_join_expression_list).formatImplMultiline(s, state, frame);
}
if (join)
{
s.ostr << " ";
join->formatImpl(s, state, frame);
tables->formatImpl(s, state, frame);
}
if (prewhere_expression)

View File

@ -8,7 +8,7 @@ do \
{ \
if (member) \
{ \
res->member = std::static_pointer_cast<decltype(res->member)::element_type>(member->clone()); \
res->member = member->clone(); \
res->children.push_back(res->member); \
} \
} \
@ -180,14 +180,14 @@ void ASTTablesInSelectQueryElement::formatImpl(const FormatSettings & settings,
if (table_expression)
{
if (table_join)
table_join->formatImplBeforeTable(settings, state, frame);
static_cast<const ASTTableJoin &>(*table_join).formatImplBeforeTable(settings, state, frame);
settings.ostr << " ";
table_expression->formatImpl(settings, state, frame);
settings.ostr << " ";
if (table_join)
table_join->formatImplAfterTable(settings, state, frame);
static_cast<const ASTTableJoin &>(*table_join).formatImplAfterTable(settings, state, frame);
}
else if (array_join)
{

View File

@ -9,7 +9,8 @@
#include <DB/Parsers/ParserSetQuery.h>
#include <DB/Parsers/ParserSampleRatio.h>
#include <DB/Parsers/ParserSelectQuery.h>
#include <DB/Parsers/ParserTableExpression.h>
#include <DB/Parsers/ParserTablesInSelectQuery.h>
namespace DB
{
@ -30,15 +31,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p
ParserString s_select("SELECT", true, true);
ParserString s_distinct("DISTINCT", true, true);
ParserString s_from("FROM", true, true);
ParserString s_left("LEFT", true, true);
ParserString s_array("ARRAY", true, true);
ParserString s_join("JOIN", true, true);
ParserString s_using("USING", true, true);
ParserString s_prewhere("PREWHERE", true, true);
ParserString s_where("WHERE", true, true);
ParserString s_final("FINAL", true, true);
ParserString s_sample("SAMPLE", true, true);
ParserString s_offset("OFFSET", true, true);
ParserString s_group("GROUP", true, true);
ParserString s_by("BY", true, true);
ParserString s_with("WITH", true, true);
@ -51,7 +45,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p
ParserString s_all("ALL", true, true);
ParserNotEmptyExpressionList exp_list(false);
ParserNotEmptyExpressionList exp_list_for_select_clause(true); /// Разрешает алиасы без слова AS.
ParserNotEmptyExpressionList exp_list_for_select_clause(true); /// Allows aliases without AS keyword.
ParserExpressionWithOptionalAlias exp_elem(false);
ParserJoin join;
ParserOrderByExpressionList order_list;
@ -82,111 +76,12 @@ bool ParserSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_p
{
ws.ignore(pos, end);
ParserWithOptionalAlias table_p(std::make_unique<ParserTableExpression>(), true);
if (!table_p.parse(pos, end, select_query->table, max_parsed_pos, expected))
return false;
/// Раскрываем составной идентификатор в имя БД и имя таблицы. NOTE Можно избавиться от этого в будущем.
if (const ASTIdentifier * table_identifier = typeid_cast<const ASTIdentifier *>(select_query->table.get()))
{
if (table_identifier->children.size() > 2)
throw Exception("Too many components to table. Table may be specified either in database.table or in table form",
ErrorCodes::SYNTAX_ERROR);
if (table_identifier->children.size() == 2)
{
select_query->database = table_identifier->children.at(0);
typeid_cast<ASTIdentifier &>(*select_query->database).kind = ASTIdentifier::Database;
select_query->table = table_identifier->children.at(1);
typeid_cast<ASTIdentifier &>(*select_query->table).kind = ASTIdentifier::Table;
}
}
ws.ignore(pos, end);
}
/** FINAL и SAMPLE может быть здесь или после всех JOIN-ов
* (второй вариант был изначально сделан по ошибке, и его приходится поддерживать).
*/
auto parse_final_and_sample = [&]() -> bool
{
/// FINAL
if (!select_query->final
&& s_final.ignore(pos, end, max_parsed_pos, expected))
{
select_query->final = true;
ws.ignore(pos, end);
}
/// SAMPLE number
if (!select_query->sample_size
&& s_sample.ignore(pos, end, max_parsed_pos, expected))
{
ws.ignore(pos, end);
ParserSampleRatio ratio;
if (!ratio.parse(pos, end, select_query->sample_size, max_parsed_pos, expected))
return false;
ws.ignore(pos, end);
/// OFFSET number
if (s_offset.ignore(pos, end, max_parsed_pos, expected))
{
ws.ignore(pos, end);
if (!ratio.parse(pos, end, select_query->sample_offset, max_parsed_pos, expected))
return false;
ws.ignore(pos, end);
}
}
return true;
};
if (!parse_final_and_sample())
return false;
/// [LEFT] ARRAY JOIN expr list
Pos saved_pos = pos;
bool has_array_join = false;
if (s_left.ignore(pos, end, max_parsed_pos, expected) && ws.ignore(pos, end) && s_array.ignore(pos, end, max_parsed_pos, expected))
{
select_query->array_join_is_left = true;
has_array_join = true;
}
else
{
pos = saved_pos;
if (s_array.ignore(pos, end, max_parsed_pos, expected))
has_array_join = true;
}
if (has_array_join)
{
ws.ignore(pos, end);
if (!s_join.ignore(pos, end, max_parsed_pos, expected))
return false;
ws.ignore(pos, end);
if (!exp_list.parse(pos, end, select_query->array_join_expression_list, max_parsed_pos, expected))
if (!ParserTablesInSelectQuery().parse(pos, end, select_query->tables, max_parsed_pos, expected))
return false;
ws.ignore(pos, end);
}
/// [GLOBAL] [ANY|ALL] INNER|LEFT|RIGHT|FULL|CROSS [OUTER] JOIN (subquery)|table_name USING tuple
join.parse(pos, end, select_query->join, max_parsed_pos, expected);
if (!parse_final_and_sample())
return false;
/// PREWHERE expr
if (s_prewhere.ignore(pos, end, max_parsed_pos, expected))
{

View File

@ -1,63 +0,0 @@
#include <DB/Parsers/CommonParsers.h>
#include <DB/Parsers/ExpressionElementParsers.h>
#include <DB/Parsers/ExpressionListParsers.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ParserSelectQuery.h>
#include <DB/Parsers/ParserTableExpression.h>
namespace DB
{
/** Parses one of three forms into `node`:
  *  - a parenthesised SELECT query (subquery);
  *  - a compound identifier immediately followed by '(' - re-parsed from the start as a function
  *    and marked as a table function;
  *  - a plain compound identifier, marked as a table name.
  * Returns false as soon as a mandatory part fails to parse.
  */
bool ParserTableExpression::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    ParserWhiteSpaceOrComments whitespace;
    ParserString open_paren("(");
    ParserString close_paren(")");

    const Pos start = pos;

    /// Subquery.
    if (open_paren.ignore(pos, end, max_parsed_pos, expected))
    {
        whitespace.ignore(pos, end);

        ParserSelectQuery subquery_parser;
        if (!subquery_parser.parse(pos, end, node, max_parsed_pos, expected))
            return false;

        whitespace.ignore(pos, end);
        if (!close_paren.ignore(pos, end, max_parsed_pos, expected))
            return false;

        whitespace.ignore(pos, end);
        return true;
    }

    ParserCompoundIdentifier identifier_parser;
    if (!identifier_parser.parse(pos, end, node, max_parsed_pos, expected))
        return false;

    /// No parenthesis right after the identifier: it is an ordinary table name.
    if (!open_paren.ignore(pos, end, max_parsed_pos, expected))
    {
        whitespace.ignore(pos, end);
        typeid_cast<ASTIdentifier &>(*node).kind = ASTIdentifier::Table;
        return true;
    }

    /// A parenthesis right after the identifier means this must be a table function:
    /// rewind and parse the whole thing as a function call.
    pos = start;

    ParserFunction function_parser;
    if (!function_parser.parse(pos, end, node, max_parsed_pos, expected))
        return false;

    if (node)
        typeid_cast<ASTFunction &>(*node).kind = ASTFunction::TABLE_FUNCTION;

    whitespace.ignore(pos, end);
    return true;
}
}

View File

@ -0,0 +1,297 @@
#include <DB/Parsers/CommonParsers.h>
#include <DB/Parsers/ExpressionElementParsers.h>
#include <DB/Parsers/ExpressionListParsers.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTTablesInSelectQuery.h>
#include <DB/Parsers/ParserSelectQuery.h>
#include <DB/Parsers/ParserSampleRatio.h>
#include <DB/Parsers/ParserTablesInSelectQuery.h>
namespace DB
{
namespace ErrorCodes
{
extern const int SYNTAX_ERROR;
}
/** Parses a single table expression: a subquery, a table function or a compound identifier
  * (tried in that order), optionally followed by FINAL and by SAMPLE ratio [OFFSET ratio].
  * On success stores the resulting ASTTableExpression in `node` and returns true;
  * returns false when no alternative matches or when a ratio after SAMPLE / OFFSET fails to parse.
  */
bool ParserTableExpression::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    ParserWhiteSpaceOrComments whitespace;
    auto table_expr = std::make_shared<ASTTableExpression>();

    whitespace.ignore(pos, end);

    const bool is_subquery = ParserSubquery().parse(pos, end, table_expr->subquery, max_parsed_pos, expected);
    if (!is_subquery)
    {
        if (ParserFunction().parse(pos, end, table_expr->table_function, max_parsed_pos, expected))
            static_cast<ASTFunction &>(*table_expr->table_function).kind = ASTFunction::TABLE_FUNCTION;
        else if (ParserCompoundIdentifier().parse(pos, end, table_expr->database_and_table_name, max_parsed_pos, expected))
            static_cast<ASTIdentifier &>(*table_expr->database_and_table_name).kind = ASTIdentifier::Table;
        else
            return false;
    }

    whitespace.ignore(pos, end);

    /// Optional FINAL modifier.
    if (ParserString("FINAL", true, true).ignore(pos, end, max_parsed_pos, expected))
        table_expr->final = true;

    whitespace.ignore(pos, end);

    /// Optional SAMPLE ratio [OFFSET ratio].
    if (ParserString("SAMPLE", true, true).ignore(pos, end, max_parsed_pos, expected))
    {
        whitespace.ignore(pos, end);

        ParserSampleRatio ratio_parser;
        if (!ratio_parser.parse(pos, end, table_expr->sample_size, max_parsed_pos, expected))
            return false;

        whitespace.ignore(pos, end);

        if (ParserString("OFFSET", true, true).ignore(pos, end, max_parsed_pos, expected))
        {
            whitespace.ignore(pos, end);

            if (!ratio_parser.parse(pos, end, table_expr->sample_offset, max_parsed_pos, expected))
                return false;

            whitespace.ignore(pos, end);
        }
    }

    /// Register every parsed part as a child, in a fixed order.
    auto add_child = [&table_expr](const ASTPtr & part)
    {
        if (part)
            table_expr->children.emplace_back(part);
    };

    add_child(table_expr->database_and_table_name);
    add_child(table_expr->table_function);
    add_child(table_expr->subquery);
    add_child(table_expr->sample_size);
    add_child(table_expr->sample_offset);

    node = table_expr;
    return true;
}
/** Parses `[LEFT | INNER] ARRAY JOIN <expression list>`.
  * The kind is Left when LEFT is given and Inner otherwise (INNER itself is optional).
  * On success stores the resulting ASTArrayJoin in `node` and returns true;
  * returns false when the keywords or the expression list do not parse.
  */
bool ParserArrayJoin::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    ParserWhiteSpaceOrComments whitespace;
    auto array_join = std::make_shared<ASTArrayJoin>();

    whitespace.ignore(pos, end);

    /// Matches the "ARRAY <whitespace> JOIN" keyword pair at the current position.
    auto array_join_follows = [&]() -> bool
    {
        return ParserString("ARRAY", true, true).ignore(pos, end, max_parsed_pos, expected)
            && whitespace.ignore(pos, end)
            && ParserString("JOIN", true, true).ignore(pos, end, max_parsed_pos, expected);
    };

    const Pos keywords_begin = pos;

    if (ParserString("LEFT", true, true).ignore(pos, end, max_parsed_pos, expected)
        && whitespace.ignore(pos, end)
        && array_join_follows())
    {
        array_join->kind = ASTArrayJoin::Kind::Left;
    }
    else
    {
        /// The LEFT form did not match: rewind and try the INNER form.
        pos = keywords_begin;

        /// INNER may be written explicitly; when omitted it is the default.
        if (ParserString("INNER", true, true).ignore(pos, end, max_parsed_pos, expected))
            whitespace.ignore(pos, end);

        if (!array_join_follows())
            return false;

        array_join->kind = ASTArrayJoin::Kind::Inner;
    }

    whitespace.ignore(pos, end);

    if (!ParserExpressionList(false).parse(pos, end, array_join->expression_list, max_parsed_pos, expected))
        return false;

    whitespace.ignore(pos, end);

    if (array_join->expression_list)
        array_join->children.emplace_back(array_join->expression_list);

    node = array_join;
    return true;
}
/** Parses one element of the tables list into an ASTTablesInSelectQueryElement. Three shapes are accepted:
*  - an ARRAY JOIN clause (tried first, whatever the value of is_first);
*  - when is_first is set: a bare table expression;
*  - otherwise: a join specification - either a comma, or
*    [GLOBAL|LOCAL] [ANY|ALL] INNER|LEFT|RIGHT|FULL|CROSS JOIN - followed by a table expression
*    and, for every kind except Comma and Cross, a mandatory USING or ON clause.
* On success stores the element in `node` and returns true; returns false at the first mandatory part
* that fails to parse. Throws Exception with SYNTAX_ERROR when ANY or ALL is combined with CROSS.
*/
bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
ParserWhiteSpaceOrComments ws;
auto res = std::make_shared<ASTTablesInSelectQueryElement>();
ws.ignore(pos, end);
/// An ARRAY JOIN element carries neither a table expression nor a table join.
if (ParserArrayJoin().parse(pos, end, res->array_join, max_parsed_pos, expected))
{
}
else if (is_first)
{
/// The first element is a table expression without any join specification.
if (!ParserTableExpression().parse(pos, end, res->table_expression, max_parsed_pos, expected))
return false;
}
else
{
auto table_join = std::make_shared<ASTTableJoin>();
if (ParserString(",").ignore(pos, end, max_parsed_pos, expected))
{
table_join->kind = ASTTableJoin::Kind::Comma;
}
else
{
/// Optional locality. The keyword probes below are called without max_parsed_pos / expected.
if (ParserString("GLOBAL", true, true).ignore(pos, end))
table_join->locality = ASTTableJoin::Locality::Global;
else if (ParserString("LOCAL", true, true).ignore(pos, end))
table_join->locality = ASTTableJoin::Locality::Local;
ws.ignore(pos, end);
/// Optional strictness.
if (ParserString("ANY", true, true).ignore(pos, end))
table_join->strictness = ASTTableJoin::Strictness::Any;
else if (ParserString("ALL", true, true).ignore(pos, end))
table_join->strictness = ASTTableJoin::Strictness::All;
ws.ignore(pos, end);
/// Mandatory join kind.
if (ParserString("INNER", true, true).ignore(pos, end))
table_join->kind = ASTTableJoin::Kind::Inner;
else if (ParserString("LEFT", true, true).ignore(pos, end))
table_join->kind = ASTTableJoin::Kind::Left;
else if (ParserString("RIGHT", true, true).ignore(pos, end))
table_join->kind = ASTTableJoin::Kind::Right;
else if (ParserString("FULL", true, true).ignore(pos, end))
table_join->kind = ASTTableJoin::Kind::Full;
else if (ParserString("CROSS", true, true).ignore(pos, end))
table_join->kind = ASTTableJoin::Kind::Cross;
else
{
/// Perhaps INNER should be assumed by default here, as other DBMS do.
expected = "INNER|LEFT|RIGHT|FULL|CROSS";
return false;
}
/// Strictness is rejected for CROSS JOIN.
if (table_join->strictness != ASTTableJoin::Strictness::Unspecified
&& table_join->kind == ASTTableJoin::Kind::Cross)
throw Exception("You must not specify ANY or ALL for CROSS JOIN.", ErrorCodes::SYNTAX_ERROR);
ws.ignore(pos, end);
if (!ParserString("JOIN", true, true).ignore(pos, end, max_parsed_pos, expected))
return false;
ws.ignore(pos, end);
}
/// The joined table itself.
if (!ParserTableExpression().parse(pos, end, res->table_expression, max_parsed_pos, expected))
return false;
/// Comma and CROSS joins take no condition; every other kind requires USING or ON.
if (table_join->kind != ASTTableJoin::Kind::Comma
&& table_join->kind != ASTTableJoin::Kind::Cross)
{
ws.ignore(pos, end);
if (ParserString("USING", true, true).ignore(pos, end, max_parsed_pos, expected))
{
ws.ignore(pos, end);
/// The USING expression list may or may not be wrapped in parentheses.
bool in_parens = ParserString("(").ignore(pos, end);
if (in_parens)
ws.ignore(pos, end);
if (!ParserExpressionList(false).parse(pos, end, table_join->using_expression_list, max_parsed_pos, expected))
return false;
/// An opening parenthesis must be matched by a closing one.
if (in_parens)
{
ws.ignore(pos, end);
if (!ParserString(")").ignore(pos, end))
return false;
}
ws.ignore(pos, end);
}
else if (ParserString("ON", true, true).ignore(pos, end, max_parsed_pos, expected))
{
ws.ignore(pos, end);
/// The ON condition is parsed with ParserExpressionElement.
if (!ParserExpressionElement().parse(pos, end, table_join->on_expression, max_parsed_pos, expected))
return false;
ws.ignore(pos, end);
}
else
{
expected = "USING or ON";
return false;
}
}
/// Register the parsed condition (if any) as a child of the join node.
if (table_join->using_expression_list)
table_join->children.emplace_back(table_join->using_expression_list);
if (table_join->on_expression)
table_join->children.emplace_back(table_join->on_expression);
res->table_join = table_join;
}
ws.ignore(pos, end);
/// Register whichever parts were parsed as children of the element.
if (res->table_expression)
res->children.emplace_back(res->table_expression);
if (res->table_join)
res->children.emplace_back(res->table_join);
if (res->array_join)
res->children.emplace_back(res->array_join);
node = res;
return true;
}
/** Parses the whole tables list of a SELECT query: one mandatory first element followed by
  * any number of further elements (joined tables or ARRAY JOINs).
  * Each parsed element becomes a child of the resulting ASTTablesInSelectQuery.
  * On success stores the list in `node` and returns true; returns false when the first element
  * cannot be parsed.
  */
bool ParserTablesInSelectQuery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    auto res = std::make_shared<ASTTablesInSelectQuery>();

    ASTPtr child;

    /// The first element is parsed in "first" mode and is required.
    if (!ParserTablesInSelectQueryElement(true).parse(pos, end, child, max_parsed_pos, expected))
        return false;
    res->children.emplace_back(child);

    /// Subsequent elements are optional; stop at the first one that does not parse.
    while (ParserTablesInSelectQueryElement(false).parse(pos, end, child, max_parsed_pos, expected))
        res->children.emplace_back(child);

    /// Hand the result to the caller: without this assignment the parsed list would be discarded
    /// although the function reports success.
    node = res;
    return true;
}
}