ClickHouse/dbms/src/Parsers/ExpressionElementParsers.cpp

671 lines
17 KiB
C++
Raw Normal View History

2010-06-24 19:12:10 +00:00
#include <errno.h>
#include <cstdlib>
2011-11-01 17:57:37 +00:00
#include <DB/IO/ReadHelpers.h>
2010-06-24 19:12:10 +00:00
#include <DB/Parsers/IAST.h>
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTFunction.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTLiteral.h>
2011-08-28 08:50:27 +00:00
#include <DB/Parsers/ASTAsterisk.h>
2011-09-04 05:14:52 +00:00
#include <DB/Parsers/ASTOrderByElement.h>
2012-08-22 18:46:09 +00:00
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Parsers/ASTSubquery.h>
2010-06-24 19:12:10 +00:00
#include <DB/Parsers/CommonParsers.h>
#include <DB/Parsers/ExpressionListParsers.h>
2012-08-22 18:46:09 +00:00
#include <DB/Parsers/ParserSelectQuery.h>
2010-06-24 19:12:10 +00:00
#include <DB/Parsers/ExpressionElementParsers.h>
#include <DB/Parsers/formatAST.h>
2010-06-24 19:12:10 +00:00
namespace DB
{
/** Parses an array constructor: '[' expression-list ']'.
  * On success `node` is an ASTFunction named "array" whose `arguments`
  * (also stored as its only child) is the parsed expression list.
  * Returns false as soon as any of the three parts fails to parse.
  */
bool ParserArray::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    const Pos start = pos;

    ParserWhiteSpaceOrComments skip_ws;
    ParserString opening_bracket("[");
    ParserString closing_bracket("]");
    ParserExpressionList elements_parser(false);
    ASTPtr elements;

    if (!opening_bracket.ignore(pos, end, max_parsed_pos, expected))
        return false;

    skip_ws.ignore(pos, end);
    if (!elements_parser.parse(pos, end, elements, max_parsed_pos, expected))
        return false;

    skip_ws.ignore(pos, end);
    if (!closing_bracket.ignore(pos, end, max_parsed_pos, expected))
        return false;

    /// The array literal is represented as a call of the function "array".
    ASTFunction * array_function = new ASTFunction(StringRange(start, pos));
    array_function->name = "array";
    array_function->arguments = elements;
    array_function->children.push_back(elements);

    node = array_function;
    return true;
}
/** Parses '(' expression-list ')'.
  * - An empty list is rejected.
  * - A list with exactly one element yields that element itself.
  * - A longer list yields an ASTFunction named "tuple" over the list.
  */
bool ParserParenthesisExpression::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    const Pos start = pos;

    ParserWhiteSpaceOrComments skip_ws;
    ParserString lparen("(");
    ParserString rparen(")");
    ParserExpressionList list_parser(false);
    ASTPtr list_node;

    if (!lparen.ignore(pos, end, max_parsed_pos, expected))
        return false;

    skip_ws.ignore(pos, end);
    if (!list_parser.parse(pos, end, list_node, max_parsed_pos, expected))
        return false;

    skip_ws.ignore(pos, end);
    if (!rparen.ignore(pos, end, max_parsed_pos, expected))
        return false;

    ASTExpressionList & elements = typeid_cast<ASTExpressionList &>(*list_node);

    /// An empty expression in parentheses is not allowed.
    if (elements.children.empty())
    {
        expected = "non-empty parenthesized list of expressions";
        return false;
    }

    /// A single parenthesized expression is returned as is, without a wrapper.
    if (elements.children.size() == 1)
    {
        node = elements.children.front();
        return true;
    }

    /// Several expressions form a tuple.
    ASTFunction * tuple_function = new ASTFunction(StringRange(start, pos));
    tuple_function->name = "tuple";
    tuple_function->arguments = list_node;
    tuple_function->children.push_back(list_node);

    node = tuple_function;
    return true;
}
2012-08-22 18:46:09 +00:00
/** Parses a subquery: '(' SELECT-query ')'.
  * On success `node` is an ASTSubquery whose single child is the parsed select query.
  */
bool ParserSubquery::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    const Pos start = pos;

    ParserWhiteSpaceOrComments skip_ws;
    ParserString lparen("(");
    ParserString rparen(")");
    ParserSelectQuery select_parser;
    ASTPtr select_query;

    if (!lparen.ignore(pos, end, max_parsed_pos, expected))
        return false;

    skip_ws.ignore(pos, end);
    if (!select_parser.parse(pos, end, select_query, max_parsed_pos, expected))
        return false;

    skip_ws.ignore(pos, end);
    if (!rparen.ignore(pos, end, max_parsed_pos, expected))
        return false;

    node = new ASTSubquery(StringRange(start, pos));
    ASTSubquery & subquery = typeid_cast<ASTSubquery &>(*node);
    subquery.children.push_back(select_query);
    return true;
}
2010-06-24 19:12:10 +00:00
/** Parses a single identifier, in one of two forms:
  * - back-quoted: read with readBackQuotedString; an empty name is rejected;
  * - bare: a run of ASCII letters and underscores, with digits allowed
  *   everywhere except the first position.
  * On success `node` is an ASTIdentifier carrying the name.
  */
bool ParserIdentifier::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    const Pos start = pos;

    /// Identifier in back quotes.
    if (pos != end && *pos == '`')
    {
        ReadBuffer quoted_input(const_cast<char *>(pos), end - pos, 0);
        String unquoted_name;
        readBackQuotedString(unquoted_name, quoted_input);

        /// Identifiers equal to the empty string are not allowed.
        if (unquoted_name.empty())
            return false;

        pos += quoted_input.count();
        node = new ASTIdentifier(StringRange(start, pos), unquoted_name);
        return true;
    }

    /// Bare identifier: advance while the current character may belong to it.
    for (; pos != end; ++pos)
    {
        const char c = *pos;
        const bool letter_or_underscore = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
        const bool non_leading_digit = pos != start && c >= '0' && c <= '9';

        if (!letter_or_underscore && !non_leading_digit)
            break;
    }

    if (pos == start)
        return false;

    node = new ASTIdentifier(StringRange(start, pos), String(start, pos - start));
    return true;
}
/** Parses a dot-separated chain of identifiers (a.b.c).
  * On success `node` is an ASTIdentifier whose name is the parts joined with '.'.
  * When there is more than one part, the individual identifiers are also
  * stored as children of the resulting node.
  */
bool ParserCompoundIdentifier::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    const Pos start = pos;

    ASTPtr parts_node;
    ParserList dotted_list(ParserPtr(new ParserIdentifier), ParserPtr(new ParserString(".")), false);
    if (!dotted_list.parse(pos, end, parts_node, max_parsed_pos, expected))
        return false;

    const ASTExpressionList & parts = static_cast<const ASTExpressionList &>(*parts_node);

    /// Join the component names with dots.
    String full_name;
    for (const auto & part : parts.children)
    {
        const String & part_name = static_cast<const ASTIdentifier &>(*part).name;
        if (!full_name.empty())
            full_name += '.';
        full_name += part_name;
    }

    node = new ASTIdentifier(StringRange(start, pos), full_name);

    /// Remember the component identifiers in children if there is more than one.
    if (parts.children.size() > 1)
        node->children.insert(node->children.end(), parts.children.begin(), parts.children.end());

    return true;
}
/** Parses a function call: name '(' expression-list ')'.
  * A second parenthesized list may follow immediately, as in quantile(0.9)(x);
  * in that case the first list becomes the function's `parameters`
  * and the second one becomes its `arguments`.
  * Throws Exception(SYNTAX_ERROR) for toDate(YYYY-MM-DD) written without quotes.
  */
bool ParserFunction::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    const Pos start = pos;

    ParserWhiteSpaceOrComments skip_ws;
    ParserIdentifier name_parser;
    ParserString lparen("(");
    ParserString rparen(")");
    ParserExpressionList list_parser(false);

    ASTPtr name_node;
    ASTPtr arguments;
    ASTPtr parameters;

    if (!name_parser.parse(pos, end, name_node, max_parsed_pos, expected))
        return false;

    skip_ws.ignore(pos, end);
    if (!lparen.ignore(pos, end, max_parsed_pos, expected))
        return false;

    skip_ws.ignore(pos, end);
    const Pos first_list_begin = pos;
    if (!list_parser.parse(pos, end, arguments, max_parsed_pos, expected))
        return false;
    const Pos first_list_end = pos;

    skip_ws.ignore(pos, end);
    if (!rparen.ignore(pos, end, max_parsed_pos, expected))
        return false;

    const String & function_name = typeid_cast<const ASTIdentifier &>(*name_node).name;

    /** Check for a common mistake: because quoting command-line arguments is tricky,
      * a query often ends up containing toDate(2014-01-01) instead of toDate('2014-01-01').
      * If the first form were not reported as an error, the argument would be interpreted
      * as 2014 - 01 - 01, some number, and the query would silently return an unexpected result.
      */
    bool unquoted_date = function_name == "toDate"
        && first_list_end - first_list_begin == strlen("2014-01-01");

    /// The text must look like DDDD-DD-DD, where the first digit is 2 or 3.
    for (size_t i = 0; unquoted_date && i < 10; ++i)
    {
        const char c = first_list_begin[i];

        if (i == 4 || i == 7)
            unquoted_date = c == '-';
        else if (i == 0)
            unquoted_date = c >= '2' && c <= '3';
        else
            unquoted_date = c >= '0' && c <= '9';
    }

    if (unquoted_date)
    {
        std::string raw_argument(first_list_begin, first_list_end - first_list_begin);
        throw Exception("Argument of function toDate is unquoted: toDate(" + raw_argument + "), must be: toDate('" + raw_argument + "')"
            , ErrorCodes::SYNTAX_ERROR);
    }

    /// A parametric aggregate function has two parenthesized lists (parameters and arguments). Example: quantile(0.9)(x).
    if (lparen.ignore(pos, end, max_parsed_pos, expected))
    {
        parameters = arguments;
        arguments = nullptr;

        skip_ws.ignore(pos, end);
        if (!list_parser.parse(pos, end, arguments, max_parsed_pos, expected))
            return false;

        skip_ws.ignore(pos, end);
        if (!rparen.ignore(pos, end, max_parsed_pos, expected))
            return false;
    }

    ASTFunction * function = new ASTFunction(StringRange(start, pos));
    function->name = typeid_cast<ASTIdentifier &>(*name_node).name;

    function->arguments = arguments;
    function->children.push_back(function->arguments);

    if (parameters)
    {
        function->parameters = parameters;
        function->children.push_back(function->parameters);
    }

    node = function;
    return true;
}
/** Parses the NULL keyword.
  * On success `node` is an ASTLiteral holding a Null value.
  */
bool ParserNull::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    const Pos start = pos;

    ParserString null_keyword("NULL", true);
    if (!null_keyword.parse(pos, end, node, max_parsed_pos, expected))
        return false;

    node = new ASTLiteral(StringRange(StringRange(start, pos)), Null());
    return true;
}
/** Parses a numeric literal.
  * The text is first read with strtod. Then, if strtoll (for negative values)
  * or strtoull (otherwise) consumes exactly the same characters without a range
  * error, the integer value is used instead of the floating-point one.
  * On success `node` is an ASTLiteral holding the chosen value.
  */
bool ParserNumber::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    Field value;
    const Pos start = pos;

    if (pos == end)
        return false;

    /** Maximum length of a number. 319 characters are enough to write the maximum double in decimal form.
      * The extra copy is needed in order to use the strto* functions, which require a zero-terminated string.
      */
    char text[320];
    const size_t text_size = end - pos < 319 ? end - pos : 319;
    memcpy(text, pos, text_size);
    text[text_size] = 0;

    char * float_end = text;
    errno = 0;    /// The strto* functions do not clear errno.
    const Float64 as_float = std::strtod(text, &float_end);

    const bool nothing_consumed = float_end == text;
    if (nothing_consumed || errno == ERANGE)
    {
        expected = "number";
        return false;
    }

    value = as_float;

    /// Try to use a more precise type - UInt64 or Int64.
    char * integer_end = text;
    errno = 0;

    if (as_float < 0)
    {
        const Int64 as_signed = std::strtoll(text, &integer_end, 0);
        if (integer_end == float_end && errno != ERANGE)
            value = as_signed;
    }
    else
    {
        const UInt64 as_unsigned = std::strtoull(text, &integer_end, 0);
        if (integer_end == float_end && errno != ERANGE)
            value = as_unsigned;
    }

    pos += float_end - text;
    node = new ASTLiteral(StringRange(start, pos), value);
    return true;
}
/** Parses an unsigned integer using tryReadIntText.
  * Fails if the read fails or if no characters were consumed.
  * On success `node` is an ASTLiteral holding the UInt64 value.
  */
bool ParserUnsignedInteger::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    Field value;
    const Pos start = pos;

    if (pos == end)
        return false;

    UInt64 number = 0;
    ReadBuffer input(const_cast<char *>(pos), end - pos, 0);

    const bool read_ok = tryReadIntText(number, input);
    if (!read_ok || input.offset() == 0)
    {
        expected = "unsigned integer";
        return false;
    }

    value = number;
    pos += input.offset();

    node = new ASTLiteral(StringRange(start, pos), value);
    return true;
}
/** Parses a string literal in single quotes.
  * Backslash escape sequences are decoded with parseEscapeSequence.
  * On success `node` is an ASTLiteral holding the decoded string.
  * Fails with an appropriate `expected` message if the opening quote is absent,
  * if the input ends right after a backslash, or if the closing quote is missing.
  */
bool ParserStringLiteral::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    Pos begin = pos;
    String s;

    if (pos == end || *pos != '\'')
    {
        expected = "opening single quote";
        return false;
    }

    ++pos;

    while (pos != end)
    {
        /// Copy the longest run of ordinary characters at once.
        size_t bytes = 0;
        for (; pos + bytes != end; ++bytes)
            if (pos[bytes] == '\\' || pos[bytes] == '\'')
                break;

        s.append(pos, bytes);
        pos += bytes;

        /// The input ended inside the literal: do not dereference `pos`, it points past the last character.
        if (pos == end)
            break;

        if (*pos == '\'')
        {
            ++pos;
            node = new ASTLiteral(StringRange(begin, pos), s);
            return true;
        }

        if (*pos == '\\')
        {
            ++pos;
            if (pos == end)
            {
                expected = "escape sequence";
                return false;
            }
            s += parseEscapeSequence(*pos);
            ++pos;
        }
    }

    expected = "closing single quote";
    return false;
}
/** Parses an array consisting only of literals: '[' literal (',' literal)* ']'.
  * On success `node` is a single ASTLiteral holding an Array of the element values.
  * The closing bracket is only looked for after at least one element has been read,
  * so an empty array "[]" is not accepted by this parser.
  */
bool ParserArrayOfLiterals::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    Pos begin = pos;
    Array arr;

    if (pos == end || *pos != '[')
    {
        expected = "opening square bracket";
        return false;
    }

    ParserWhiteSpaceOrComments ws;
    ParserLiteral literal_p;

    ++pos;

    while (pos != end)
    {
        ws.ignore(pos, end);

        if (!arr.empty())
        {
            /// Skipping whitespace may have reached the end of input, so check before dereferencing `pos`.
            if (pos != end && *pos == ']')
            {
                ++pos;
                node = new ASTLiteral(StringRange(begin, pos), arr);
                return true;
            }
            else if (pos != end && *pos == ',')
            {
                ++pos;
            }
            else
            {
                expected = "comma or closing square bracket";
                return false;
            }
        }

        ws.ignore(pos, end);

        ASTPtr literal_node;
        if (!literal_p.parse(pos, end, literal_node, max_parsed_pos, expected))
            return false;

        arr.push_back(typeid_cast<const ASTLiteral &>(*literal_node).value);
    }

    expected = "closing square bracket";
    return false;
}
/** Parses a literal by trying, in this order: NULL, a number, a single-quoted string.
  * The first alternative that succeeds determines `node`.
  */
bool ParserLiteral::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    ParserNull null_parser;
    ParserNumber number_parser;
    ParserStringLiteral string_parser;

    const bool parsed =
        null_parser.parse(pos, end, node, max_parsed_pos, expected)
        || number_parser.parse(pos, end, node, max_parsed_pos, expected)
        || string_parser.parse(pos, end, node, max_parsed_pos, expected);

    if (!parsed)
        expected = "literal: one of NULL, number, single quoted string";

    return parsed;
}
/** Words that are rejected as an alias name by ParserAlias::parseImpl
  * when an alias is allowed without the AS keyword (compared case-insensitively).
  * The list is terminated by nullptr.
  */
const char * ParserAlias::restricted_keywords[] =
{
    "FROM", "FINAL", "SAMPLE", "ARRAY",
    "LEFT", "RIGHT", "INNER", "FULL", "CROSS", "JOIN",
    "ANY", "ALL", "ON", "USING",
    "PREWHERE", "WHERE", "GROUP", "WITH", "HAVING", "ORDER", "LIMIT",
    "SETTINGS", "FORMAT", "UNION",
    nullptr
};
/** Parses an alias: the AS keyword followed by an identifier.
  * If `allow_alias_without_as_keyword` is set, the AS keyword may be omitted,
  * but then the identifier must not be one of `restricted_keywords`.
  * On success `node` is the ASTIdentifier of the alias.
  */
bool ParserAlias::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    ParserWhiteSpaceOrComments skip_ws;
    ParserString as_keyword("AS", true, true);
    ParserIdentifier name_parser;

    const bool has_as = as_keyword.parse(pos, end, node, max_parsed_pos, expected);
    if (!has_as && !allow_alias_without_as_keyword)
        return false;

    skip_ws.ignore(pos, end);

    if (!name_parser.parse(pos, end, node, max_parsed_pos, expected))
        return false;

    if (!allow_alias_without_as_keyword)
        return true;

    /** In this mode the alias must not coincide with a keyword, so that
      * in the query "SELECT x FROM t" the word FROM is not taken for an alias,
      * while in the query "SELECT x FRO FROM t" the word FRO is taken for an alias.
      */
    const String & alias = static_cast<const ASTIdentifier &>(*node).name;

    const char ** candidate = restricted_keywords;
    while (*candidate != nullptr)
    {
        if (strcasecmp(alias.data(), *candidate) == 0)
            return false;
        ++candidate;
    }

    return true;
}
/** Parses one expression element by trying the alternatives in this order:
  * subquery, parenthesized expression, array of literals, array, literal,
  * function, compound identifier, asterisk.
  * The first alternative that succeeds determines `node`.
  */
bool ParserExpressionElement::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    const Pos start = pos;

    ParserSubquery subquery_parser;
    ParserParenthesisExpression parenthesis_parser;
    ParserArrayOfLiterals literal_array_parser;
    ParserArray array_parser;
    ParserLiteral literal_parser;
    ParserFunction function_parser;
    ParserCompoundIdentifier identifier_parser;
    ParserString asterisk_parser("*");

    /// Alternatives that produce the resulting node themselves.
    if (subquery_parser.parse(pos, end, node, max_parsed_pos, expected)
        || parenthesis_parser.parse(pos, end, node, max_parsed_pos, expected)
        || literal_array_parser.parse(pos, end, node, max_parsed_pos, expected)
        || array_parser.parse(pos, end, node, max_parsed_pos, expected)
        || literal_parser.parse(pos, end, node, max_parsed_pos, expected)
        || function_parser.parse(pos, end, node, max_parsed_pos, expected)
        || identifier_parser.parse(pos, end, node, max_parsed_pos, expected))
    {
        return true;
    }

    /// The asterisk is matched as a plain string, so its AST node is created here.
    if (asterisk_parser.parse(pos, end, node, max_parsed_pos, expected))
    {
        node = new ASTAsterisk(StringRange(start, pos));
        return true;
    }

    expected = "expression element: one of array, literal, function, identifier, asterisk, parenthised expression, subquery";
    return false;
}
/** Parses an element with `elem_parser`, optionally followed by an alias.
  * If an alias is present, it is stored in the `alias` field of the parsed node;
  * parsing fails if the node is not an ASTWithAlias.
  */
bool ParserWithOptionalAlias::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    ParserWhiteSpaceOrComments skip_ws;
    ParserAlias alias_parser(allow_alias_without_as_keyword);

    if (!elem_parser->parse(pos, end, node, max_parsed_pos, expected))
        return false;

    skip_ws.ignore(pos, end);

    /// The alias is optional: its absence is not an error.
    ASTPtr alias_identifier;
    if (!alias_parser.parse(pos, end, alias_identifier, max_parsed_pos, expected))
        return true;

    String alias_text = typeid_cast<ASTIdentifier &>(*alias_identifier).name;

    ASTWithAlias * target = dynamic_cast<ASTWithAlias *>(node.get());
    if (!target)
    {
        expected = "alias cannot be here";
        return false;
    }

    target->alias = alias_text;
    return true;
}
/** Parses one ORDER BY element:
  *   expression [ASCENDING | ASC | DESCENDING | DESC] [COLLATE 'locale']
  * The direction is 1 unless DESCENDING or DESC is present, in which case it is -1.
  * If COLLATE is present, a Collator is created for the given locale.
  * On success `node` is an ASTOrderByElement whose only child is the expression.
  */
bool ParserOrderByElement::parseImpl(Pos & pos, Pos end, ASTPtr & node, Pos & max_parsed_pos, Expected & expected)
{
    const Pos start = pos;

    ParserWhiteSpaceOrComments skip_ws;
    ParserExpressionWithOptionalAlias expression_parser(false);
    ParserString kw_ascending("ASCENDING", true, true);
    ParserString kw_descending("DESCENDING", true, true);
    ParserString kw_asc("ASC", true, true);
    ParserString kw_desc("DESC", true, true);
    ParserString kw_collate("COLLATE", true, true);
    ParserStringLiteral locale_parser;

    ASTPtr expression;
    if (!expression_parser.parse(pos, end, expression, max_parsed_pos, expected))
        return false;

    skip_ws.ignore(pos, end);

    /// The long forms are tried before the short ones.
    const bool is_descending = kw_descending.ignore(pos, end) || kw_desc.ignore(pos, end);
    if (!is_descending)
    {
        /// An explicit ascending keyword is optional; just skip it if present.
        if (!kw_ascending.ignore(pos, end))
            kw_asc.ignore(pos, end);
    }
    const int direction = is_descending ? -1 : 1;

    skip_ws.ignore(pos, end);

    Poco::SharedPtr<Collator> collator = nullptr;
    if (kw_collate.ignore(pos, end))
    {
        skip_ws.ignore(pos, end);

        ASTPtr locale_literal;
        if (!locale_parser.parse(pos, end, locale_literal, max_parsed_pos, expected))
            return false;

        const String & locale_name = typeid_cast<const ASTLiteral &>(*locale_literal).value.safeGet<String>();
        collator = new Collator(locale_name);
    }

    node = new ASTOrderByElement(StringRange(start, pos), direction, collator);
    node->children.push_back(expression);
    return true;
}
2010-06-24 19:12:10 +00:00
}