Using lexer (incomplete) [#CLICKHOUSE-2].

This commit is contained in:
Alexey Milovidov 2017-07-12 04:49:20 +03:00 committed by alexey-milovidov
parent 4b1eeee1a3
commit d815b766fa
8 changed files with 70 additions and 49 deletions

View File

@@ -221,10 +221,10 @@ Token Lexer::nextToken()
return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, end); return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, end);
} }
} }
return Token(TokenType::Division, token_begin, pos); return Token(TokenType::Slash, token_begin, pos);
} }
case '%': case '%':
return Token(TokenType::Modulo, token_begin, ++pos); return Token(TokenType::Percent, token_begin, ++pos);
case '=': /// =, == case '=': /// =, ==
{ {
++pos; ++pos;

View File

@@ -31,8 +31,8 @@ enum class TokenType
Plus, Plus,
Minus, Minus,
Division, Slash,
Modulo, Percent,
Arrow, /// ->. Should be distinguished from minus operator. Arrow, /// ->. Should be distinguished from minus operator.
QuestionMark, QuestionMark,
Colon, Colon,

View File

@@ -71,17 +71,13 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
/// VALUES or FORMAT or SELECT /// VALUES or FORMAT or SELECT
if (s_values.ignore(pos, expected)) if (s_values.ignore(pos, expected))
{ {
data = pos; data = pos->begin;
} }
else if (s_format.ignore(pos, expected)) else if (s_format.ignore(pos, expected))
{ {
if (!name_p.parse(pos, format, expected)) if (!name_p.parse(pos, format, expected))
return false; return false;
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
const char * data_begin = pos->end;
ws_without_nl.ignore(pos);
if (pos->type == TokenType::Semicolon) if (pos->type == TokenType::Semicolon)
throw Exception("You have excessive ';' symbol before data for INSERT.\n" throw Exception("You have excessive ';' symbol before data for INSERT.\n"
"Example:\n\n" "Example:\n\n"
@@ -91,10 +87,17 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
"\n" "\n"
"Note that there is no ';' in first line.", ErrorCodes::SYNTAX_ERROR); "Note that there is no ';' in first line.", ErrorCodes::SYNTAX_ERROR);
if (pos.isValid() && *pos == '\n') /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
++pos; data = pos->begin;
data = pos; while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
++data;
if (data < end && *data == '\r')
++data;
if (data < end && *data == '\n')
++data;
} }
else if (s_select.ignore(pos, expected)) else if (s_select.ignore(pos, expected))
{ {
@@ -108,7 +111,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
return false; return false;
} }
auto query = std::make_shared<ASTInsertQuery>(StringRange(begin, data ? data : pos)); auto query = std::make_shared<ASTInsertQuery>(StringRange(begin, pos));
node = query; node = query;
if (database) if (database)
@@ -121,7 +124,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
query->columns = columns; query->columns = columns;
query->select = select; query->select = select;
query->data = data != end ? data : NULL; query->data = data != end ? data : nullptr;
query->end = end; query->end = end;
if (columns) if (columns)

View File

@@ -26,10 +26,11 @@ class ParserInsertQuery : public IParserBase
{ {
private: private:
const char * end; const char * end;
protected:
const char * getName() const override { return "INSERT query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
public:
ParserInsertQuery(const char * end) : end(end) {} ParserInsertQuery(const char * end) : end(end) {}
const char * getName() const { return "INSERT query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected);
}; };
} }

View File

@@ -10,10 +10,11 @@ class ParserQuery : public IParserBase
{ {
private: private:
const char * end; const char * end;
protected:
ParserInsertQuery(const char * end) : end(end) {} const char * getName() const override { return "Query"; }
const char * getName() const { return "Query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected); public:
ParserQuery(const char * end) : end(end) {}
}; };
} }

View File

@@ -16,7 +16,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
UInt64 num_after = 0; UInt64 num_after = 0;
Int64 exponent = 0; Int64 exponent = 0;
IParser::Pos pos_after_first_num = tryReadIntText(num_before, pos, end); const char * pos_after_first_num = tryReadIntText(num_before, pos, end);
bool has_num_before_point = pos_after_first_num > pos; bool has_num_before_point = pos_after_first_num > pos;
pos = pos_after_first_num; pos = pos_after_first_num;
@@ -32,7 +32,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
if (has_point) if (has_point)
{ {
IParser::Pos pos_after_second_num = tryReadIntText(num_after, pos, end); const char * pos_after_second_num = tryReadIntText(num_after, pos, end);
number_of_digits_after_point = pos_after_second_num - pos; number_of_digits_after_point = pos_after_second_num - pos;
pos = pos_after_second_num; pos = pos_after_second_num;
} }
@@ -42,7 +42,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
if (has_exponent) if (has_exponent)
{ {
++pos; ++pos;
IParser::Pos pos_after_exponent = tryReadIntText(exponent, pos, end); const char * pos_after_exponent = tryReadIntText(exponent, pos, end);
if (pos_after_exponent == pos) if (pos_after_exponent == pos)
return false; return false;
@@ -92,7 +92,7 @@ bool ParserSampleRatio::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!parseDecimal(pos->begin, pos->end, numerator)) if (!parseDecimal(pos->begin, pos->end, numerator))
return false; return false;
bool has_slash = pos.isValid() && *pos == '/'; bool has_slash = pos->type == TokenType::Slash;
if (has_slash) if (has_slash)
{ {

View File

@@ -1,6 +1,8 @@
#include <Parsers/parseQuery.h> #include <Parsers/parseQuery.h>
#include <Parsers/ParserQuery.h> #include <Parsers/ParserQuery.h>
#include <Parsers/ASTInsertQuery.h> #include <Parsers/ASTInsertQuery.h>
#include <Parsers/Lexer.h>
#include <Parsers/TokenIterator.h>
#include <Common/StringUtils.h> #include <Common/StringUtils.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
@@ -24,7 +26,7 @@ static std::pair<size_t, size_t> getLineAndCol(const char * begin, const char *
size_t line = 0; size_t line = 0;
const char * nl; const char * nl;
while (nullptr != (nl = reinterpret_cast<IParser::Pos>(memchr(begin, '\n', pos - begin)))) while (nullptr != (nl = reinterpret_cast<const char *>(memchr(begin, '\n', pos - begin))))
{ {
++line; ++line;
begin = nl + 1; begin = nl + 1;
@@ -117,7 +119,11 @@ ASTPtr tryParseQuery(
const std::string & description, const std::string & description,
bool allow_multi_statements) bool allow_multi_statements)
{ {
if (pos == end || *pos == ';') Tokens tokens(pos, end);
TokenIterator token_iterator(tokens);
if (token_iterator->type == TokenType::EndOfStream
|| token_iterator->type == TokenType::Semicolon)
{ {
out_error_message = "Empty query"; out_error_message = "Empty query";
return nullptr; return nullptr;
@@ -125,33 +131,42 @@ ASTPtr tryParseQuery(
Expected expected = ""; Expected expected = "";
const char * begin = pos; const char * begin = pos;
const char * max_parsed_pos = pos;
ASTPtr res; ASTPtr res;
bool parse_res = parser.parse(pos, res, expected); bool parse_res = parser.parse(token_iterator, res, expected);
/// Parsed query must end with end of data or semicolon. /// Lexical error
if (!parse_res || (pos.isValid() && *pos != ';')) if (!parse_res && token_iterator->type > TokenType::EndOfStream)
{
expected = "any valid token";
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
return nullptr;
}
/// Excessive input after query. Parsed query must end with end of data or semicolon.
if (parse_res && token_iterator->type != TokenType::EndOfStream && token_iterator->type != TokenType::Semicolon)
{ {
if (!expected || !*expected)
expected = "end of query"; expected = "end of query";
out_error_message = getSyntaxErrorMessage(begin, expected, hilite, description); out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
return nullptr; return nullptr;
} }
/// If multi-statements are not allowed, then after semicolon, there must be no non-space characters. /// If multi-statements are not allowed, then after semicolon, there must be no non-space characters.
if (!allow_multi_statements && pos.isValid() && *pos == ';') while (token_iterator->type == TokenType::Semicolon)
{ ++token_iterator;
++pos;
while (pos.isValid() && isWhitespaceASCII(*pos))
++pos;
if (pos.isValid()) if (!allow_multi_statements && token_iterator->type != TokenType::EndOfStream)
{ {
out_error_message = getSyntaxErrorMessage(begin, end, pos, nullptr, hilite, out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, nullptr, hilite,
(description.empty() ? std::string() : std::string(". ")) + "Multi-statements are not allowed"); (description.empty() ? std::string() : std::string(". ")) + "Multi-statements are not allowed");
return nullptr; return nullptr;
} }
/// Parse error.
if (!parse_res)
{
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
return nullptr;
} }
return res; return res;
@@ -189,15 +204,16 @@ ASTPtr parseQuery(
std::pair<const char *, bool> splitMultipartQuery(const std::string & queries, std::vector<std::string> & queries_list) std::pair<const char *, bool> splitMultipartQuery(const std::string & queries, std::vector<std::string> & queries_list)
{ {
ASTPtr ast; ASTPtr ast;
ParserQuery parser;
const char * begin = queries.data(); /// begin of current query const char * begin = queries.data(); /// begin of current query
const char * pos = begin; /// parser moves pos from begin to the end of current query const char * pos = begin; /// parser moves pos from begin to the end of current query
const char * end = begin + queries.size(); const char * end = begin + queries.size();
ParserQuery parser(end);
queries_list.clear(); queries_list.clear();
while (pos.isValid()) while (pos < end)
{ {
begin = pos; begin = pos;

View File

@@ -34,8 +34,8 @@ std::map<TokenType, const char *> hilite =
{TokenType::Asterisk, "\033[1;33m"}, {TokenType::Asterisk, "\033[1;33m"},
{TokenType::Plus, "\033[1;33m"}, {TokenType::Plus, "\033[1;33m"},
{TokenType::Minus, "\033[1;33m"}, {TokenType::Minus, "\033[1;33m"},
{TokenType::Division, "\033[1;33m"}, {TokenType::Slash, "\033[1;33m"},
{TokenType::Modulo, "\033[1;33m"}, {TokenType::Percent, "\033[1;33m"},
{TokenType::Arrow, "\033[1;33m"}, {TokenType::Arrow, "\033[1;33m"},
{TokenType::QuestionMark, "\033[1;33m"}, {TokenType::QuestionMark, "\033[1;33m"},
{TokenType::Colon, "\033[1;33m"}, {TokenType::Colon, "\033[1;33m"},