Using lexer (incomplete) [#CLICKHOUSE-2].

This commit is contained in:
Alexey Milovidov 2017-07-12 04:49:20 +03:00 committed by alexey-milovidov
parent 4b1eeee1a3
commit d815b766fa
8 changed files with 70 additions and 49 deletions

View File

@ -221,10 +221,10 @@ Token Lexer::nextToken()
return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, end);
}
}
return Token(TokenType::Division, token_begin, pos);
return Token(TokenType::Slash, token_begin, pos);
}
case '%':
return Token(TokenType::Modulo, token_begin, ++pos);
return Token(TokenType::Percent, token_begin, ++pos);
case '=': /// =, ==
{
++pos;

View File

@ -31,8 +31,8 @@ enum class TokenType
Plus,
Minus,
Division,
Modulo,
Slash,
Percent,
Arrow, /// ->. Should be distinguished from minus operator.
QuestionMark,
Colon,

View File

@ -71,17 +71,13 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
/// VALUES or FORMAT or SELECT
if (s_values.ignore(pos, expected))
{
data = pos;
data = pos->begin;
}
else if (s_format.ignore(pos, expected))
{
if (!name_p.parse(pos, format, expected))
return false;
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
const char * data_begin = pos->end;
ws_without_nl.ignore(pos);
if (pos->type == TokenType::Semicolon)
throw Exception("You have excessive ';' symbol before data for INSERT.\n"
"Example:\n\n"
@ -91,10 +87,17 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
"\n"
"Note that there is no ';' in first line.", ErrorCodes::SYNTAX_ERROR);
if (pos.isValid() && *pos == '\n')
++pos;
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
data = pos->begin;
data = pos;
while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
++data;
if (data < end && *data == '\r')
++data;
if (data < end && *data == '\n')
++data;
}
else if (s_select.ignore(pos, expected))
{
@ -108,7 +111,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
return false;
}
auto query = std::make_shared<ASTInsertQuery>(StringRange(begin, data ? data : pos));
auto query = std::make_shared<ASTInsertQuery>(StringRange(begin, pos));
node = query;
if (database)
@ -121,7 +124,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
query->columns = columns;
query->select = select;
query->data = data != end ? data : NULL;
query->data = data != end ? data : nullptr;
query->end = end;
if (columns)

View File

@ -26,10 +26,11 @@ class ParserInsertQuery : public IParserBase
{
private:
const char * end;
protected:
const char * getName() const override { return "INSERT query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
public:
ParserInsertQuery(const char * end) : end(end) {}
const char * getName() const { return "INSERT query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected);
};
}

View File

@ -10,10 +10,11 @@ class ParserQuery : public IParserBase
{
private:
const char * end;
protected:
ParserInsertQuery(const char * end) : end(end) {}
const char * getName() const { return "Query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected);
const char * getName() const override { return "Query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
public:
ParserQuery(const char * end) : end(end) {}
};
}

View File

@ -16,7 +16,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
UInt64 num_after = 0;
Int64 exponent = 0;
IParser::Pos pos_after_first_num = tryReadIntText(num_before, pos, end);
const char * pos_after_first_num = tryReadIntText(num_before, pos, end);
bool has_num_before_point = pos_after_first_num > pos;
pos = pos_after_first_num;
@ -32,7 +32,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
if (has_point)
{
IParser::Pos pos_after_second_num = tryReadIntText(num_after, pos, end);
const char * pos_after_second_num = tryReadIntText(num_after, pos, end);
number_of_digits_after_point = pos_after_second_num - pos;
pos = pos_after_second_num;
}
@ -42,7 +42,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
if (has_exponent)
{
++pos;
IParser::Pos pos_after_exponent = tryReadIntText(exponent, pos, end);
const char * pos_after_exponent = tryReadIntText(exponent, pos, end);
if (pos_after_exponent == pos)
return false;
@ -92,7 +92,7 @@ bool ParserSampleRatio::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!parseDecimal(pos->begin, pos->end, numerator))
return false;
bool has_slash = pos.isValid() && *pos == '/';
bool has_slash = pos->type == TokenType::Slash;
if (has_slash)
{

View File

@ -1,6 +1,8 @@
#include <Parsers/parseQuery.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/Lexer.h>
#include <Parsers/TokenIterator.h>
#include <Common/StringUtils.h>
#include <Common/typeid_cast.h>
#include <IO/WriteHelpers.h>
@ -24,7 +26,7 @@ static std::pair<size_t, size_t> getLineAndCol(const char * begin, const char *
size_t line = 0;
const char * nl;
while (nullptr != (nl = reinterpret_cast<IParser::Pos>(memchr(begin, '\n', pos - begin))))
while (nullptr != (nl = reinterpret_cast<const char *>(memchr(begin, '\n', pos - begin))))
{
++line;
begin = nl + 1;
@ -117,7 +119,11 @@ ASTPtr tryParseQuery(
const std::string & description,
bool allow_multi_statements)
{
if (pos == end || *pos == ';')
Tokens tokens(pos, end);
TokenIterator token_iterator(tokens);
if (token_iterator->type == TokenType::EndOfStream
|| token_iterator->type == TokenType::Semicolon)
{
out_error_message = "Empty query";
return nullptr;
@ -125,33 +131,42 @@ ASTPtr tryParseQuery(
Expected expected = "";
const char * begin = pos;
const char * max_parsed_pos = pos;
ASTPtr res;
bool parse_res = parser.parse(pos, res, expected);
bool parse_res = parser.parse(token_iterator, res, expected);
/// Parsed query must end with end of data or semicolon.
if (!parse_res || (pos.isValid() && *pos != ';'))
/// Lexical error
if (!parse_res && token_iterator->type > TokenType::EndOfStream)
{
expected = "any valid token";
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
return nullptr;
}
/// Excessive input after query. Parsed query must end with end of data or semicolon.
if (parse_res && token_iterator->type != TokenType::EndOfStream && token_iterator->type != TokenType::Semicolon)
{
if (!expected || !*expected)
expected = "end of query";
out_error_message = getSyntaxErrorMessage(begin, expected, hilite, description);
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
return nullptr;
}
/// If multi-statements are not allowed, then after semicolon, there must be no non-space characters.
if (!allow_multi_statements && pos.isValid() && *pos == ';')
{
++pos;
while (pos.isValid() && isWhitespaceASCII(*pos))
++pos;
while (token_iterator->type == TokenType::Semicolon)
++token_iterator;
if (pos.isValid())
if (!allow_multi_statements && token_iterator->type != TokenType::EndOfStream)
{
out_error_message = getSyntaxErrorMessage(begin, end, pos, nullptr, hilite,
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, nullptr, hilite,
(description.empty() ? std::string() : std::string(". ")) + "Multi-statements are not allowed");
return nullptr;
}
/// Parse error.
if (!parse_res)
{
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
return nullptr;
}
return res;
@ -189,15 +204,16 @@ ASTPtr parseQuery(
std::pair<const char *, bool> splitMultipartQuery(const std::string & queries, std::vector<std::string> & queries_list)
{
ASTPtr ast;
ParserQuery parser;
const char * begin = queries.data(); /// begin of current query
const char * pos = begin; /// parser moves pos from begin to the end of current query
const char * end = begin + queries.size();
ParserQuery parser(end);
queries_list.clear();
while (pos.isValid())
while (pos < end)
{
begin = pos;

View File

@ -34,8 +34,8 @@ std::map<TokenType, const char *> hilite =
{TokenType::Asterisk, "\033[1;33m"},
{TokenType::Plus, "\033[1;33m"},
{TokenType::Minus, "\033[1;33m"},
{TokenType::Division, "\033[1;33m"},
{TokenType::Modulo, "\033[1;33m"},
{TokenType::Slash, "\033[1;33m"},
{TokenType::Percent, "\033[1;33m"},
{TokenType::Arrow, "\033[1;33m"},
{TokenType::QuestionMark, "\033[1;33m"},
{TokenType::Colon, "\033[1;33m"},