mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-23 18:20:50 +00:00
Using lexer (incomplete) [#CLICKHOUSE-2].
This commit is contained in:
parent
4b1eeee1a3
commit
d815b766fa
@ -221,10 +221,10 @@ Token Lexer::nextToken()
|
|||||||
return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, end);
|
return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, end);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Token(TokenType::Division, token_begin, pos);
|
return Token(TokenType::Slash, token_begin, pos);
|
||||||
}
|
}
|
||||||
case '%':
|
case '%':
|
||||||
return Token(TokenType::Modulo, token_begin, ++pos);
|
return Token(TokenType::Percent, token_begin, ++pos);
|
||||||
case '=': /// =, ==
|
case '=': /// =, ==
|
||||||
{
|
{
|
||||||
++pos;
|
++pos;
|
||||||
|
@ -31,8 +31,8 @@ enum class TokenType
|
|||||||
|
|
||||||
Plus,
|
Plus,
|
||||||
Minus,
|
Minus,
|
||||||
Division,
|
Slash,
|
||||||
Modulo,
|
Percent,
|
||||||
Arrow, /// ->. Should be distinguished from minus operator.
|
Arrow, /// ->. Should be distinguished from minus operator.
|
||||||
QuestionMark,
|
QuestionMark,
|
||||||
Colon,
|
Colon,
|
||||||
|
@ -71,17 +71,13 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
|||||||
/// VALUES or FORMAT or SELECT
|
/// VALUES or FORMAT or SELECT
|
||||||
if (s_values.ignore(pos, expected))
|
if (s_values.ignore(pos, expected))
|
||||||
{
|
{
|
||||||
data = pos;
|
data = pos->begin;
|
||||||
}
|
}
|
||||||
else if (s_format.ignore(pos, expected))
|
else if (s_format.ignore(pos, expected))
|
||||||
{
|
{
|
||||||
if (!name_p.parse(pos, format, expected))
|
if (!name_p.parse(pos, format, expected))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
|
|
||||||
const char * data_begin = pos->end;
|
|
||||||
|
|
||||||
ws_without_nl.ignore(pos);
|
|
||||||
if (pos->type == TokenType::Semicolon)
|
if (pos->type == TokenType::Semicolon)
|
||||||
throw Exception("You have excessive ';' symbol before data for INSERT.\n"
|
throw Exception("You have excessive ';' symbol before data for INSERT.\n"
|
||||||
"Example:\n\n"
|
"Example:\n\n"
|
||||||
@ -91,10 +87,17 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
|||||||
"\n"
|
"\n"
|
||||||
"Note that there is no ';' in first line.", ErrorCodes::SYNTAX_ERROR);
|
"Note that there is no ';' in first line.", ErrorCodes::SYNTAX_ERROR);
|
||||||
|
|
||||||
if (pos.isValid() && *pos == '\n')
|
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
|
||||||
++pos;
|
data = pos->begin;
|
||||||
|
|
||||||
data = pos;
|
while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
|
||||||
|
++data;
|
||||||
|
|
||||||
|
if (data < end && *data == '\r')
|
||||||
|
++data;
|
||||||
|
|
||||||
|
if (data < end && *data == '\n')
|
||||||
|
++data;
|
||||||
}
|
}
|
||||||
else if (s_select.ignore(pos, expected))
|
else if (s_select.ignore(pos, expected))
|
||||||
{
|
{
|
||||||
@ -108,7 +111,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto query = std::make_shared<ASTInsertQuery>(StringRange(begin, data ? data : pos));
|
auto query = std::make_shared<ASTInsertQuery>(StringRange(begin, pos));
|
||||||
node = query;
|
node = query;
|
||||||
|
|
||||||
if (database)
|
if (database)
|
||||||
@ -121,7 +124,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
|||||||
|
|
||||||
query->columns = columns;
|
query->columns = columns;
|
||||||
query->select = select;
|
query->select = select;
|
||||||
query->data = data != end ? data : NULL;
|
query->data = data != end ? data : nullptr;
|
||||||
query->end = end;
|
query->end = end;
|
||||||
|
|
||||||
if (columns)
|
if (columns)
|
||||||
|
@ -26,10 +26,11 @@ class ParserInsertQuery : public IParserBase
|
|||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
const char * end;
|
const char * end;
|
||||||
protected:
|
|
||||||
|
const char * getName() const override { return "INSERT query"; }
|
||||||
|
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||||
|
public:
|
||||||
ParserInsertQuery(const char * end) : end(end) {}
|
ParserInsertQuery(const char * end) : end(end) {}
|
||||||
const char * getName() const { return "INSERT query"; }
|
|
||||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -10,10 +10,11 @@ class ParserQuery : public IParserBase
|
|||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
const char * end;
|
const char * end;
|
||||||
protected:
|
|
||||||
ParserInsertQuery(const char * end) : end(end) {}
|
const char * getName() const override { return "Query"; }
|
||||||
const char * getName() const { return "Query"; }
|
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected);
|
public:
|
||||||
|
ParserQuery(const char * end) : end(end) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
|
|||||||
UInt64 num_after = 0;
|
UInt64 num_after = 0;
|
||||||
Int64 exponent = 0;
|
Int64 exponent = 0;
|
||||||
|
|
||||||
IParser::Pos pos_after_first_num = tryReadIntText(num_before, pos, end);
|
const char * pos_after_first_num = tryReadIntText(num_before, pos, end);
|
||||||
|
|
||||||
bool has_num_before_point = pos_after_first_num > pos;
|
bool has_num_before_point = pos_after_first_num > pos;
|
||||||
pos = pos_after_first_num;
|
pos = pos_after_first_num;
|
||||||
@ -32,7 +32,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
|
|||||||
|
|
||||||
if (has_point)
|
if (has_point)
|
||||||
{
|
{
|
||||||
IParser::Pos pos_after_second_num = tryReadIntText(num_after, pos, end);
|
const char * pos_after_second_num = tryReadIntText(num_after, pos, end);
|
||||||
number_of_digits_after_point = pos_after_second_num - pos;
|
number_of_digits_after_point = pos_after_second_num - pos;
|
||||||
pos = pos_after_second_num;
|
pos = pos_after_second_num;
|
||||||
}
|
}
|
||||||
@ -42,7 +42,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
|
|||||||
if (has_exponent)
|
if (has_exponent)
|
||||||
{
|
{
|
||||||
++pos;
|
++pos;
|
||||||
IParser::Pos pos_after_exponent = tryReadIntText(exponent, pos, end);
|
const char * pos_after_exponent = tryReadIntText(exponent, pos, end);
|
||||||
|
|
||||||
if (pos_after_exponent == pos)
|
if (pos_after_exponent == pos)
|
||||||
return false;
|
return false;
|
||||||
@ -92,7 +92,7 @@ bool ParserSampleRatio::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
|||||||
if (!parseDecimal(pos->begin, pos->end, numerator))
|
if (!parseDecimal(pos->begin, pos->end, numerator))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
bool has_slash = pos.isValid() && *pos == '/';
|
bool has_slash = pos->type == TokenType::Slash;
|
||||||
|
|
||||||
if (has_slash)
|
if (has_slash)
|
||||||
{
|
{
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
#include <Parsers/parseQuery.h>
|
#include <Parsers/parseQuery.h>
|
||||||
#include <Parsers/ParserQuery.h>
|
#include <Parsers/ParserQuery.h>
|
||||||
#include <Parsers/ASTInsertQuery.h>
|
#include <Parsers/ASTInsertQuery.h>
|
||||||
|
#include <Parsers/Lexer.h>
|
||||||
|
#include <Parsers/TokenIterator.h>
|
||||||
#include <Common/StringUtils.h>
|
#include <Common/StringUtils.h>
|
||||||
#include <Common/typeid_cast.h>
|
#include <Common/typeid_cast.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <IO/WriteHelpers.h>
|
||||||
@ -24,7 +26,7 @@ static std::pair<size_t, size_t> getLineAndCol(const char * begin, const char *
|
|||||||
size_t line = 0;
|
size_t line = 0;
|
||||||
|
|
||||||
const char * nl;
|
const char * nl;
|
||||||
while (nullptr != (nl = reinterpret_cast<IParser::Pos>(memchr(begin, '\n', pos - begin))))
|
while (nullptr != (nl = reinterpret_cast<const char *>(memchr(begin, '\n', pos - begin))))
|
||||||
{
|
{
|
||||||
++line;
|
++line;
|
||||||
begin = nl + 1;
|
begin = nl + 1;
|
||||||
@ -117,7 +119,11 @@ ASTPtr tryParseQuery(
|
|||||||
const std::string & description,
|
const std::string & description,
|
||||||
bool allow_multi_statements)
|
bool allow_multi_statements)
|
||||||
{
|
{
|
||||||
if (pos == end || *pos == ';')
|
Tokens tokens(pos, end);
|
||||||
|
TokenIterator token_iterator(tokens);
|
||||||
|
|
||||||
|
if (token_iterator->type == TokenType::EndOfStream
|
||||||
|
|| token_iterator->type == TokenType::Semicolon)
|
||||||
{
|
{
|
||||||
out_error_message = "Empty query";
|
out_error_message = "Empty query";
|
||||||
return nullptr;
|
return nullptr;
|
||||||
@ -125,33 +131,42 @@ ASTPtr tryParseQuery(
|
|||||||
|
|
||||||
Expected expected = "";
|
Expected expected = "";
|
||||||
const char * begin = pos;
|
const char * begin = pos;
|
||||||
const char * max_parsed_pos = pos;
|
|
||||||
|
|
||||||
ASTPtr res;
|
ASTPtr res;
|
||||||
bool parse_res = parser.parse(pos, res, expected);
|
bool parse_res = parser.parse(token_iterator, res, expected);
|
||||||
|
|
||||||
/// Parsed query must end with end of data or semicolon.
|
/// Lexical error
|
||||||
if (!parse_res || (pos.isValid() && *pos != ';'))
|
if (!parse_res && token_iterator->type > TokenType::EndOfStream)
|
||||||
|
{
|
||||||
|
expected = "any valid token";
|
||||||
|
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Excessive input after query. Parsed query must end with end of data or semicolon.
|
||||||
|
if (parse_res && token_iterator->type != TokenType::EndOfStream && token_iterator->type != TokenType::Semicolon)
|
||||||
{
|
{
|
||||||
if (!expected || !*expected)
|
|
||||||
expected = "end of query";
|
expected = "end of query";
|
||||||
out_error_message = getSyntaxErrorMessage(begin, expected, hilite, description);
|
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If multi-statements are not allowed, then after semicolon, there must be no non-space characters.
|
/// If multi-statements are not allowed, then after semicolon, there must be no non-space characters.
|
||||||
if (!allow_multi_statements && pos.isValid() && *pos == ';')
|
while (token_iterator->type == TokenType::Semicolon)
|
||||||
{
|
++token_iterator;
|
||||||
++pos;
|
|
||||||
while (pos.isValid() && isWhitespaceASCII(*pos))
|
|
||||||
++pos;
|
|
||||||
|
|
||||||
if (pos.isValid())
|
if (!allow_multi_statements && token_iterator->type != TokenType::EndOfStream)
|
||||||
{
|
{
|
||||||
out_error_message = getSyntaxErrorMessage(begin, end, pos, nullptr, hilite,
|
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, nullptr, hilite,
|
||||||
(description.empty() ? std::string() : std::string(". ")) + "Multi-statements are not allowed");
|
(description.empty() ? std::string() : std::string(". ")) + "Multi-statements are not allowed");
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse error.
|
||||||
|
if (!parse_res)
|
||||||
|
{
|
||||||
|
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
|
||||||
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
@ -189,15 +204,16 @@ ASTPtr parseQuery(
|
|||||||
std::pair<const char *, bool> splitMultipartQuery(const std::string & queries, std::vector<std::string> & queries_list)
|
std::pair<const char *, bool> splitMultipartQuery(const std::string & queries, std::vector<std::string> & queries_list)
|
||||||
{
|
{
|
||||||
ASTPtr ast;
|
ASTPtr ast;
|
||||||
ParserQuery parser;
|
|
||||||
|
|
||||||
const char * begin = queries.data(); /// begin of current query
|
const char * begin = queries.data(); /// begin of current query
|
||||||
const char * pos = begin; /// parser moves pos from begin to the end of current query
|
const char * pos = begin; /// parser moves pos from begin to the end of current query
|
||||||
const char * end = begin + queries.size();
|
const char * end = begin + queries.size();
|
||||||
|
|
||||||
|
ParserQuery parser(end);
|
||||||
|
|
||||||
queries_list.clear();
|
queries_list.clear();
|
||||||
|
|
||||||
while (pos.isValid())
|
while (pos < end)
|
||||||
{
|
{
|
||||||
begin = pos;
|
begin = pos;
|
||||||
|
|
||||||
|
@ -34,8 +34,8 @@ std::map<TokenType, const char *> hilite =
|
|||||||
{TokenType::Asterisk, "\033[1;33m"},
|
{TokenType::Asterisk, "\033[1;33m"},
|
||||||
{TokenType::Plus, "\033[1;33m"},
|
{TokenType::Plus, "\033[1;33m"},
|
||||||
{TokenType::Minus, "\033[1;33m"},
|
{TokenType::Minus, "\033[1;33m"},
|
||||||
{TokenType::Division, "\033[1;33m"},
|
{TokenType::Slash, "\033[1;33m"},
|
||||||
{TokenType::Modulo, "\033[1;33m"},
|
{TokenType::Percent, "\033[1;33m"},
|
||||||
{TokenType::Arrow, "\033[1;33m"},
|
{TokenType::Arrow, "\033[1;33m"},
|
||||||
{TokenType::QuestionMark, "\033[1;33m"},
|
{TokenType::QuestionMark, "\033[1;33m"},
|
||||||
{TokenType::Colon, "\033[1;33m"},
|
{TokenType::Colon, "\033[1;33m"},
|
||||||
|
Loading…
Reference in New Issue
Block a user