mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-23 18:20:50 +00:00
Using lexer (incomplete) [#CLICKHOUSE-2].
This commit is contained in:
parent
4b1eeee1a3
commit
d815b766fa
@ -221,10 +221,10 @@ Token Lexer::nextToken()
|
||||
return Token(TokenType::ErrorMultilineCommentIsNotClosed, token_begin, end);
|
||||
}
|
||||
}
|
||||
return Token(TokenType::Division, token_begin, pos);
|
||||
return Token(TokenType::Slash, token_begin, pos);
|
||||
}
|
||||
case '%':
|
||||
return Token(TokenType::Modulo, token_begin, ++pos);
|
||||
return Token(TokenType::Percent, token_begin, ++pos);
|
||||
case '=': /// =, ==
|
||||
{
|
||||
++pos;
|
||||
|
@ -31,8 +31,8 @@ enum class TokenType
|
||||
|
||||
Plus,
|
||||
Minus,
|
||||
Division,
|
||||
Modulo,
|
||||
Slash,
|
||||
Percent,
|
||||
Arrow, /// ->. Should be distinguished from minus operator.
|
||||
QuestionMark,
|
||||
Colon,
|
||||
|
@ -71,17 +71,13 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
/// VALUES or FORMAT or SELECT
|
||||
if (s_values.ignore(pos, expected))
|
||||
{
|
||||
data = pos;
|
||||
data = pos->begin;
|
||||
}
|
||||
else if (s_format.ignore(pos, expected))
|
||||
{
|
||||
if (!name_p.parse(pos, format, expected))
|
||||
return false;
|
||||
|
||||
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
|
||||
const char * data_begin = pos->end;
|
||||
|
||||
ws_without_nl.ignore(pos);
|
||||
if (pos->type == TokenType::Semicolon)
|
||||
throw Exception("You have excessive ';' symbol before data for INSERT.\n"
|
||||
"Example:\n\n"
|
||||
@ -91,10 +87,17 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
"\n"
|
||||
"Note that there is no ';' in first line.", ErrorCodes::SYNTAX_ERROR);
|
||||
|
||||
if (pos.isValid() && *pos == '\n')
|
||||
++pos;
|
||||
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
|
||||
data = pos->begin;
|
||||
|
||||
data = pos;
|
||||
while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
|
||||
++data;
|
||||
|
||||
if (data < end && *data == '\r')
|
||||
++data;
|
||||
|
||||
if (data < end && *data == '\n')
|
||||
++data;
|
||||
}
|
||||
else if (s_select.ignore(pos, expected))
|
||||
{
|
||||
@ -108,7 +111,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
return false;
|
||||
}
|
||||
|
||||
auto query = std::make_shared<ASTInsertQuery>(StringRange(begin, data ? data : pos));
|
||||
auto query = std::make_shared<ASTInsertQuery>(StringRange(begin, pos));
|
||||
node = query;
|
||||
|
||||
if (database)
|
||||
@ -121,7 +124,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
|
||||
query->columns = columns;
|
||||
query->select = select;
|
||||
query->data = data != end ? data : NULL;
|
||||
query->data = data != end ? data : nullptr;
|
||||
query->end = end;
|
||||
|
||||
if (columns)
|
||||
|
@ -26,10 +26,11 @@ class ParserInsertQuery : public IParserBase
|
||||
{
|
||||
private:
|
||||
const char * end;
|
||||
protected:
|
||||
|
||||
const char * getName() const override { return "INSERT query"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
public:
|
||||
ParserInsertQuery(const char * end) : end(end) {}
|
||||
const char * getName() const { return "INSERT query"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -10,10 +10,11 @@ class ParserQuery : public IParserBase
|
||||
{
|
||||
private:
|
||||
const char * end;
|
||||
protected:
|
||||
ParserInsertQuery(const char * end) : end(end) {}
|
||||
const char * getName() const { return "Query"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected);
|
||||
|
||||
const char * getName() const override { return "Query"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
public:
|
||||
ParserQuery(const char * end) : end(end) {}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
|
||||
UInt64 num_after = 0;
|
||||
Int64 exponent = 0;
|
||||
|
||||
IParser::Pos pos_after_first_num = tryReadIntText(num_before, pos, end);
|
||||
const char * pos_after_first_num = tryReadIntText(num_before, pos, end);
|
||||
|
||||
bool has_num_before_point = pos_after_first_num > pos;
|
||||
pos = pos_after_first_num;
|
||||
@ -32,7 +32,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
|
||||
|
||||
if (has_point)
|
||||
{
|
||||
IParser::Pos pos_after_second_num = tryReadIntText(num_after, pos, end);
|
||||
const char * pos_after_second_num = tryReadIntText(num_after, pos, end);
|
||||
number_of_digits_after_point = pos_after_second_num - pos;
|
||||
pos = pos_after_second_num;
|
||||
}
|
||||
@ -42,7 +42,7 @@ static bool parseDecimal(const char * pos, const char * end, ASTSampleRatio::Rat
|
||||
if (has_exponent)
|
||||
{
|
||||
++pos;
|
||||
IParser::Pos pos_after_exponent = tryReadIntText(exponent, pos, end);
|
||||
const char * pos_after_exponent = tryReadIntText(exponent, pos, end);
|
||||
|
||||
if (pos_after_exponent == pos)
|
||||
return false;
|
||||
@ -92,7 +92,7 @@ bool ParserSampleRatio::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
if (!parseDecimal(pos->begin, pos->end, numerator))
|
||||
return false;
|
||||
|
||||
bool has_slash = pos.isValid() && *pos == '/';
|
||||
bool has_slash = pos->type == TokenType::Slash;
|
||||
|
||||
if (has_slash)
|
||||
{
|
||||
|
@ -1,6 +1,8 @@
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <Parsers/ParserQuery.h>
|
||||
#include <Parsers/ASTInsertQuery.h>
|
||||
#include <Parsers/Lexer.h>
|
||||
#include <Parsers/TokenIterator.h>
|
||||
#include <Common/StringUtils.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
@ -24,7 +26,7 @@ static std::pair<size_t, size_t> getLineAndCol(const char * begin, const char *
|
||||
size_t line = 0;
|
||||
|
||||
const char * nl;
|
||||
while (nullptr != (nl = reinterpret_cast<IParser::Pos>(memchr(begin, '\n', pos - begin))))
|
||||
while (nullptr != (nl = reinterpret_cast<const char *>(memchr(begin, '\n', pos - begin))))
|
||||
{
|
||||
++line;
|
||||
begin = nl + 1;
|
||||
@ -117,7 +119,11 @@ ASTPtr tryParseQuery(
|
||||
const std::string & description,
|
||||
bool allow_multi_statements)
|
||||
{
|
||||
if (pos == end || *pos == ';')
|
||||
Tokens tokens(pos, end);
|
||||
TokenIterator token_iterator(tokens);
|
||||
|
||||
if (token_iterator->type == TokenType::EndOfStream
|
||||
|| token_iterator->type == TokenType::Semicolon)
|
||||
{
|
||||
out_error_message = "Empty query";
|
||||
return nullptr;
|
||||
@ -125,33 +131,42 @@ ASTPtr tryParseQuery(
|
||||
|
||||
Expected expected = "";
|
||||
const char * begin = pos;
|
||||
const char * max_parsed_pos = pos;
|
||||
|
||||
ASTPtr res;
|
||||
bool parse_res = parser.parse(pos, res, expected);
|
||||
bool parse_res = parser.parse(token_iterator, res, expected);
|
||||
|
||||
/// Parsed query must end with end of data or semicolon.
|
||||
if (!parse_res || (pos.isValid() && *pos != ';'))
|
||||
/// Lexical error
|
||||
if (!parse_res && token_iterator->type > TokenType::EndOfStream)
|
||||
{
|
||||
expected = "any valid token";
|
||||
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Excessive input after query. Parsed query must end with end of data or semicolon.
|
||||
if (parse_res && token_iterator->type != TokenType::EndOfStream && token_iterator->type != TokenType::Semicolon)
|
||||
{
|
||||
if (!expected || !*expected)
|
||||
expected = "end of query";
|
||||
out_error_message = getSyntaxErrorMessage(begin, expected, hilite, description);
|
||||
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// If multi-statements are not allowed, then after semicolon, there must be no non-space characters.
|
||||
if (!allow_multi_statements && pos.isValid() && *pos == ';')
|
||||
{
|
||||
++pos;
|
||||
while (pos.isValid() && isWhitespaceASCII(*pos))
|
||||
++pos;
|
||||
while (token_iterator->type == TokenType::Semicolon)
|
||||
++token_iterator;
|
||||
|
||||
if (pos.isValid())
|
||||
if (!allow_multi_statements && token_iterator->type != TokenType::EndOfStream)
|
||||
{
|
||||
out_error_message = getSyntaxErrorMessage(begin, end, pos, nullptr, hilite,
|
||||
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, nullptr, hilite,
|
||||
(description.empty() ? std::string() : std::string(". ")) + "Multi-statements are not allowed");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Parse error.
|
||||
if (!parse_res)
|
||||
{
|
||||
out_error_message = getSyntaxErrorMessage(begin, end, token_iterator->begin, expected, hilite, description);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return res;
|
||||
@ -189,15 +204,16 @@ ASTPtr parseQuery(
|
||||
std::pair<const char *, bool> splitMultipartQuery(const std::string & queries, std::vector<std::string> & queries_list)
|
||||
{
|
||||
ASTPtr ast;
|
||||
ParserQuery parser;
|
||||
|
||||
const char * begin = queries.data(); /// begin of current query
|
||||
const char * pos = begin; /// parser moves pos from begin to the end of current query
|
||||
const char * end = begin + queries.size();
|
||||
|
||||
ParserQuery parser(end);
|
||||
|
||||
queries_list.clear();
|
||||
|
||||
while (pos.isValid())
|
||||
while (pos < end)
|
||||
{
|
||||
begin = pos;
|
||||
|
||||
|
@ -34,8 +34,8 @@ std::map<TokenType, const char *> hilite =
|
||||
{TokenType::Asterisk, "\033[1;33m"},
|
||||
{TokenType::Plus, "\033[1;33m"},
|
||||
{TokenType::Minus, "\033[1;33m"},
|
||||
{TokenType::Division, "\033[1;33m"},
|
||||
{TokenType::Modulo, "\033[1;33m"},
|
||||
{TokenType::Slash, "\033[1;33m"},
|
||||
{TokenType::Percent, "\033[1;33m"},
|
||||
{TokenType::Arrow, "\033[1;33m"},
|
||||
{TokenType::QuestionMark, "\033[1;33m"},
|
||||
{TokenType::Colon, "\033[1;33m"},
|
||||
|
Loading…
Reference in New Issue
Block a user