mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Using lexer (development) [#CLICKHOUSE-2].
This commit is contained in:
parent
f6ff1f7e72
commit
d4974b0724
@ -277,10 +277,10 @@ private:
|
||||
ErrorCodes::SYNTAX_ERROR};
|
||||
};
|
||||
|
||||
auto match = [&](const char * str)
|
||||
auto match = [&pos](const char * str) mutable
|
||||
{
|
||||
size_t length = strlen(str);
|
||||
if (pos + length < end && 0 == memcmp(pos, str, length))
|
||||
if (pos + length <= end && 0 == memcmp(pos, str, length))
|
||||
{
|
||||
pos += length;
|
||||
return true;
|
||||
|
@ -431,7 +431,7 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
char * pos_double = buf;
|
||||
errno = 0; /// Functions strto* don't clear errno.
|
||||
Float64 float_value = std::strtod(buf, &pos_double);
|
||||
if (pos_double == buf || errno == ERANGE)
|
||||
if (pos_double != buf + pos->size() || errno == ERANGE)
|
||||
{
|
||||
expected = "number";
|
||||
return false;
|
||||
@ -475,7 +475,7 @@ bool ParserUnsignedInteger::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
|
||||
|
||||
UInt64 x = 0;
|
||||
ReadBufferFromMemory in(pos->begin, pos->size());
|
||||
if (!tryReadIntText(x, in) || in.count() == 0)
|
||||
if (!tryReadIntText(x, in) || in.count() != pos->size())
|
||||
{
|
||||
expected = "unsigned integer";
|
||||
return false;
|
||||
@ -506,6 +506,12 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
|
||||
return false;
|
||||
}
|
||||
|
||||
if (in.count() != pos->size())
|
||||
{
|
||||
expected = "string literal";
|
||||
return false;
|
||||
}
|
||||
|
||||
++pos;
|
||||
node = std::make_shared<ASTLiteral>(StringRange(pos->begin, pos->end), s);
|
||||
return true;
|
||||
|
@ -98,40 +98,52 @@ Token Lexer::nextTokenImpl()
|
||||
{
|
||||
/// The task is not to parse a number or check correctness, but only to skip it.
|
||||
|
||||
/// 0x, 0b
|
||||
bool hex = false;
|
||||
if (pos + 2 < end && *pos == '0' && (pos[1] == 'x' || pos[1] == 'b' || pos[1] == 'X' || pos[1] == 'B'))
|
||||
{
|
||||
if (pos[1] == 'x' || pos[1] == 'X')
|
||||
hex = true;
|
||||
pos += 2;
|
||||
}
|
||||
else
|
||||
++pos;
|
||||
|
||||
while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos)))
|
||||
++pos;
|
||||
|
||||
/// decimal point
|
||||
if (pos < end && *pos == '.')
|
||||
/// Disambiguation: if previous token was dot, then we could parse only simple integer,
|
||||
/// for chained tuple access operators (x.1.1) to work.
|
||||
/// Otherwise it will be tokenized as x . 1.1, not as x . 1 . 1.
|
||||
if (prev_significant_token_type == TokenType::Dot)
|
||||
{
|
||||
++pos;
|
||||
while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos)))
|
||||
++pos;
|
||||
}
|
||||
|
||||
/// exponentiation (base 10 or base 2)
|
||||
if (pos + 1 < end && (hex ? (*pos == 'p' || *pos == 'P') : (*pos == 'e' || *pos == 'E')))
|
||||
{
|
||||
++pos;
|
||||
|
||||
/// sign of exponent. It is always decimal.
|
||||
if (pos + 1 < end && (*pos == '-' || *pos == '+'))
|
||||
++pos;
|
||||
|
||||
while (pos < end && isNumericASCII(*pos))
|
||||
++pos;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// 0x, 0b
|
||||
bool hex = false;
|
||||
if (pos + 2 < end && *pos == '0' && (pos[1] == 'x' || pos[1] == 'b' || pos[1] == 'X' || pos[1] == 'B'))
|
||||
{
|
||||
if (pos[1] == 'x' || pos[1] == 'X')
|
||||
hex = true;
|
||||
pos += 2;
|
||||
}
|
||||
else
|
||||
++pos;
|
||||
|
||||
while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos)))
|
||||
++pos;
|
||||
|
||||
/// decimal point
|
||||
if (pos < end && *pos == '.')
|
||||
{
|
||||
++pos;
|
||||
while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos)))
|
||||
++pos;
|
||||
}
|
||||
|
||||
/// exponentiation (base 10 or base 2)
|
||||
if (pos + 1 < end && (hex ? (*pos == 'p' || *pos == 'P') : (*pos == 'e' || *pos == 'E')))
|
||||
{
|
||||
++pos;
|
||||
|
||||
/// sign of exponent. It is always decimal.
|
||||
if (pos + 1 < end && (*pos == '-' || *pos == '+'))
|
||||
++pos;
|
||||
|
||||
while (pos < end && isNumericASCII(*pos))
|
||||
++pos;
|
||||
}
|
||||
}
|
||||
|
||||
/// word character cannot go just after number (SELECT 123FROM)
|
||||
if (pos < end && isWordCharASCII(*pos))
|
||||
@ -168,13 +180,14 @@ Token Lexer::nextTokenImpl()
|
||||
|
||||
case '.': /// qualifier, tuple access operator or start of floating point number
|
||||
{
|
||||
/// Just after identifier or complex expression.
|
||||
/// Just after identifier or complex expression or number (for chained tuple access like x.1.1 to work properly).
|
||||
if (pos > begin
|
||||
&& (!(pos + 1 < end && isNumericASCII(pos[1]))
|
||||
|| prev_significant_token_type == TokenType::ClosingRoundBracket
|
||||
|| prev_significant_token_type == TokenType::ClosingSquareBracket
|
||||
|| prev_significant_token_type == TokenType::BareWord
|
||||
|| prev_significant_token_type == TokenType::QuotedIdentifier))
|
||||
|| prev_significant_token_type == TokenType::QuotedIdentifier
|
||||
|| prev_significant_token_type == TokenType::Number))
|
||||
return Token(TokenType::Dot, token_begin, ++pos);
|
||||
|
||||
++pos;
|
||||
|
Loading…
Reference in New Issue
Block a user