diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 76f6a44cdca..e45ded6f8f7 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -277,10 +277,10 @@ private: ErrorCodes::SYNTAX_ERROR}; }; - auto match = [&](const char * str) + auto match = [&pos](const char * str) mutable { size_t length = strlen(str); - if (pos + length < end && 0 == memcmp(pos, str, length)) + if (pos + length <= end && 0 == memcmp(pos, str, length)) { pos += length; return true; diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index e947399ff98..89e64fabc25 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -431,7 +431,7 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) char * pos_double = buf; errno = 0; /// Functions strto* don't clear errno. Float64 float_value = std::strtod(buf, &pos_double); - if (pos_double == buf || errno == ERANGE) + if (pos_double != buf + pos->size() || errno == ERANGE) { expected = "number"; return false; @@ -475,7 +475,7 @@ bool ParserUnsignedInteger::parseImpl(Pos & pos, ASTPtr & node, Expected & expec UInt64 x = 0; ReadBufferFromMemory in(pos->begin, pos->size()); - if (!tryReadIntText(x, in) || in.count() == 0) + if (!tryReadIntText(x, in) || in.count() != pos->size()) { expected = "unsigned integer"; return false; @@ -506,6 +506,12 @@ bool ParserStringLiteral::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte return false; } + if (in.count() != pos->size()) + { + expected = "string literal"; + return false; + } + ++pos; node = std::make_shared(StringRange(pos->begin, pos->end), s); return true; diff --git a/dbms/src/Parsers/Lexer.cpp b/dbms/src/Parsers/Lexer.cpp index 378820e9ee4..58f65042ff0 100644 --- a/dbms/src/Parsers/Lexer.cpp +++ b/dbms/src/Parsers/Lexer.cpp @@ -98,40 +98,52 @@ Token Lexer::nextTokenImpl() { /// The task is not to parse a number or check correctness, but only to skip it. - /// 0x, 0b - bool hex = false; - if (pos + 2 < end && *pos == '0' && (pos[1] == 'x' || pos[1] == 'b' || pos[1] == 'X' || pos[1] == 'B')) - { - if (pos[1] == 'x' || pos[1] == 'X') - hex = true; - pos += 2; - } - else - ++pos; - - while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos))) - ++pos; - - /// decimal point - if (pos < end && *pos == '.') + /// Disambiguation: if previous token was dot, then we could parse only simple integer, + /// for chained tuple access operators (x.1.1) to work. + // Otherwise it will be tokenized as x . 1.1, not as x . 1 . 1 + if (prev_significant_token_type == TokenType::Dot) { ++pos; - while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos))) - ++pos; - } - - /// exponentation (base 10 or base 2) - if (pos + 1 < end && (hex ? (*pos == 'p' || *pos == 'P') : (*pos == 'e' || *pos == 'E'))) - { - ++pos; - - /// sign of exponent. It is always decimal. - if (pos + 1 < end && (*pos == '-' || *pos == '+')) - ++pos; - while (pos < end && isNumericASCII(*pos)) ++pos; } + else + { + /// 0x, 0b + bool hex = false; + if (pos + 2 < end && *pos == '0' && (pos[1] == 'x' || pos[1] == 'b' || pos[1] == 'X' || pos[1] == 'B')) + { + if (pos[1] == 'x' || pos[1] == 'X') + hex = true; + pos += 2; + } + else + ++pos; + + while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos))) + ++pos; + + /// decimal point + if (pos < end && *pos == '.') + { + ++pos; + while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos))) + ++pos; + } + + /// exponentation (base 10 or base 2) + if (pos + 1 < end && (hex ? (*pos == 'p' || *pos == 'P') : (*pos == 'e' || *pos == 'E'))) + { + ++pos; + + /// sign of exponent. It is always decimal. + if (pos + 1 < end && (*pos == '-' || *pos == '+')) + ++pos; + + while (pos < end && isNumericASCII(*pos)) + ++pos; + } + } /// word character cannot go just after number (SELECT 123FROM) if (pos < end && isWordCharASCII(*pos)) @@ -168,13 +180,14 @@ Token Lexer::nextTokenImpl() case '.': /// qualifier, tuple access operator or start of floating point number { - /// Just after identifier or complex expression. + /// Just after identifier or complex expression or number (for chained tuple access like x.1.1 to work properly). if (pos > begin && (!(pos + 1 < end && isNumericASCII(pos[1])) || prev_significant_token_type == TokenType::ClosingRoundBracket || prev_significant_token_type == TokenType::ClosingSquareBracket || prev_significant_token_type == TokenType::BareWord - || prev_significant_token_type == TokenType::QuotedIdentifier)) + || prev_significant_token_type == TokenType::QuotedIdentifier + || prev_significant_token_type == TokenType::Number)) return Token(TokenType::Dot, token_begin, ++pos); ++pos;