ClickHouse/src/Parsers/Lexer.h

#pragma once
#include <stddef.h>
namespace DB
{
#define APPLY_FOR_TOKENS(M) \
    M(Whitespace) \
    M(Comment) \
    \
    M(BareWord) /** Either a keyword (SELECT) or an identifier (column) */ \
    \
    M(Number) /** Always non-negative. No leading plus. 123 or something like 123.456e12, 0x123p12 */ \
    M(StringLiteral) /** 'hello word', 'hello''word', 'hello\'word\\' */ \
    \
    M(QuotedIdentifier) /** "x", `x` */ \
    \
    M(OpeningRoundBracket) \
    M(ClosingRoundBracket) \
    \
    M(OpeningSquareBracket) \
    M(ClosingSquareBracket) \
    \
    M(OpeningCurlyBrace) \
    M(ClosingCurlyBrace) \
    \
    M(Comma) \
    M(Semicolon) \
    M(VerticalDelimiter) /** Vertical delimiter \G */ \
    M(Dot) /** Compound identifiers, like a.b, or the tuple access operator a.1, (x, y).2. */ \
           /** Needs to be distinguished from a floating point number with an omitted integer part: .1 */ \
    \
    M(Asterisk) /** Could be used as the multiplication operator or on its own: "SELECT *" */ \
    \
    M(HereDoc) \
    \
    M(DollarSign) \
    M(Plus) \
    M(Minus) \
    M(Slash) \
    M(Percent) \
    M(Arrow) /** ->. Should be distinguished from minus operator. */ \
    M(QuestionMark) \
    M(Colon) \
    M(DoubleColon) \
    M(Equals) \
    M(NotEquals) \
    M(Less) \
    M(Greater) \
    M(LessOrEquals) \
    M(GreaterOrEquals) \
    M(Concatenation) /** String concatenation operator: || */ \
    \
    M(At) /** @. Used for specifying user names and also for MySQL-style variables. */ \
    M(DoubleAt) /** @@. Used for MySQL-style global variables. */ \
    \
    /** Order is important. EndOfStream goes after all usual tokens, and special error tokens go after EndOfStream. */ \
    \
    M(EndOfStream) \
    \
    /** Something unrecognized. */ \
    M(Error) \
    /** Something is wrong and we have more information. */ \
    M(ErrorMultilineCommentIsNotClosed) \
    M(ErrorSingleQuoteIsNotClosed) \
    M(ErrorDoubleQuoteIsNotClosed) \
    M(ErrorBackQuoteIsNotClosed) \
    M(ErrorSingleExclamationMark) \
    M(ErrorSinglePipeMark) \
    M(ErrorWrongNumber) \
    M(ErrorMaxQuerySizeExceeded) \

enum class TokenType
{
#define M(TOKEN) TOKEN,
APPLY_FOR_TOKENS(M)
#undef M
};
const char * getTokenName(TokenType type);
const char * getErrorTokenDescription(TokenType type);
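/** getTokenName() and getErrorTokenDescription() are defined out of line (see Lexer.cpp).
  * A minimal sketch of how the X-macro above can be reused to generate such a mapping;
  * this is illustrative only, not necessarily the exact implementation:
  *
  *     const char * getTokenName(TokenType type)
  *     {
  *         switch (type)
  *         {
  * #define M(TOKEN) case TokenType::TOKEN: return #TOKEN;
  *             APPLY_FOR_TOKENS(M)
  * #undef M
  *         }
  *         return "Unknown";
  *     }
  */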
struct Token
{
    TokenType type;
    const char * begin;
    const char * end;

    size_t size() const { return end - begin; }

    Token() = default;
    Token(TokenType type_, const char * begin_, const char * end_) : type(type_), begin(begin_), end(end_) {}

    bool isSignificant() const { return type != TokenType::Whitespace && type != TokenType::Comment; }
    bool isError() const { return type > TokenType::EndOfStream; }
    bool isEnd() const { return type == TokenType::EndOfStream; }
};
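/** For example (illustrative): lexing the text "SELECT 1" yields a BareWord token spanning
  * the 6 characters of "SELECT" (size() == 6), then a Whitespace token (not significant),
  * then a Number token, and finally EndOfStream. Error tokens are ordered after EndOfStream
  * in TokenType, which is what isError() relies on.
  */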
class Lexer
{
public:
    Lexer(const char * begin_, const char * end_, size_t max_query_size_ = 0)
        : begin(begin_), pos(begin_), end(end_), max_query_size(max_query_size_) {}

    Token nextToken();

private:
    const char * const begin;
    const char * pos;
    const char * const end;
    const size_t max_query_size;

    Token nextTokenImpl();

    /// This is needed to disambiguate tuple access operator from floating point number (.1).
    TokenType prev_significant_token_type = TokenType::Whitespace; /// No previous token.
};
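/** A usage sketch (illustrative, not part of this header), assuming the query text is held
  * in a std::string named `query`:
  *
  *     Lexer lexer(query.data(), query.data() + query.size());
  *     for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken())
  *     {
  *         if (!token.isSignificant())
  *             continue;      /// skip whitespace and comments
  *         if (token.isError())
  *             break;         /// e.g. ErrorSingleQuoteIsNotClosed
  *         /// process the text in [token.begin, token.end)
  *     }
  */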
}