#pragma once #include #include #include namespace DB { /** Parser operates on lazy stream of tokens. * It could do lookaheads of any depth. */ /** Used as an input for parsers. * All whitespace and comment tokens are transparently skipped. */ class Tokens { private: std::vector data; Lexer lexer; public: Tokens(const char * begin, const char * end, size_t max_query_size = 0) : lexer(begin, end, max_query_size) {} const Token & operator[] (size_t index) { while (true) { if (index < data.size()) return data[index]; if (!data.empty() && data.back().isEnd()) return data.back(); Token token = lexer.nextToken(); if (token.isSignificant()) data.emplace_back(token); } } const Token & max() { if (data.empty()) return (*this)[0]; return data.back(); } }; /// To represent position in a token stream. class TokenIterator { private: Tokens * tokens; size_t index = 0; public: explicit TokenIterator(Tokens & tokens_) : tokens(&tokens_) {} ALWAYS_INLINE const Token & get() { return (*tokens)[index]; } ALWAYS_INLINE const Token & operator*() { return get(); } ALWAYS_INLINE const Token * operator->() { return &get(); } ALWAYS_INLINE TokenIterator & operator++() { ++index; return *this; } ALWAYS_INLINE TokenIterator & operator--() { --index; return *this; } ALWAYS_INLINE bool operator<(const TokenIterator & rhs) const { return index < rhs.index; } ALWAYS_INLINE bool operator<=(const TokenIterator & rhs) const { return index <= rhs.index; } ALWAYS_INLINE bool operator==(const TokenIterator & rhs) const { return index == rhs.index; } ALWAYS_INLINE bool operator!=(const TokenIterator & rhs) const { return index != rhs.index; } ALWAYS_INLINE bool isValid() { return get().type < TokenType::EndOfStream; } /// Rightmost token we had looked. ALWAYS_INLINE const Token & max() { return tokens->max(); } }; /// Returns positions of unmatched parentheses. using UnmatchedParentheses = std::vector; UnmatchedParentheses checkUnmatchedParentheses(TokenIterator begin); }