ClickHouse/src/Parsers/IParser.h

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

156 lines
4.7 KiB
C++
Raw Normal View History

#pragma once
2010-06-24 19:12:10 +00:00
#include <absl/container/inlined_vector.h>
2023-03-29 09:20:27 +00:00
#include <algorithm>
#include <memory>
2010-06-24 19:12:10 +00:00
#include <Core/Defines.h>
#include <Parsers/IAST_fwd.h>
#include <Parsers/TokenIterator.h>
#include <base/types.h>
#include <Common/Exception.h>
#include <Common/checkStackSize.h>
2010-06-24 19:12:10 +00:00
namespace DB
{
/// Error codes used by the parser interface in this header.
/// They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
{
/// Thrown by IParser::Pos::increaseDepth when the configured maximum parse depth is exceeded.
extern const int TOO_DEEP_RECURSION;
/// Thrown by IParser::Pos::decreaseDepth when the depth counter would go below zero.
extern const int LOGICAL_ERROR;
}
/** Collects variants, how parser could proceed further at rightmost position.
*/
struct Expected
{
absl::InlinedVector<const char *, 7> variants;
2022-10-13 20:19:25 +00:00
const char * max_parsed_pos = nullptr;
/// 'description' should be statically allocated string.
2021-11-29 10:32:06 +00:00
ALWAYS_INLINE void add(const char * current_pos, const char * description)
{
if (!max_parsed_pos || current_pos > max_parsed_pos)
{
variants.clear();
max_parsed_pos = current_pos;
2021-11-29 18:46:51 +00:00
variants.push_back(description);
return;
}
2022-10-14 13:43:06 +00:00
if ((current_pos == max_parsed_pos) && (std::find(variants.begin(), variants.end(), description) == variants.end()))
variants.push_back(description);
}
2021-11-29 10:32:06 +00:00
ALWAYS_INLINE void add(TokenIterator it, const char * description)
{
add(it->begin, description);
}
};
2010-06-24 19:12:10 +00:00
2017-05-27 17:29:55 +00:00
/** Interface for parser classes
2010-06-24 19:12:10 +00:00
*/
class IParser
{
public:
/// Token iterator augmented with depth information. This allows to control recursion depth.
struct Pos : TokenIterator
{
uint32_t depth = 0;
uint32_t max_depth = 0;
2022-01-09 02:45:54 +00:00
2022-01-09 07:42:06 +00:00
Pos(Tokens & tokens_, uint32_t max_depth_) : TokenIterator(tokens_), max_depth(max_depth_)
2021-12-25 12:23:00 +00:00
{
}
Pos(TokenIterator token_iterator_, uint32_t max_depth_) : TokenIterator(token_iterator_), max_depth(max_depth_) { }
2021-11-29 10:32:22 +00:00
ALWAYS_INLINE void increaseDepth()
{
++depth;
2021-11-29 10:32:22 +00:00
if (unlikely(max_depth > 0 && depth > max_depth))
throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Maximum parse depth ({}) exceeded. "
"Consider rising max_parser_depth parameter.", max_depth);
/** Sometimes the maximum parser depth can be set to a high value by the user,
* but we still want to avoid stack overflow.
* For this purpose, we can use the checkStackSize function, but it is too heavy.
* The solution is to check not too frequently.
* The frequency is arbitrary, but not too large, not too small,
* and a power of two to simplify the division.
*/
#if defined(USE_MUSL) || defined(SANITIZER) || !defined(NDEBUG)
static constexpr uint32_t check_frequency = 128;
#else
static constexpr uint32_t check_frequency = 8192;
#endif
if (depth % check_frequency == 0)
checkStackSize();
}
2021-11-29 10:32:22 +00:00
ALWAYS_INLINE void decreaseDepth()
{
2021-11-29 10:32:22 +00:00
if (unlikely(depth == 0))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in parser: incorrect calculation of parse depth");
--depth;
}
};
2010-06-24 19:12:10 +00:00
2017-05-27 17:29:55 +00:00
/** Get the text of this parser parses. */
virtual const char * getName() const = 0;
2010-06-24 19:12:10 +00:00
2017-05-27 17:29:55 +00:00
/** Parse piece of text from position `pos`, but not beyond end of line (`end` - position after end of line),
* move pointer `pos` to the maximum position to which it was possible to parse,
* in case of success return `true` and the result in `node` if it is needed, otherwise false,
* in `expected` write what was expected in the maximum position,
* to which it was possible to parse if parsing was unsuccessful,
* or what this parser parse if parsing was successful.
* The string to which the [begin, end) range is included may be not 0-terminated.
2010-06-24 19:12:10 +00:00
*/
virtual bool parse(Pos & pos, ASTPtr & node, Expected & expected) = 0;
2010-06-24 19:12:10 +00:00
2023-02-19 22:15:09 +00:00
bool ignore(Pos & pos, Expected & expected)
2010-06-24 19:12:10 +00:00
{
ASTPtr ignore_node;
return parse(pos, ignore_node, expected);
2010-06-24 19:12:10 +00:00
}
2015-02-01 07:27:56 +00:00
2023-02-19 22:15:09 +00:00
bool ignore(Pos & pos)
2010-06-24 19:12:10 +00:00
{
Expected expected;
return ignore(pos, expected);
2010-06-24 19:12:10 +00:00
}
2017-05-27 17:29:55 +00:00
/** The same, but do not move the position and do not write the result to node.
2010-06-24 19:12:10 +00:00
*/
bool check(Pos & pos, Expected & expected)
2010-06-24 19:12:10 +00:00
{
Pos begin = pos;
ASTPtr node;
if (!parse(pos, node, expected))
2010-06-24 19:12:10 +00:00
{
pos = begin;
return false;
}
else
return true;
}
2019-10-08 13:26:15 +00:00
/** The same, but doesn't move the position even if parsing was successful.
2018-11-14 22:46:39 +00:00
*/
2019-10-08 13:26:15 +00:00
bool checkWithoutMoving(Pos pos, Expected & expected)
2018-11-14 22:46:39 +00:00
{
ASTPtr node;
return parse(pos, node, expected);
}
virtual ~IParser() = default;
2010-06-24 19:12:10 +00:00
};
/// Uniquely-owning pointer to a parser, held through the IParser interface.
using ParserPtr = std::unique_ptr<IParser>;
2010-06-24 19:12:10 +00:00
}