ClickHouse/programs/client/QueryFuzzer.h

81 lines
2.7 KiB
C++
Raw Normal View History

2020-06-26 05:44:42 +00:00
#pragma once
#include <unordered_set>
#include <unordered_map>
#include <vector>
2021-01-27 00:54:57 +00:00
#include <pcg-random/pcg_random.hpp>
2020-07-07 16:31:58 +00:00
#include <Common/randomSeed.h>
2020-06-26 05:44:42 +00:00
#include <Core/Field.h>
#include <Parsers/IAST.h>
2021-01-27 00:54:57 +00:00
2020-06-26 05:44:42 +00:00
namespace DB
{
2020-12-18 00:21:23 +00:00
class ASTExpressionList;
class ASTOrderByElement;
2021-05-29 13:12:18 +00:00
struct ASTWindowDefinition;
2020-12-18 00:21:23 +00:00
2020-07-07 16:31:58 +00:00
/*
* This is an AST-based query fuzzer that makes random modifications to query
* AST, changing numbers, list of columns, functions, etc. It remembers part of
* queries it fuzzed previously, and can substitute these parts to new fuzzed
* queries, so you want to feed it a lot of queries to get some interesting mix
* of them. Normally we feed SQL regression tests to it.
*/
2020-06-26 05:44:42 +00:00
struct QueryFuzzer
{
2020-07-07 16:31:58 +00:00
pcg64 fuzz_rand{randomSeed()};
2020-06-26 05:44:42 +00:00
2020-12-22 04:02:39 +00:00
// We add elements to expression lists with fixed probability. Some elements
// are so large, that the expected number of elements we add to them is
// one or higher, hence this process might never finish. Put some limit on the
// total depth of AST to prevent this.
// This field is reset for each fuzzMain() call.
size_t current_ast_depth = 0;
2020-07-07 16:31:58 +00:00
// These arrays hold parts of queries that we can substitute into the query
// we are currently fuzzing. We add some part from each new query we are asked
// to fuzz, and keep this state between queries, so the fuzzing output becomes
// more interesting over time, as the queries mix.
2020-06-26 05:44:42 +00:00
std::unordered_set<std::string> aliases_set;
std::vector<std::string> aliases;
2020-07-08 10:17:06 +00:00
std::unordered_map<std::string, ASTPtr> column_like_map;
std::vector<ASTPtr> column_like;
2020-06-26 05:44:42 +00:00
2020-07-08 10:17:06 +00:00
std::unordered_map<std::string, ASTPtr> table_like_map;
std::vector<ASTPtr> table_like;
2020-06-26 05:44:42 +00:00
2020-12-22 04:02:39 +00:00
// Some debug fields for detecting problematic ASTs with loops.
// These are reset for each fuzzMain call.
std::unordered_set<const IAST *> debug_visited_nodes;
2021-05-02 22:42:01 +00:00
ASTPtr * debug_top_ast = nullptr;
2020-12-22 04:02:39 +00:00
2020-07-07 16:31:58 +00:00
// This is the only function you have to call -- it will modify the passed
// ASTPtr to point to new AST with some random changes.
void fuzzMain(ASTPtr & ast);
2020-08-08 01:21:04 +00:00
// Various helper functions follow, normally you shouldn't have to call them.
2020-06-26 05:44:42 +00:00
Field getRandomField(int type);
Field fuzzField(Field field);
ASTPtr getRandomColumnLike();
void replaceWithColumnLike(ASTPtr & ast);
void replaceWithTableLike(ASTPtr & ast);
2020-12-18 00:21:23 +00:00
void fuzzOrderByElement(ASTOrderByElement * elem);
void fuzzOrderByList(IAST * ast);
void fuzzColumnLikeExpressionList(IAST * ast);
2021-05-29 13:12:18 +00:00
void fuzzWindowFrame(ASTWindowDefinition & def);
2020-06-26 05:44:42 +00:00
void fuzz(ASTs & asts);
void fuzz(ASTPtr & ast);
void collectFuzzInfoMain(const ASTPtr ast);
void addTableLike(const ASTPtr ast);
void addColumnLike(const ASTPtr ast);
void collectFuzzInfoRecurse(const ASTPtr ast);
};
}