#include "QueryFuzzer.h"

// NOTE(review): the header names and the template arguments in this file were
// missing from the reviewed copy; they are reconstructed from the symbols that
// are used below. Confirm the include list against the build.
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/IDataType.h>
#include <IO/WriteBufferFromOStream.h>

#include <unordered_set>

#include <pcg_random.hpp>
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>

#include <Core/Types.h>
#include <IO/Operators.h>
#include <IO/UseSSL.h>
#include <IO/WriteBufferFromOStream.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/ASTDropQuery.h>
#include <Parsers/ASTExplainQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTQueryWithOutput.h>
#include <Parsers/ASTSelectIntersectExceptQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTSetQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTUseQuery.h>
#include <Parsers/ASTWindowDefinition.h>
#include <Parsers/ParserDataType.h>
#include <Parsers/ParserInsertQuery.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>


namespace DB
{

namespace ErrorCodes
{
    extern const int TOO_DEEP_RECURSION;
}

/// Returns a random "interesting" value of the requested family:
/// 0 -- integer, 1 -- floating point, 2 -- decimal.
/// Any other `type` is a programming error (asserts; returns Null in release).
Field QueryFuzzer::getRandomField(int type)
{
    /// Boundary values around powers of two and the limits of the integer types.
    static constexpr Int64 bad_int64_values[]
        = {-2, -1, 0, 1, 2, 3, 7, 10, 100, 255, 256, 257, 1023, 1024,
           1025, 65535, 65536, 65537, 1024 * 1024 - 1, 1024 * 1024,
           1024 * 1024 + 1, INT_MIN - 1ll, INT_MIN, INT_MIN + 1,
           INT_MAX - 1, INT_MAX, INT_MAX + 1ll, INT64_MIN, INT64_MIN + 1,
           INT64_MAX - 1, INT64_MAX};

    switch (type)
    {
        case 0:
        {
            return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)];
        }
        case 1:
        {
            static constexpr double values[]
                    = {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999,
                       1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20,
                       FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON};
            return values[fuzz_rand() % std::size(values)];
        }
        case 2:
        {
            static constexpr UInt64 scales[] = {0, 1, 2, 10};
            return DecimalField<Decimal64>(
                bad_int64_values[fuzz_rand() % std::size(bad_int64_values)],
                static_cast<UInt32>(scales[fuzz_rand() % std::size(scales)])
            );
        }
        default:
            assert(false);
            return Null{};
    }
}

/// Returns a randomly mutated copy of `field`.
/// Numbers are replaced by random "interesting" values (occasionally of a
/// different numeric family), strings are emptied / repeated / get a zero
/// byte, arrays and tuples get elements erased, duplicated and recursively
/// fuzzed. With probability 1/20 any non-numeric value (and 1/20 of numeric
/// ones) is replaced by Null.
Field QueryFuzzer::fuzzField(Field field)
{
    const auto type = field.getType();

    int type_index = -1;

    if (type == Field::Types::Int64
        || type == Field::Types::UInt64)
    {
        type_index = 0;
    }
    else if (type == Field::Types::Float64)
    {
        type_index = 1;
    }
    else if (type == Field::Types::Decimal32
        || type == Field::Types::Decimal64
        || type == Field::Types::Decimal128
        || type == Field::Types::Decimal256)
    {
        type_index = 2;
    }

    if (fuzz_rand() % 20 == 0)
    {
        return Null{};
    }

    if (type_index >= 0)
    {
        if (fuzz_rand() % 20 == 0)
        {
            // Change type sometimes, but not often, because it mostly leads to
            // boring errors.
            type_index = fuzz_rand() % 3;
        }
        return getRandomField(type_index);
    }

    // Shared mutation for Array and Tuple: both are sequences of Field, and
    // the original code applied exactly the same steps to each of them.
    auto fuzz_elements = [this](auto & elements)
    {
        if (fuzz_rand() % 5 == 0 && !elements.empty())
        {
            size_t pos = fuzz_rand() % elements.size();
            elements.erase(elements.begin() + pos);
            std::cerr << "erased\n";
        }

        if (fuzz_rand() % 5 == 0)
        {
            if (!elements.empty())
            {
                size_t pos = fuzz_rand() % elements.size();
                // fuzzField() takes its argument by value, so the container
                // is not touched until the insert itself.
                elements.insert(elements.begin() + pos, fuzzField(elements[pos]));
                std::cerr << fmt::format("inserted (pos {})\n", pos);
            }
            else
            {
                elements.insert(elements.begin(), getRandomField(0));
                std::cerr << "inserted (0)\n";
            }
        }

        for (auto & element : elements)
        {
            element = fuzzField(element);
        }
    };

    if (type == Field::Types::String)
    {
        auto & str = field.get<std::string>();
        UInt64 action = fuzz_rand() % 10;
        switch (action)
        {
            case 0:
                str = "";
                break;
            case 1:
                str = str + str;
                break;
            case 2:
                str = str + str + str + str;
                break;
            case 4:
                if (!str.empty())
                {
                    str[fuzz_rand() % str.size()] = '\0';
                }
                break;
            default:
                // Do nothing
                break;
        }
    }
    else if (type == Field::Types::Array)
    {
        fuzz_elements(field.get<Array>());
    }
    else if (type == Field::Types::Tuple)
    {
        fuzz_elements(field.get<Tuple>());
    }

    return field;
}

/// Returns a clone (with the alias removed) of a random previously collected
/// column-like expression, or nullptr if nothing has been collected yet.
ASTPtr QueryFuzzer::getRandomColumnLike()
{
    if (column_like.empty())
    {
        return nullptr;
    }

    ASTPtr new_ast = column_like[fuzz_rand() % column_like.size()]->clone();
    new_ast->setAlias("");

    return new_ast;
}

/// Returns an expression list of 1..5 random column-like expressions,
/// or nullptr if nothing has been collected yet.
ASTPtr QueryFuzzer::getRandomExpressionList()
{
    if (column_like.empty())
    {
        return nullptr;
    }

    ASTPtr new_ast = std::make_shared<ASTExpressionList>();

    // Draw the length once. Re-evaluating the random bound in the loop
    // condition (as was done before) compares `i` with a different number on
    // every iteration and skews the result towards very short lists.
    const size_t count = fuzz_rand() % 5 + 1;
    for (size_t i = 0; i < count; ++i)
    {
        new_ast->children.push_back(getRandomColumnLike());
    }
    return new_ast;
}

/// Replaces `ast` with a random column-like expression, keeping its alias.
/// Does nothing if no column-like expressions have been collected.
void QueryFuzzer::replaceWithColumnLike(ASTPtr & ast)
{
    if (column_like.empty())
    {
        return;
    }

    std::string old_alias = ast->tryGetAlias();
    ast = getRandomColumnLike();
    ast->setAlias(old_alias);
}

/// Replaces `ast` with a random table-like expression, keeping its alias.
/// Does nothing if no table-like expressions have been collected.
void QueryFuzzer::replaceWithTableLike(ASTPtr & ast)
{
    if (table_like.empty())
    {
        return;
    }

    ASTPtr new_ast = table_like[fuzz_rand() % table_like.size()]->clone();

    std::string old_alias = ast->tryGetAlias();
    new_ast->setAlias(old_alias);

    ast = new_ast;
}

/// Randomly changes the sort direction or the NULLS FIRST/LAST direction of
/// one ORDER BY element. In half of the cases nothing is changed.
void QueryFuzzer::fuzzOrderByElement(ASTOrderByElement * elem)
{
    switch (fuzz_rand() % 10)
    {
        case 0:
            elem->direction = -1;
            break;
        case 1:
            elem->direction = 1;
            break;
        case 2:
            elem->nulls_direction = -1;
            elem->nulls_direction_was_explicitly_specified = true;
            break;
        case 3:
            elem->nulls_direction = 1;
            elem->nulls_direction_was_explicitly_specified = true;
            break;
        case 4:
            elem->nulls_direction = elem->direction;
            elem->nulls_direction_was_explicitly_specified = false;
            break;
        default:
            // do nothing
            break;
    }
}

/// Occasionally removes an element from, or adds a random element to, an
/// ORDER BY list. `ast` may be null, in which case nothing happens;
/// otherwise it must be an ASTExpressionList.
void QueryFuzzer::fuzzOrderByList(IAST * ast)
{
    if (!ast)
    {
        return;
    }

    auto * list = assert_cast<ASTExpressionList *>(ast);

    // Remove element
    if (fuzz_rand() % 50 == 0 && list->children.size() > 1)
    {
        // Don't remove last element -- this leads to questionable
        // constructs such as empty select.
        list->children.erase(list->children.begin()
                             + fuzz_rand() % list->children.size());
    }

    // Add element
    if (fuzz_rand() % 50 == 0)
    {
        auto * pos = list->children.empty()
            ? list->children.begin()
            : list->children.begin() + fuzz_rand() % list->children.size();
        auto col = getRandomColumnLike();
        if (col)
        {
            auto elem = std::make_shared<ASTOrderByElement>();
            elem->children.push_back(col);
            elem->direction = 1;
            elem->nulls_direction = 1;
            elem->nulls_direction_was_explicitly_specified = false;
            elem->with_fill = false;

            list->children.insert(pos, elem);
        }
        else
        {
            std::cerr << "No random column.\n";
        }
    }

    // We don't have to recurse here to fuzz the children, this is handled by
    // the generic recursion into IAST.children.
}

/// Occasionally removes an element from, or adds a random column-like
/// expression to, an expression list. `ast` may be null, in which case
/// nothing happens; otherwise it must be an ASTExpressionList.
void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
{
    if (!ast)
    {
        return;
    }

    auto * impl = assert_cast<ASTExpressionList *>(ast);

    // Remove element
    if (fuzz_rand() % 50 == 0 && impl->children.size() > 1)
    {
        // Don't remove last element -- this leads to questionable
        // constructs such as empty select.
        impl->children.erase(impl->children.begin()
                             + fuzz_rand() % impl->children.size());
    }

    // Add element
    if (fuzz_rand() % 50 == 0)
    {
        auto * pos = impl->children.empty()
            ? impl->children.begin()
            : impl->children.begin() + fuzz_rand() % impl->children.size();
        auto col = getRandomColumnLike();
        if (col)
            impl->children.insert(pos, col);
        else
            std::cerr << "No random column.\n";
    }

    // We don't have to recurse here to fuzz the children, this is handled by
    // the generic recursion into IAST.children.
}

/// Occasionally changes one property of the window frame (type, begin/end
/// boundary type and offset, PRECEDING/FOLLOWING) and then recomputes
/// `frame_is_default` from the resulting frame.
void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
{
    switch (fuzz_rand() % 40)
    {
        case 0:
        {
            const auto r = fuzz_rand() % 3;
            def.frame_type = r == 0 ? WindowFrame::FrameType::ROWS
                : r == 1 ? WindowFrame::FrameType::RANGE
                    : WindowFrame::FrameType::GROUPS;
            break;
        }
        case 1:
        {
            const auto r = fuzz_rand() % 3;
            def.frame_begin_type = r == 0 ? WindowFrame::BoundaryType::Unbounded
                : r == 1 ? WindowFrame::BoundaryType::Current
                    : WindowFrame::BoundaryType::Offset;

            if (def.frame_begin_type == WindowFrame::BoundaryType::Offset)
            {
                // The offsets are fuzzed normally through 'children'.
                def.frame_begin_offset
                    = std::make_shared<ASTLiteral>(getRandomField(0));
            }
            else
            {
                def.frame_begin_offset = nullptr;
            }
            break;
        }
        case 2:
        {
            const auto r = fuzz_rand() % 3;
            def.frame_end_type = r == 0 ? WindowFrame::BoundaryType::Unbounded
                : r == 1 ? WindowFrame::BoundaryType::Current
                    : WindowFrame::BoundaryType::Offset;

            if (def.frame_end_type == WindowFrame::BoundaryType::Offset)
            {
                def.frame_end_offset
                    = std::make_shared<ASTLiteral>(getRandomField(0));
            }
            else
            {
                def.frame_end_offset = nullptr;
            }
            break;
        }
        case 5:
        {
            def.frame_begin_preceding = fuzz_rand() % 2;
            break;
        }
        case 6:
        {
            def.frame_end_preceding = fuzz_rand() % 2;
            break;
        }
        default:
            break;
    }

    if (def.frame_type == WindowFrame::FrameType::RANGE
        && def.frame_begin_type == WindowFrame::BoundaryType::Unbounded
        && def.frame_begin_preceding
        && def.frame_end_type == WindowFrame::BoundaryType::Current)
    {
        def.frame_is_default = true; /* NOLINT clang-tidy could you just shut up please */
    }
    else
    {
        def.frame_is_default = false;
    }
}

/// A CREATE query can be fuzzed only if it has an explicit list of columns.
bool QueryFuzzer::isSuitableForFuzzing(const ASTCreateQuery & create)
{
    return create.columns_list && create.columns_list->columns;
}

/// Strips the "__fuzz_<N>" suffix; a name without the suffix is returned as is
/// (find() yields npos and substr(0, npos) is the whole string).
static String getOriginalTableName(const String & full_name)
{
    return full_name.substr(0, full_name.find("__fuzz_"));
}

/// Builds the name of the `index`-th fuzzed variant of a table.
static String getFuzzedTableName(const String & original_name, size_t index)
{
    return original_name + "__fuzz_" + toString(index);
}

/// Fuzzes the column types of a CREATE query, downgrades Replicated* engines
/// to their non-replicated counterparts, renames the table to the next
/// "<name>__fuzz_<N>" and remembers the new name if the resulting definition
/// has not been seen before.
void QueryFuzzer::fuzzCreateQuery(ASTCreateQuery & create)
{
    if (isSuitableForFuzzing(create))
    {
        for (auto & ast : create.columns_list->columns->children)
        {
            if (auto * column = ast->as<ASTColumnDeclaration>())
            {
                fuzzColumnDeclaration(*column);
            }
        }
    }

    if (create.storage && create.storage->engine)
    {
        /// Replace ReplicatedMergeTree to ordinary MergeTree
        /// to avoid inconsistency of metadata in zookeeper.
        auto & engine_name = create.storage->engine->name;
        if (startsWith(engine_name, "Replicated"))
        {
            engine_name = engine_name.substr(strlen("Replicated"));
            if (auto & arguments = create.storage->engine->arguments)
            {
                /// The first two engine arguments are dropped together with
                /// the "Replicated" prefix.
                auto & children = arguments->children;
                if (children.size() <= 2)
                    arguments.reset();
                else
                    children.erase(children.begin(), children.begin() + 2);
            }
        }
    }

    auto full_name = create.getTable();
    auto original_name = getOriginalTableName(full_name);

    size_t index = index_of_fuzzed_table[original_name]++;
    auto new_name = getFuzzedTableName(original_name, index);

    create.setTable(new_name);

    SipHash sip_hash;
    sip_hash.update(original_name);
    if (create.columns_list)
        create.columns_list->updateTreeHash(sip_hash, /*ignore_aliases=*/ true);
    if (create.storage)
        create.storage->updateTreeHash(sip_hash, /*ignore_aliases=*/ true);

    const auto hash = getSipHash128AsPair(sip_hash);

    /// Save only tables with unique definition.
    if (created_tables_hashes.insert(hash).second)
        original_table_name_to_fuzzed[original_name].insert(new_name);
}

/// Replaces the type of a column declaration with a fuzzed one.
/// Declarations without an explicit type are left untouched.
void QueryFuzzer::fuzzColumnDeclaration(ASTColumnDeclaration & column)
{
    if (column.type)
    {
        auto data_type = fuzzDataType(DataTypeFactory::instance().get(column.type));

        /// Round-trip through the type name to get an AST for the new type.
        ParserDataType parser;
        column.type = parseQuery(parser, data_type->getName(),
            DBMS_DEFAULT_MAX_QUERY_SIZE,
            DBMS_DEFAULT_MAX_PARSER_DEPTH);
    }
}

/// Returns a randomly modified data type: compound types are usually fuzzed
/// element-wise, Nullable / LowCardinality wrappers may be removed or kept
/// around a fuzzed nested type, and any type may be wrapped into Array /
/// Nullable / LowCardinality or replaced by a completely random type.
DataTypePtr QueryFuzzer::fuzzDataType(DataTypePtr type)
{
    /// Do not replace Array/Tuple/etc. with not Array/Tuple too often.
    const auto * type_array = typeid_cast<const DataTypeArray *>(type.get());
    if (type_array && fuzz_rand() % 4 != 0)
        return std::make_shared<DataTypeArray>(fuzzDataType(type_array->getNestedType()));

    const auto * type_tuple = typeid_cast<const DataTypeTuple *>(type.get());
    if (type_tuple && fuzz_rand() % 4 != 0)
    {
        DataTypes elements;
        for (const auto & element : type_tuple->getElements())
            elements.push_back(fuzzDataType(element));

        return type_tuple->haveExplicitNames()
            ? std::make_shared<DataTypeTuple>(elements, type_tuple->getElementNames())
            : std::make_shared<DataTypeTuple>(elements);
    }

    const auto * type_map = typeid_cast<const DataTypeMap *>(type.get());
    if (type_map && fuzz_rand() % 4 != 0)
    {
        auto key_type = fuzzDataType(type_map->getKeyType());
        auto value_type = fuzzDataType(type_map->getValueType());

        /// Fall back to the original key type if the fuzzed one is not
        /// accepted as a Map key.
        if (!DataTypeMap::checkKeyType(key_type))
            key_type = type_map->getKeyType();

        return std::make_shared<DataTypeMap>(key_type, value_type);
    }

    const auto * type_nullable = typeid_cast<const DataTypeNullable *>(type.get());
    if (type_nullable)
    {
        size_t tmp = fuzz_rand() % 3;
        if (tmp == 0)
            return fuzzDataType(type_nullable->getNestedType());

        if (tmp == 1)
        {
            auto nested_type = fuzzDataType(type_nullable->getNestedType());
            if (nested_type->canBeInsideNullable())
                return std::make_shared<DataTypeNullable>(nested_type);
        }
    }

    const auto * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(type.get());
    if (type_low_cardinality)
    {
        size_t tmp = fuzz_rand() % 3;
        if (tmp == 0)
            return fuzzDataType(type_low_cardinality->getDictionaryType());

        if (tmp == 1)
        {
            auto nested_type = fuzzDataType(type_low_cardinality->getDictionaryType());
            if (nested_type->canBeInsideLowCardinality())
                return std::make_shared<DataTypeLowCardinality>(nested_type);
        }
    }

    /// The `<=` comparisons are intentional: when a wrapper is not applicable
    /// the draw falls through to the next alternative.
    size_t tmp = fuzz_rand() % 8;
    if (tmp == 0)
        return std::make_shared<DataTypeArray>(type);

    if (tmp <= 1 && type->canBeInsideNullable())
        return std::make_shared<DataTypeNullable>(type);

    if (tmp <= 2 && type->canBeInsideLowCardinality())
        return std::make_shared<DataTypeLowCardinality>(type);

    if (tmp <= 3)
        return getRandomType();

    return type;
}

/// Returns a random data type whose TypeIndex lies in (Nothing, Tuple].
/// Types that need parameters are constructed explicitly, the rest are
/// obtained from the factory by the name of the enumerator.
DataTypePtr QueryFuzzer::getRandomType()
{
    auto type_id = static_cast<TypeIndex>(fuzz_rand() % static_cast<size_t>(TypeIndex::Tuple) + 1);

    if (type_id == TypeIndex::Tuple)
    {
        size_t tuple_size = fuzz_rand() % 6 + 1;
        DataTypes elements;
        for (size_t i = 0; i < tuple_size; ++i)
            elements.push_back(getRandomType());
        return std::make_shared<DataTypeTuple>(elements);
    }

    if (type_id == TypeIndex::Array)
        return std::make_shared<DataTypeArray>(getRandomType());

/// NOLINTBEGIN(bugprone-macro-parentheses)
#define DISPATCH(DECIMAL) \
    if (type_id == TypeIndex::DECIMAL) \
        return std::make_shared<DataTypeDecimal<DECIMAL>>( \
            DataTypeDecimal<DECIMAL>::maxPrecision(), \
            (fuzz_rand() % DataTypeDecimal<DECIMAL>::maxPrecision()) + 1);

    DISPATCH(Decimal32)
    DISPATCH(Decimal64)
    DISPATCH(Decimal128)
    DISPATCH(Decimal256)
#undef DISPATCH
/// NOLINTEND(bugprone-macro-parentheses)

    /// FixedString(0) is not a valid type, so the length is drawn from 1..20
    /// (it used to be 0..19).
    if (type_id == TypeIndex::FixedString)
        return std::make_shared<DataTypeFixedString>(fuzz_rand() % 20 + 1);

    /// Enums cannot be created without a list of values,
    /// use the underlying integer types instead.
    if (type_id == TypeIndex::Enum8)
        return std::make_shared<DataTypeUInt8>();

    if (type_id == TypeIndex::Enum16)
        return std::make_shared<DataTypeUInt16>();

    return DataTypeFactory::instance().get(String(magic_enum::enum_name(type_id)));
}

/// With probability 2/3 replaces the table of a table expression with one of
/// its fuzzed variants created earlier by fuzzCreateQuery(), if any exist.
void QueryFuzzer::fuzzTableName(ASTTableExpression & table)
{
    if (!table.database_and_table_name || fuzz_rand() % 3 == 0)
        return;

    const auto * identifier = table.database_and_table_name->as<ASTTableIdentifier>();
    if (!identifier)
        return;

    auto table_id = identifier->getTableId();
    if (table_id.empty())
        return;

    auto original_name = getOriginalTableName(table_id.getTableName());
    auto it = original_table_name_to_fuzzed.find(original_name);
    if (it != original_table_name_to_fuzzed.end() && !it->second.empty())
    {
        auto new_table_name = it->second.begin();
        std::advance(new_table_name, fuzz_rand() % it->second.size());
        StorageID new_table_id(table_id.database_name, *new_table_name);
        table.database_and_table_name = std::make_shared<ASTTableIdentifier>(new_table_id);
    }
}

/// Fuzzes the kind and the settings of an EXPLAIN query and recursively
/// fuzzes its other children. A settings node is created if there was none.
void QueryFuzzer::fuzzExplainQuery(ASTExplainQuery & explain)
{
    explain.setExplainKind(fuzzExplainKind(explain.getKind()));

    bool settings_have_fuzzed = false;
    for (auto & child : explain.children)
    {
        if (auto * settings_ast = typeid_cast<ASTSetQuery *>(child.get()))
        {
            fuzzExplainSettings(*settings_ast, explain.getKind());
            settings_have_fuzzed = true;
        }
        /// Fuzzing other child like Explain Query
        else
        {
            fuzz(child);
        }
    }

    if (!settings_have_fuzzed)
    {
        auto settings_ast = std::make_shared<ASTSetQuery>();
        settings_ast->is_standalone = false;
        fuzzExplainSettings(*settings_ast, explain.getKind());
        explain.setSettings(settings_ast);
    }
}

/// Returns either `kind` itself or a random EXPLAIN kind.
/// Each candidate gets its own independent 1/11 draw, in the listed order.
ASTExplainQuery::ExplainKind QueryFuzzer::fuzzExplainKind(ASTExplainQuery::ExplainKind kind)
{
    if (fuzz_rand() % 20 == 0)
    {
        return kind;
    }

    static constexpr ASTExplainQuery::ExplainKind candidates[] = {
        ASTExplainQuery::ExplainKind::ParsedAST,
        ASTExplainQuery::ExplainKind::AnalyzedSyntax,
        ASTExplainQuery::ExplainKind::QueryTree,
        ASTExplainQuery::ExplainKind::QueryPlan,
        ASTExplainQuery::ExplainKind::QueryPipeline,
        ASTExplainQuery::ExplainKind::QueryEstimates,
        ASTExplainQuery::ExplainKind::TableOverride,
        ASTExplainQuery::ExplainKind::CurrentTransaction,
    };

    for (const auto candidate : candidates)
    {
        if (fuzz_rand() % 11 == 0)
        {
            return candidate;
        }
    }

    return kind;
}

/// Occasionally removes one of the existing EXPLAIN settings and enables a
/// random subset of the settings known for the given EXPLAIN kind.
void QueryFuzzer::fuzzExplainSettings(ASTSetQuery & settings_ast, ASTExplainQuery::ExplainKind kind)
{
    auto & changes = settings_ast.changes;

    /// Every entry is a bare setting name: it is emitted as `<name> = 1`, so
    /// the name must contain neither a value nor several names at once.
    static const std::unordered_map<ASTExplainQuery::ExplainKind, std::vector<String>> settings_by_kind
        = {{ASTExplainQuery::ExplainKind::ParsedAST, {"graph", "optimize"}},
           {ASTExplainQuery::ExplainKind::AnalyzedSyntax, {}},
           {ASTExplainQuery::QueryTree, {"run_passes", "dump_passes", "dump_ast", "passes"}},
           {ASTExplainQuery::ExplainKind::QueryPlan, {"header", "description", "actions", "indexes", "optimize", "json", "sorting"}},
           {ASTExplainQuery::ExplainKind::QueryPipeline, {"header", "graph", "compact"}},
           {ASTExplainQuery::ExplainKind::QueryEstimates, {}},
           {ASTExplainQuery::ExplainKind::TableOverride, {}},
           {ASTExplainQuery::ExplainKind::CurrentTransaction, {}}};

    if (fuzz_rand() % 50 == 0 && !changes.empty())
    {
        changes.erase(changes.begin() + fuzz_rand() % changes.size());
    }

    /// A kind that is not listed above simply has nothing to add; do not
    /// throw std::out_of_range out of the fuzzer because of that.
    const auto it = settings_by_kind.find(kind);
    if (it == settings_by_kind.end())
    {
        return;
    }

    for (const auto & setting : it->second)
    {
        if (fuzz_rand() % 5 == 0)
        {
            changes.emplace_back(setting, true);
        }
    }
}

/// Parses `full_query` as an INSERT query; returns nullptr if it is not one.
static ASTPtr tryParseInsertQuery(const String & full_query)
{
    const char * pos = full_query.data();
    const char * end = full_query.data() + full_query.size();

    ParserInsertQuery parser(end, false);
    String message;

    return tryParseQuery(parser, pos, end, message, false, "", false, DBMS_DEFAULT_MAX_QUERY_SIZE, DBMS_DEFAULT_MAX_PARSER_DEPTH);
}

/// For an INSERT into a table that has fuzzed variants, returns one INSERT
/// query per variant. Returns an empty list for anything else.
ASTs QueryFuzzer::getInsertQueriesForFuzzedTables(const String & full_query)
{
    auto parsed_query = tryParseInsertQuery(full_query);
    if (!parsed_query)
        return {};

    const auto & insert = *parsed_query->as<ASTInsertQuery>();
    if (!insert.table)
        return {};

    auto table_name = insert.getTable();
    auto it = original_table_name_to_fuzzed.find(table_name);
    if (it == original_table_name_to_fuzzed.end())
        return {};

    ASTs queries;
    for (const auto & fuzzed_name : it->second)
    {
        /// Parse query from scratch for each table instead of clone,
        /// to store proper pointers to inlined data,
        /// which are not copied during clone.
        auto & query = queries.emplace_back(tryParseInsertQuery(full_query));
        query->as<ASTInsertQuery>()->setTable(fuzzed_name);
    }

    return queries;
}

/// For a DROP of a table that has fuzzed variants, returns one
/// DROP ... IF EXISTS query per created variant and forgets the variants.
/// Returns an empty list for anything else.
ASTs QueryFuzzer::getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query)
{
    if (drop_query.kind != ASTDropQuery::Drop)
        return {};

    auto table_name = drop_query.getTable();
    auto it = index_of_fuzzed_table.find(table_name);
    if (it == index_of_fuzzed_table.end())
        return {};

    ASTs queries;
    /// Drop all created tables, not only unique ones.
    for (size_t i = 0; i < it->second; ++i)
    {
        auto fuzzed_name = getFuzzedTableName(table_name, i);
        auto & query = queries.emplace_back(drop_query.clone());
        query->as<ASTDropQuery>()->setTable(fuzzed_name);
        /// Just in case add IF EXISTS to avoid exceptions.
        query->as<ASTDropQuery>()->if_exists = true;
    }

    index_of_fuzzed_table.erase(it);
    original_table_name_to_fuzzed.erase(table_name);

    return queries;
}

/// Must be called when a query has failed: a fuzzed table whose CREATE or
/// INSERT did not succeed is no longer offered as a replacement.
void QueryFuzzer::notifyQueryFailed(ASTPtr ast)
{
    if (ast == nullptr)
        return;

    auto remove_fuzzed_table = [this](const auto & table_name)
    {
        auto pos = table_name.find("__fuzz_");
        if (pos != std::string::npos)
        {
            auto original_name = table_name.substr(0, pos);
            auto it = original_table_name_to_fuzzed.find(original_name);
            if (it != original_table_name_to_fuzzed.end())
                it->second.erase(table_name);
        }
    };

    if (const auto * create = ast->as<ASTCreateQuery>())
        remove_fuzzed_table(create->getTable());

    if (const auto * insert = ast->as<ASTInsertQuery>())
        remove_fuzzed_table(insert->getTable());
}

/// Fuzzes every AST of the list in place.
void QueryFuzzer::fuzz(ASTs & asts)
{
    for (auto & ast : asts)
    {
        fuzz(ast);
    }
}

/// Increments the counter for the lifetime of the object.
struct ScopedIncrement
{
    size_t & counter;

    explicit ScopedIncrement(size_t & counter_) : counter(counter_) { ++counter; }
    ~ScopedIncrement() { --counter; }
};

/// Recursively fuzzes the AST in place; the root pointer itself may be
/// replaced (e.g. SELECT <-> EXPLAIN). A null `ast` is ignored.
/// Throws TOO_DEEP_RECURSION if the AST is deeper than 500 levels.
void QueryFuzzer::fuzz(ASTPtr & ast)
{
    if (!ast)
        return;

    // Check for exceeding max depth.
    ScopedIncrement depth_increment(current_ast_depth);
    if (current_ast_depth > 500)
    {
        // The AST is too deep (see the comment for current_ast_depth). Throw
        // an exception to fail fast and not use this query as an etalon, or we'll
        // end up in a very slow and useless loop. It also makes sense to set it
        // lower than the default max parse depth on the server (1000), so that
        // we don't get the useless error about parse depth from the server either.
        throw Exception(ErrorCodes::TOO_DEEP_RECURSION,
            "AST depth exceeded while fuzzing ({})", current_ast_depth);
    }

    // Check for loops.
    auto [_, inserted] = debug_visited_nodes.insert(ast.get());
    if (!inserted)
    {
        fmt::print(stderr, "The AST node '{}' was already visited before."
            " Depth {}, {} visited nodes, current top AST:\n{}\n",
            static_cast<void *>(ast.get()), current_ast_depth,
            debug_visited_nodes.size(), (*debug_top_ast)->dumpTree());
        assert(false);
    }

    // The fuzzing.
    if (auto * with_union = typeid_cast<ASTSelectWithUnionQuery *>(ast.get()))
    {
        fuzz(with_union->list_of_selects);
        /// Fuzzing SELECT query to EXPLAIN query randomly.
        /// And we only fuzzing the root query into an EXPLAIN query, not fuzzing subquery
        if (fuzz_rand() % 20 == 0 && current_ast_depth <= 1)
        {
            auto explain = std::make_shared<ASTExplainQuery>(fuzzExplainKind());

            auto settings_ast = std::make_shared<ASTSetQuery>();
            settings_ast->is_standalone = false;
            fuzzExplainSettings(*settings_ast, explain->getKind());
            explain->setSettings(settings_ast);

            explain->setExplainedQuery(ast);
            ast = explain;
        }
    }
    else if (auto * with_intersect_except = typeid_cast<ASTSelectIntersectExceptQuery *>(ast.get()))
    {
        auto selects = with_intersect_except->getListOfSelects();
        fuzz(selects);
    }
    else if (auto * tables = typeid_cast<ASTTablesInSelectQuery *>(ast.get()))
    {
        fuzz(tables->children);
    }
    else if (auto * tables_element = typeid_cast<ASTTablesInSelectQueryElement *>(ast.get()))
    {
        fuzz(tables_element->table_join);
        fuzz(tables_element->table_expression);
        fuzz(tables_element->array_join);
    }
    else if (auto * table_expr = typeid_cast<ASTTableExpression *>(ast.get()))
    {
        fuzzTableName(*table_expr);
        fuzz(table_expr->children);
    }
    else if (auto * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
    {
        fuzz(expr_list->children);
    }
    else if (auto * order_by_element = typeid_cast<ASTOrderByElement *>(ast.get()))
    {
        fuzzOrderByElement(order_by_element);
    }
    else if (auto * fn = typeid_cast<ASTFunction *>(ast.get()))
    {
        fuzzColumnLikeExpressionList(fn->arguments.get());
        fuzzColumnLikeExpressionList(fn->parameters.get());

        if (fn->is_window_function && fn->window_definition)
        {
            auto & def = fn->window_definition->as<ASTWindowDefinition &>();
            fuzzColumnLikeExpressionList(def.partition_by.get());
            fuzzOrderByList(def.order_by.get());
            fuzzWindowFrame(def);
        }

        fuzz(fn->children);
    }
    else if (auto * select = typeid_cast<ASTSelectQuery *>(ast.get()))
    {
        fuzzColumnLikeExpressionList(select->select().get());

        if (select->groupBy().get())
        {
            if (fuzz_rand() % 50 == 0)
            {
                select->groupBy()->children.clear();
                select->setExpression(ASTSelectQuery::Expression::GROUP_BY, {});
                select->group_by_with_grouping_sets = false;
                select->group_by_with_rollup = false;
                select->group_by_with_cube = false;
                select->group_by_with_totals = true;
            }
            else if (fuzz_rand() % 100 == 0)
            {
                select->group_by_with_grouping_sets = !select->group_by_with_grouping_sets;
            }
            else if (fuzz_rand() % 100 == 0)
            {
                select->group_by_with_rollup = !select->group_by_with_rollup;
            }
            else if (fuzz_rand() % 100 == 0)
            {
                select->group_by_with_cube = !select->group_by_with_cube;
            }
            else if (fuzz_rand() % 100 == 0)
            {
                select->group_by_with_totals = !select->group_by_with_totals;
            }
        }
        else if (fuzz_rand() % 50 == 0)
        {
            select->setExpression(ASTSelectQuery::Expression::GROUP_BY, getRandomExpressionList());
        }

        if (select->where().get())
        {
            if (fuzz_rand() % 50 == 0)
            {
                select->where()->children.clear();
                select->setExpression(ASTSelectQuery::Expression::WHERE, {});
            }
            else if (!select->prewhere().get())
            {
                if (fuzz_rand() % 50 == 0)
                {
                    /// Copy WHERE to PREWHERE and sometimes drop the WHERE.
                    select->setExpression(ASTSelectQuery::Expression::PREWHERE, select->where()->clone());

                    if (fuzz_rand() % 2 == 0)
                    {
                        select->where()->children.clear();
                        select->setExpression(ASTSelectQuery::Expression::WHERE, {});
                    }
                }
            }
        }
        else if (fuzz_rand() % 50 == 0)
        {
            select->setExpression(ASTSelectQuery::Expression::WHERE, getRandomColumnLike());
        }

        if (select->prewhere().get())
        {
            if (fuzz_rand() % 50 == 0)
            {
                select->prewhere()->children.clear();
                select->setExpression(ASTSelectQuery::Expression::PREWHERE, {});
            }
            else if (!select->where().get())
            {
                if (fuzz_rand() % 50 == 0)
                {
                    /// Copy PREWHERE to WHERE and sometimes drop the PREWHERE.
                    select->setExpression(ASTSelectQuery::Expression::WHERE, select->prewhere()->clone());

                    if (fuzz_rand() % 2 == 0)
                    {
                        select->prewhere()->children.clear();
                        select->setExpression(ASTSelectQuery::Expression::PREWHERE, {});
                    }
                }
            }
        }
        else if (fuzz_rand() % 50 == 0)
        {
            select->setExpression(ASTSelectQuery::Expression::PREWHERE, getRandomColumnLike());
        }

        fuzzOrderByList(select->orderBy().get());

        fuzz(select->children);
    }
    /*
     * The time to fuzz the settings has not yet come.
     * Apparently we don't have any infrastructure to validate the values of
     * the settings, and the first query with max_block_size = -1 breaks
     * because of overflows here and there.
     *//*
     * else if (auto * set = typeid_cast<ASTSetQuery *>(ast.get()))
     * {
     *      for (auto & c : set->changes)
     *      {
     *          if (fuzz_rand() % 50 == 0)
     *          {
     *              c.value = fuzzField(c.value);
     *          }
     *      }
     * }
     */
    else if (auto * literal = typeid_cast<ASTLiteral *>(ast.get()))
    {
        // There is a caveat with fuzzing the children: many ASTs also keep the
        // links to particular children in own fields. This means that replacing
        // the child with another object might lead to error. Many of these fields
        // are ASTPtr -- this is redundant ownership, but hides the error if the
        // child field is replaced. Others can be ASTLiteral * or the like, which
        // leads to segfault if the pointed-to AST is replaced.
        // Replacing children is safe in case of ASTExpressionList. In a more
        // general case, we can change the value of ASTLiteral, which is what we
        // do here.
        if (fuzz_rand() % 11 == 0)
        {
            literal->value = fuzzField(literal->value);
        }
    }
    else if (auto * create_query = typeid_cast<ASTCreateQuery *>(ast.get()))
    {
        fuzzCreateQuery(*create_query);
    }
    else if (auto * explain_query = typeid_cast<ASTExplainQuery *>(ast.get()))
    {
        /// An EXPLAIN may have no explained query at all,
        /// so the pointer has to be checked before it is used.
        const auto & explained_query = explain_query->getExplainedQuery();

        /// Fuzzing EXPLAIN query to SELECT query randomly
        if (fuzz_rand() % 20 == 0
            && explained_query
            && explained_query->getQueryKind() == IAST::QueryKind::Select)
        {
            auto select_query = explained_query->clone();
            fuzz(select_query);
            ast = select_query;
        }
        else
        {
            fuzzExplainQuery(*explain_query);
        }
    }
    else
    {
        fuzz(ast->children);
    }
}

/*
 * This function collects various parts of query that we can then substitute
 * to a query being fuzzed.
 *
 * TODO: we just stop remembering new parts after our corpus reaches certain size.
 * This is boring, should implement a random replacement of existing parts with
 * small probability. Do this after we add this fuzzer to CI and fix all the
 * problems it can routinely find even in this boring version.
 */
void QueryFuzzer::collectFuzzInfoMain(ASTPtr ast)
{
    collectFuzzInfoRecurse(ast);

    /// Rebuild the vectors used for random access from the deduplicated sets.
    aliases.clear();
    for (const auto & alias : aliases_set)
    {
        aliases.push_back(alias);
    }

    column_like.clear();
    for (const auto & [name, value] : column_like_map)
    {
        column_like.push_back(value);
    }

    table_like.clear();
    for (const auto & [name, value] : table_like_map)
    {
        table_like.push_back(value);
    }
}

/// Remembers a table-like AST under its formatted text.
/// The corpus is reset once it exceeds 1000 entries; ASTs whose text is
/// 200 characters or longer are not stored.
void QueryFuzzer::addTableLike(ASTPtr ast)
{
    if (table_like_map.size() > 1000)
    {
        table_like_map.clear();
    }

    const auto name = ast->formatForErrorMessage();
    if (name.size() < 200)
    {
        table_like_map.insert({name, ast});
    }
}

/// Remembers a column-like AST under its formatted text.
/// The corpus is reset once it exceeds 1000 entries; ASTs whose text is
/// 200 characters or longer are not stored.
void QueryFuzzer::addColumnLike(ASTPtr ast)
{
    if (column_like_map.size() > 1000)
    {
        column_like_map.clear();
    }

    const auto name = ast->formatForErrorMessage();
    if (name == "Null")
    {
        // The `Null` identifier from FORMAT Null clause. We don't quote it
        // properly when formatting the AST, and while the resulting query
        // technically works, it has non-standard case for Null (the standard
        // is NULL), so it breaks the query formatting idempotence check.
        // Just plug this particular case for now.
        return;
    }
    if (name.size() < 200)
    {
        column_like_map.insert({name, ast});
    }
}

/// Walks the AST and records aliases, column-like nodes (literals,
/// identifiers, functions) and table-like nodes (table expressions,
/// subqueries) for later substitution.
void QueryFuzzer::collectFuzzInfoRecurse(ASTPtr ast)
{
    /// Nothing to collect from an absent node.
    if (!ast)
    {
        return;
    }

    if (auto * impl = dynamic_cast<ASTWithAlias *>(ast.get()))
    {
        if (aliases_set.size() > 1000)
        {
            aliases_set.clear();
        }

        aliases_set.insert(impl->alias);
    }

    if (typeid_cast<ASTLiteral *>(ast.get()))
    {
        addColumnLike(ast);
    }
    else if (typeid_cast<ASTIdentifier *>(ast.get()))
    {
        addColumnLike(ast);
    }
    else if (typeid_cast<ASTFunction *>(ast.get()))
    {
        addColumnLike(ast);
    }
    else if (typeid_cast<ASTTableExpression *>(ast.get()))
    {
        addTableLike(ast);
    }
    else if (typeid_cast<ASTSubquery *>(ast.get()))
    {
        addTableLike(ast);
    }

    for (const auto & child : ast->children)
    {
        collectFuzzInfoRecurse(child);
    }
}

/// Entry point: collects substitution material from `ast`, fuzzes it in
/// place and prints the resulting query to stdout.
void QueryFuzzer::fuzzMain(ASTPtr & ast)
{
    current_ast_depth = 0;
    debug_visited_nodes.clear();
    /// Remember the root so that the loop check in fuzz() can dump the tree.
    debug_top_ast = &ast;

    collectFuzzInfoMain(ast);
    fuzz(ast);

    std::cout << std::endl;
    WriteBufferFromOStream ast_buf(std::cout, 4096);
    formatAST(*ast, ast_buf, false /*highlight*/);
    ast_buf.finalize();
    std::cout << std::endl << std::endl;
}

}