Add malformed output generation to JSON fuzzer (#57646)

Randomly modify structural characters of a valid JSON ('{', '}', '[', ']',
':', '"', ',') to generate output that cannot be parsed as JSON.

Follow-up to https://github.com/ClickHouse/ClickHouse/pull/56490
This commit is contained in:
Julia Kartseva 2023-12-13 10:59:31 -08:00 committed by GitHub
parent e989eb7bd2
commit b8d274d070
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 107 additions and 19 deletions

View File

@ -19,6 +19,7 @@ fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] })
- `json_str` (String) - The source string representing structured data in JSON format.
- `random_seed` (UInt64) - Manual random seed for producing stable results.
- `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer.
- `malform_output` (boolean) - Randomly modify structural characters of the JSON (`{`, `}`, `[`, `]`, `:`, `"`, `,`) to generate a string that cannot be parsed as a JSON object.
- `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string.
- `probability` (Float64) - The probability to fuzz a JSON field (a key-value pair). Must be within [0, 1] range.
- `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data.
@ -84,3 +85,13 @@ SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3;
{"BRjE":16137826149911306846}
{"XjKE":15076727133550123563}
```
``` sql
SELECT * FROM fuzzJSON(json_nc, json_str='{"name" : "FuzzJSON"}', random_seed=1337, malform_output=true) LIMIT 3;
```
``` text
U"name":"FuzzJSON*"SpByjZKtr2VAyHCO"falseh
{"name"keFuzzJSON, "g6vVO7TCIk":jTt^
{"DBhz":YFuzzJSON5}
```

View File

@ -248,10 +248,10 @@ Field generateRandomFixedValue(const StorageFuzzJSON::Configuration & config, pc
return f;
}
String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & source)
String fuzzString(UInt64 min_length, UInt64 max_length, pcg64 & rnd, const String & source, std::function<char(pcg64 &)> charGen)
{
String result;
result.reserve(config.max_key_length);
result.reserve(max_length);
using FA = FuzzAction;
auto get_action = [&]() -> FuzzAction
@ -261,7 +261,7 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c
};
size_t i = 0;
while (i < source.size() && result.size() < config.max_key_length)
while (i < source.size() && result.size() < max_length)
{
auto action = get_action();
switch (action)
@ -271,12 +271,12 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c
}
break;
case FA::Edit: {
result.push_back(generateRandomKeyCharacter(rnd));
result.push_back(charGen(rnd));
++i;
}
break;
case FA::Add: {
result.push_back(generateRandomKeyCharacter(rnd));
result.push_back(charGen(rnd));
}
break;
default:
@ -284,12 +284,24 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c
}
}
while (result.size() < config.min_key_length)
result.push_back(generateRandomKeyCharacter(rnd));
while (result.size() < min_length)
result.push_back(charGen(rnd));
return result;
}
// Fuzz a JSON key: delegates to fuzzString, taking the length bounds from the
// configuration (min_key_length / max_key_length) and using
// generateRandomKeyCharacter as the character generator.
String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & key)
{
    const auto shortest_key = config.min_key_length;
    const auto longest_key = config.max_key_length;

    return fuzzString(shortest_key, longest_key, rnd, key, generateRandomKeyCharacter);
}
// Randomly modify a structural token (e.g. '{', '}', '[', ']', ':', '"', ',') with the aim of
// generating output that cannot be parsed as JSON.
// When config.should_malform_output is not set, the token `s` is returned unchanged.
// Otherwise `s` is passed through fuzzString with no minimum length and a maximum length of
// s.size(), so the result is never longer than the original token.
String fuzzJSONStructure(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & s)
{
    if (!config.should_malform_output)
        return s;

    return fuzzString(/*min_length*/ 0, /*max_length*/ s.size(), rnd, s, generateRandomStringValueCharacter);
}
std::shared_ptr<JSONNode>
generateRandomJSONNode(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, bool with_key, JSONValue::Type type)
{
@ -397,7 +409,7 @@ void fuzzJSONObject(
if (next_node->key)
{
writeDoubleQuoted(*next_node->key, out);
out << ":";
out << fuzzJSONStructure(config, rnd, ":");
}
auto & val = next_node->value;
@ -405,7 +417,11 @@ void fuzzJSONObject(
if (val.fixed)
{
if (val.fixed->getType() == Field::Types::Which::String)
writeDoubleQuoted(val.fixed->get<String>(), out);
{
out << fuzzJSONStructure(config, rnd, "\"");
writeText(val.fixed->get<String>(), out);
out << fuzzJSONStructure(config, rnd, "\"");
}
else
writeFieldText(*val.fixed, out);
}
@ -414,9 +430,9 @@ void fuzzJSONObject(
if (!val.array && !val.object)
return;
const auto & [op, cl, node_list] = val.array ? std::make_tuple('[', ']', *val.array) : std::make_tuple('{', '}', *val.object);
const auto & [op, cl, node_list] = val.array ? std::make_tuple("[", "]", *val.array) : std::make_tuple("{", "}", *val.object);
out << op;
out << fuzzJSONStructure(config, rnd, op);
bool first = true;
for (const auto & ptr : node_list)
@ -426,7 +442,7 @@ void fuzzJSONObject(
WriteBufferFromOwnString child_out;
if (!first)
child_out << ", ";
child_out << fuzzJSONStructure(config, rnd, ", ");
first = false;
fuzzJSONObject(ptr, child_out, config, rnd, depth + 1, node_count);
@ -435,7 +451,7 @@ void fuzzJSONObject(
break;
out << child_out.str();
}
out << cl;
out << fuzzJSONStructure(config, rnd, cl);
}
}
@ -554,10 +570,11 @@ Pipe StorageFuzzJSON::read(
return Pipe::unitePipes(std::move(pipes));
}
static constexpr std::array<std::string_view, 13> optional_configuration_keys
static constexpr std::array<std::string_view, 14> optional_configuration_keys
= {"json_str",
"random_seed",
"reuse_output",
"malform_output",
"probability",
"max_output_length",
"max_nesting_level",
@ -583,6 +600,9 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration
if (collection.has("reuse_output"))
configuration.should_reuse_output = static_cast<bool>(collection.get<UInt64>("reuse_output"));
if (collection.has("malform_output"))
configuration.should_malform_output = static_cast<bool>(collection.get<UInt64>("malform_output"));
if (collection.has("probability"))
{
configuration.probability = collection.get<Float64>("probability");

View File

@ -27,6 +27,7 @@ public:
String json_str = "{}";
UInt64 random_seed = randomSeed();
bool should_reuse_output = false;
bool should_malform_output = false;
Float64 probability = 0.25;
UInt64 max_output_length = 1024;

View File

@ -150,3 +150,4 @@
{}
730
200
50

View File

@ -92,15 +92,70 @@ SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=10, min_key_length=0) L
SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=10, min_key_length=11) LIMIT 10; -- { serverError BAD_ARGUMENTS }
--
DROP TABLE IF EXISTS 02918_table_obj;
CREATE TABLE 02918_table_obj (json_obj Object('json')) Engine=Memory;
DROP TABLE IF EXISTS 02918_table_obj1;
CREATE TABLE 02918_table_obj1 (json_obj Object('json')) Engine=Memory;
INSERT INTO 02918_table_obj SELECT * FROM fuzzJSON(
INSERT INTO 02918_table_obj1 SELECT * FROM fuzzJSON(
02918_json_fuzzer,
json_str='{"name": "John Doe", "age": 27, "address": {"city": "Citiville", "zip": "12345"}, "hobbies": ["reading", "traveling", "coding"]}',
random_seed=12345) LIMIT 200;
SELECT count() FROM 02918_table_obj;
DROP TABLE IF EXISTS 02918_table_obj;
SELECT count() FROM 02918_table_obj1;
DROP TABLE IF EXISTS 02918_table_obj1;
--
DROP TABLE IF EXISTS 02918_table_obj2;
CREATE TABLE 02918_table_obj2 (json_obj Object('json')) Engine=Memory;
INSERT INTO 02918_table_obj2 SELECT * FROM fuzzJSON(
02918_json_fuzzer,
json_str=
'{
"name": {
"first": "Joan",
"last": "of Arc"
},
"birth": {"date": "January 6, 1412", "place": "Domremy, France"},
"death": {"date": "May 30, 1431", "place": "Rouen, France"},
"occupation": "Military Leader",
"achievements": ["Lifted Siege of Orleans", "Assisted in Charles VII\'s Coronation"],
"legacy": {
"honors": ["Canonized Saint", "National Heroine of France"],
"memorials": [
{"name": "Joan of Arc Memorial", "location": "Domremy"},
{"name": "Place Jeanne d\'Arc", "location": "Rouen"}
]
}
}',
random_seed=12345,
max_output_length=1024) LIMIT 50;
INSERT INTO 02918_table_obj2 SELECT * FROM fuzzJSON(
02918_json_fuzzer,
json_str=
'{
"name": {
"first": "Joan",
"last": "of Arc"
},
"birth": {"date": "January 6, 1412", "place": "Domremy, France"},
"death": {"date": "May 30, 1431", "place": "Rouen, France"},
"occupation": "Military Leader",
"achievements": ["Lifted Siege of Orleans", "Assisted in Charles VII\'s Coronation"],
"legacy": {
"honors": ["Canonized Saint", "National Heroine of France"],
"memorials": [
{"name": "Joan of Arc Memorial", "location": "Domremy"},
{"name": "Place Jeanne d\'Arc", "location": "Rouen"}
]
}
}',
random_seed=12345,
max_output_length=1024, malform_output=true) LIMIT 50; -- {serverError INCORRECT_DATA }
SELECT count() FROM 02918_table_obj2;
DROP TABLE IF EXISTS 02918_table_obj2;
DROP NAMED COLLECTION IF EXISTS 02918_json_fuzzer;