mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
Add malformed output generation to JSON fuzzer (#57646)
Randomly modify structural characters of a valid JSON ('{', '}', '[', ']', ':', '"', ',') to generate output that cannot be parsed as JSON. Follow-up to https://github.com/ClickHouse/ClickHouse/pull/56490
This commit is contained in:
parent
e989eb7bd2
commit
b8d274d070
@ -19,6 +19,7 @@ fuzzJSON({ named_collection [option=value [,..]] | json_str[, random_seed] })
|
||||
- `json_str` (String) - The source string representing structured data in JSON format.
|
||||
- `random_seed` (UInt64) - Manual random seed for producing stable results.
|
||||
- `reuse_output` (boolean) - Reuse the output from a fuzzing process as input for the next fuzzer.
|
||||
- `malform_output` (boolean) - Generate a string that cannot be parsed as a JSON object.
|
||||
- `max_output_length` (UInt64) - Maximum allowable length of the generated or perturbed JSON string.
|
||||
- `probability` (Float64) - The probability to fuzz a JSON field (a key-value pair). Must be within [0, 1] range.
|
||||
- `max_nesting_level` (UInt64) - The maximum allowed depth of nested structures within the JSON data.
|
||||
@ -84,3 +85,13 @@ SELECT * FROM fuzzJSON('{"id":1}', 1234) LIMIT 3;
|
||||
{"BRjE":16137826149911306846}
|
||||
{"XjKE":15076727133550123563}
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT * FROM fuzzJSON(json_nc, json_str='{"name" : "FuzzJSON"}', random_seed=1337, malform_output=true) LIMIT 3;
|
||||
```
|
||||
|
||||
``` text
|
||||
U"name":"FuzzJSON*"SpByjZKtr2VAyHCO"falseh
|
||||
{"name"keFuzzJSON, "g6vVO7TCIk":jTt^
|
||||
{"DBhz":YFuzzJSON5}
|
||||
```
|
||||
|
@ -248,10 +248,10 @@ Field generateRandomFixedValue(const StorageFuzzJSON::Configuration & config, pc
|
||||
return f;
|
||||
}
|
||||
|
||||
String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & source)
|
||||
String fuzzString(UInt64 min_length, UInt64 max_length, pcg64 & rnd, const String & source, std::function<char(pcg64 &)> charGen)
|
||||
{
|
||||
String result;
|
||||
result.reserve(config.max_key_length);
|
||||
result.reserve(max_length);
|
||||
|
||||
using FA = FuzzAction;
|
||||
auto get_action = [&]() -> FuzzAction
|
||||
@ -261,7 +261,7 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c
|
||||
};
|
||||
|
||||
size_t i = 0;
|
||||
while (i < source.size() && result.size() < config.max_key_length)
|
||||
while (i < source.size() && result.size() < max_length)
|
||||
{
|
||||
auto action = get_action();
|
||||
switch (action)
|
||||
@ -271,12 +271,12 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c
|
||||
}
|
||||
break;
|
||||
case FA::Edit: {
|
||||
result.push_back(generateRandomKeyCharacter(rnd));
|
||||
result.push_back(charGen(rnd));
|
||||
++i;
|
||||
}
|
||||
break;
|
||||
case FA::Add: {
|
||||
result.push_back(generateRandomKeyCharacter(rnd));
|
||||
result.push_back(charGen(rnd));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@ -284,12 +284,24 @@ String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, c
|
||||
}
|
||||
}
|
||||
|
||||
while (result.size() < config.min_key_length)
|
||||
result.push_back(generateRandomKeyCharacter(rnd));
|
||||
while (result.size() < min_length)
|
||||
result.push_back(charGen(rnd));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns a fuzzed variant of `key`.
// Thin wrapper over fuzzString: config.min_key_length and config.max_key_length are passed
// as the length bounds, and generateRandomKeyCharacter is the generator for any characters
// that fuzzString inserts or substitutes.
String fuzzJSONKey(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & key)
|
||||
{
|
||||
return fuzzString(config.min_key_length, config.max_key_length, rnd, key, generateRandomKeyCharacter);
|
||||
}
|
||||
|
||||
// Randomly modify a structural token (e.g. "{", "}", "[", "]", ":", "\"", ", ") to generate output that cannot be parsed as JSON.
// Only active when config.should_malform_output is set; otherwise `s` is returned unchanged.
// When active, `s` is passed through fuzzString with min_length 0 and max_length s.size(),
// using generateRandomStringValueCharacter as the replacement-character generator.
|
||||
String fuzzJSONStructure(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, const String & s)
|
||||
{
|
||||
return config.should_malform_output ? fuzzString(/*min_length*/ 0, /*max_length*/ s.size(), rnd, s, generateRandomStringValueCharacter)
|
||||
: s;
|
||||
}
|
||||
|
||||
std::shared_ptr<JSONNode>
|
||||
generateRandomJSONNode(const StorageFuzzJSON::Configuration & config, pcg64 & rnd, bool with_key, JSONValue::Type type)
|
||||
{
|
||||
@ -397,7 +409,7 @@ void fuzzJSONObject(
|
||||
if (next_node->key)
|
||||
{
|
||||
writeDoubleQuoted(*next_node->key, out);
|
||||
out << ":";
|
||||
out << fuzzJSONStructure(config, rnd, ":");
|
||||
}
|
||||
|
||||
auto & val = next_node->value;
|
||||
@ -405,7 +417,11 @@ void fuzzJSONObject(
|
||||
if (val.fixed)
|
||||
{
|
||||
if (val.fixed->getType() == Field::Types::Which::String)
|
||||
writeDoubleQuoted(val.fixed->get<String>(), out);
|
||||
{
|
||||
out << fuzzJSONStructure(config, rnd, "\"");
|
||||
writeText(val.fixed->get<String>(), out);
|
||||
out << fuzzJSONStructure(config, rnd, "\"");
|
||||
}
|
||||
else
|
||||
writeFieldText(*val.fixed, out);
|
||||
}
|
||||
@ -414,9 +430,9 @@ void fuzzJSONObject(
|
||||
if (!val.array && !val.object)
|
||||
return;
|
||||
|
||||
const auto & [op, cl, node_list] = val.array ? std::make_tuple('[', ']', *val.array) : std::make_tuple('{', '}', *val.object);
|
||||
const auto & [op, cl, node_list] = val.array ? std::make_tuple("[", "]", *val.array) : std::make_tuple("{", "}", *val.object);
|
||||
|
||||
out << op;
|
||||
out << fuzzJSONStructure(config, rnd, op);
|
||||
|
||||
bool first = true;
|
||||
for (const auto & ptr : node_list)
|
||||
@ -426,7 +442,7 @@ void fuzzJSONObject(
|
||||
|
||||
WriteBufferFromOwnString child_out;
|
||||
if (!first)
|
||||
child_out << ", ";
|
||||
child_out << fuzzJSONStructure(config, rnd, ", ");
|
||||
first = false;
|
||||
|
||||
fuzzJSONObject(ptr, child_out, config, rnd, depth + 1, node_count);
|
||||
@ -435,7 +451,7 @@ void fuzzJSONObject(
|
||||
break;
|
||||
out << child_out.str();
|
||||
}
|
||||
out << cl;
|
||||
out << fuzzJSONStructure(config, rnd, cl);
|
||||
}
|
||||
}
|
||||
|
||||
@ -554,10 +570,11 @@ Pipe StorageFuzzJSON::read(
|
||||
return Pipe::unitePipes(std::move(pipes));
|
||||
}
|
||||
|
||||
static constexpr std::array<std::string_view, 13> optional_configuration_keys
|
||||
static constexpr std::array<std::string_view, 14> optional_configuration_keys
|
||||
= {"json_str",
|
||||
"random_seed",
|
||||
"reuse_output",
|
||||
"malform_output",
|
||||
"probability",
|
||||
"max_output_length",
|
||||
"max_nesting_level",
|
||||
@ -583,6 +600,9 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration
|
||||
if (collection.has("reuse_output"))
|
||||
configuration.should_reuse_output = static_cast<bool>(collection.get<UInt64>("reuse_output"));
|
||||
|
||||
if (collection.has("malform_output"))
|
||||
configuration.should_malform_output = static_cast<bool>(collection.get<UInt64>("malform_output"));
|
||||
|
||||
if (collection.has("probability"))
|
||||
{
|
||||
configuration.probability = collection.get<Float64>("probability");
|
||||
|
@ -27,6 +27,7 @@ public:
|
||||
String json_str = "{}";
|
||||
UInt64 random_seed = randomSeed();
|
||||
bool should_reuse_output = false;
|
||||
bool should_malform_output = false;
|
||||
Float64 probability = 0.25;
|
||||
|
||||
UInt64 max_output_length = 1024;
|
||||
|
@ -150,3 +150,4 @@
|
||||
{}
|
||||
730
|
||||
200
|
||||
50
|
||||
|
@ -92,15 +92,70 @@ SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=10, min_key_length=0) L
|
||||
SELECT * FROM fuzzJSON(02918_json_fuzzer, max_key_length=10, min_key_length=11) LIMIT 10; -- { serverError BAD_ARGUMENTS }
|
||||
|
||||
--
|
||||
DROP TABLE IF EXISTS 02918_table_obj;
|
||||
CREATE TABLE 02918_table_obj (json_obj Object('json')) Engine=Memory;
|
||||
DROP TABLE IF EXISTS 02918_table_obj1;
|
||||
CREATE TABLE 02918_table_obj1 (json_obj Object('json')) Engine=Memory;
|
||||
|
||||
INSERT INTO 02918_table_obj SELECT * FROM fuzzJSON(
|
||||
INSERT INTO 02918_table_obj1 SELECT * FROM fuzzJSON(
|
||||
02918_json_fuzzer,
|
||||
json_str='{"name": "John Doe", "age": 27, "address": {"city": "Citiville", "zip": "12345"}, "hobbies": ["reading", "traveling", "coding"]}',
|
||||
random_seed=12345) LIMIT 200;
|
||||
SELECT count() FROM 02918_table_obj;
|
||||
|
||||
DROP TABLE IF EXISTS 02918_table_obj;
|
||||
SELECT count() FROM 02918_table_obj1;
|
||||
|
||||
DROP TABLE IF EXISTS 02918_table_obj1;
|
||||
|
||||
--
|
||||
DROP TABLE IF EXISTS 02918_table_obj2;
|
||||
CREATE TABLE 02918_table_obj2 (json_obj Object('json')) Engine=Memory;
|
||||
|
||||
INSERT INTO 02918_table_obj2 SELECT * FROM fuzzJSON(
|
||||
02918_json_fuzzer,
|
||||
json_str=
|
||||
'{
|
||||
"name": {
|
||||
"first": "Joan",
|
||||
"last": "of Arc"
|
||||
},
|
||||
"birth": {"date": "January 6, 1412", "place": "Domremy, France"},
|
||||
"death": {"date": "May 30, 1431", "place": "Rouen, France"},
|
||||
"occupation": "Military Leader",
|
||||
"achievements": ["Lifted Siege of Orleans", "Assisted in Charles VII\'s Coronation"],
|
||||
"legacy": {
|
||||
"honors": ["Canonized Saint", "National Heroine of France"],
|
||||
"memorials": [
|
||||
{"name": "Joan of Arc Memorial", "location": "Domremy"},
|
||||
{"name": "Place Jeanne d\'Arc", "location": "Rouen"}
|
||||
]
|
||||
}
|
||||
}',
|
||||
random_seed=12345,
|
||||
max_output_length=1024) LIMIT 50;
|
||||
|
||||
INSERT INTO 02918_table_obj2 SELECT * FROM fuzzJSON(
|
||||
02918_json_fuzzer,
|
||||
json_str=
|
||||
'{
|
||||
"name": {
|
||||
"first": "Joan",
|
||||
"last": "of Arc"
|
||||
},
|
||||
"birth": {"date": "January 6, 1412", "place": "Domremy, France"},
|
||||
"death": {"date": "May 30, 1431", "place": "Rouen, France"},
|
||||
"occupation": "Military Leader",
|
||||
"achievements": ["Lifted Siege of Orleans", "Assisted in Charles VII\'s Coronation"],
|
||||
"legacy": {
|
||||
"honors": ["Canonized Saint", "National Heroine of France"],
|
||||
"memorials": [
|
||||
{"name": "Joan of Arc Memorial", "location": "Domremy"},
|
||||
{"name": "Place Jeanne d\'Arc", "location": "Rouen"}
|
||||
]
|
||||
}
|
||||
}',
|
||||
random_seed=12345,
|
||||
max_output_length=1024, malform_output=true) LIMIT 50; -- {serverError INCORRECT_DATA }
|
||||
|
||||
SELECT count() FROM 02918_table_obj2;
|
||||
|
||||
DROP TABLE IF EXISTS 02918_table_obj2;
|
||||
|
||||
DROP NAMED COLLECTION IF EXISTS 02918_json_fuzzer;
|
||||
|
Loading…
Reference in New Issue
Block a user