Added --input_format_import_nested_json option

By default mapping of nested json data to nested tables is disabled. To
enable the import of nested json data (into corresponding nested tables)
clickhouse must be run with the --input_format_import_nested_json=1
option.
This commit is contained in:
Veloman Yunkan 2018-09-14 13:43:57 +00:00
parent 47eb0e28b3
commit 5fe127e4c3
6 changed files with 23 additions and 5 deletions

View File

@ -41,6 +41,7 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
format_settings.skip_unknown_fields = settings.input_format_skip_unknown_fields;
format_settings.import_nested_json = settings.input_format_import_nested_json;
format_settings.date_time_input_format = settings.date_time_input_format;
format_settings.input_allow_errors_num = settings.input_format_allow_errors_num;
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;

View File

@ -49,6 +49,7 @@ struct FormatSettings
bool skip_unknown_fields = false;
bool write_statistics = true;
bool import_nested_json = false;
enum class DateTimeInputFormat
{

View File

@ -37,6 +37,8 @@ JSONEachRowRowInputStream::JSONEachRowRowInputStream(ReadBuffer & istr_, const B
{
const String& colname = columnName(i);
name_map[colname] = i; /// NOTE You could place names more cache-locally.
if ( format_settings.import_nested_json )
{
const auto splitted = Nested::splitName(colname);
if ( ! splitted.second.empty() )
{
@ -45,6 +47,7 @@ JSONEachRowRowInputStream::JSONEachRowRowInputStream(ReadBuffer & istr_, const B
}
}
}
}
const String& JSONEachRowRowInputStream::columnName(size_t i) const
{

View File

@ -148,6 +148,7 @@ struct Settings
M(SettingBool, add_http_cors_header, false, "Write add http CORS header.") \
\
M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow and TSKV formats).") \
M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).") \
\
M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.") \
\

View File

@ -3,6 +3,11 @@
0 [] [45,67,8]
1 ok ['dog','cat','pig'] [3,3,3]
1 ok ['zero','negative one'] [0,-1]
1 ok [] []
0 [] []
0 [] []
1 ok [] [3,3,3]
1 ok [] []
1 ok ['abc','def'] [1,23]
0 [] []
0 [] [45,67,8]

View File

@ -16,6 +16,9 @@ echo '{"d1" : 1, "d2" : "ok", "n.s" : ["abc", "def"], "n.i" : [1, 23]}
| $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 -q "INSERT INTO test.json_each_row_nested FORMAT JSONEachRow"
$CLICKHOUSE_CLIENT --max_threads=1 -q "SELECT * FROM test.json_each_row_nested"
test_nested_json()
{
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.json_each_row_nested"
$CLICKHOUSE_CLIENT -q "CREATE TABLE test.json_each_row_nested (d1 UInt8, d2 String, n Nested (s String, i Int32) ) ENGINE = Memory"
@ -25,7 +28,11 @@ echo '{"d1" : 1, "d2" : "ok", "n" : { "s" : ["abc", "def"], "i" : [1, 23]} }
{"t1" : 0, "n.t2":true,"n" : {"i":[45, 67, 8]}, "t4":null,"t5":[],"t6":"trash" }
{"d2":"ok","n" : {"s":["dog", "cat", "pig"], "x":[["1","2"]]}, "d1":"1", "n.i":[3, 3, 3]}
{"t0" : -0.1, "n": {"s" : ["zero","negative one"], "i" : [0, -1]}, "d2" : "ok", "d1" : 1}' \
| $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 -q "INSERT INTO test.json_each_row_nested FORMAT JSONEachRow"
| $CLICKHOUSE_CLIENT "$@" --input_format_skip_unknown_fields=1 -q "INSERT INTO test.json_each_row_nested FORMAT JSONEachRow"
$CLICKHOUSE_CLIENT --max_threads=1 -q "SELECT * FROM test.json_each_row_nested"
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.json_each_row_nested"
}
test_nested_json
test_nested_json --input_format_import_nested_json=1