Added --input_format_import_nested_json option

By default, mapping of nested JSON data to nested tables is disabled. To
enable the import of nested JSON data (into the corresponding nested tables),
ClickHouse must be run with the --input_format_import_nested_json=1
option.
This commit is contained in:
Veloman Yunkan 2018-09-14 13:43:57 +00:00
parent 47eb0e28b3
commit 5fe127e4c3
6 changed files with 23 additions and 5 deletions

View File

@ -41,6 +41,7 @@ BlockInputStreamPtr FormatFactory::getInput(const String & name, ReadBuffer & bu
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes; format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions; format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
format_settings.skip_unknown_fields = settings.input_format_skip_unknown_fields; format_settings.skip_unknown_fields = settings.input_format_skip_unknown_fields;
format_settings.import_nested_json = settings.input_format_import_nested_json;
format_settings.date_time_input_format = settings.date_time_input_format; format_settings.date_time_input_format = settings.date_time_input_format;
format_settings.input_allow_errors_num = settings.input_format_allow_errors_num; format_settings.input_allow_errors_num = settings.input_format_allow_errors_num;
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio; format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;

View File

@ -49,6 +49,7 @@ struct FormatSettings
bool skip_unknown_fields = false; bool skip_unknown_fields = false;
bool write_statistics = true; bool write_statistics = true;
bool import_nested_json = false;
enum class DateTimeInputFormat enum class DateTimeInputFormat
{ {

View File

@ -37,6 +37,8 @@ JSONEachRowRowInputStream::JSONEachRowRowInputStream(ReadBuffer & istr_, const B
{ {
const String& colname = columnName(i); const String& colname = columnName(i);
name_map[colname] = i; /// NOTE You could place names more cache-locally. name_map[colname] = i; /// NOTE You could place names more cache-locally.
if ( format_settings.import_nested_json )
{
const auto splitted = Nested::splitName(colname); const auto splitted = Nested::splitName(colname);
if ( ! splitted.second.empty() ) if ( ! splitted.second.empty() )
{ {
@ -45,6 +47,7 @@ JSONEachRowRowInputStream::JSONEachRowRowInputStream(ReadBuffer & istr_, const B
} }
} }
} }
}
const String& JSONEachRowRowInputStream::columnName(size_t i) const const String& JSONEachRowRowInputStream::columnName(size_t i) const
{ {

View File

@ -148,6 +148,7 @@ struct Settings
M(SettingBool, add_http_cors_header, false, "Write add http CORS header.") \ M(SettingBool, add_http_cors_header, false, "Write add http CORS header.") \
\ \
M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow and TSKV formats).") \ M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow and TSKV formats).") \
M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).") \
\ \
M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.") \ M(SettingBool, input_format_values_interpret_expressions, true, "For Values format: if field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.") \
\ \

View File

@ -3,6 +3,11 @@
0 [] [45,67,8] 0 [] [45,67,8]
1 ok ['dog','cat','pig'] [3,3,3] 1 ok ['dog','cat','pig'] [3,3,3]
1 ok ['zero','negative one'] [0,-1] 1 ok ['zero','negative one'] [0,-1]
1 ok [] []
0 [] []
0 [] []
1 ok [] [3,3,3]
1 ok [] []
1 ok ['abc','def'] [1,23] 1 ok ['abc','def'] [1,23]
0 [] [] 0 [] []
0 [] [45,67,8] 0 [] [45,67,8]

View File

@ -16,6 +16,9 @@ echo '{"d1" : 1, "d2" : "ok", "n.s" : ["abc", "def"], "n.i" : [1, 23]}
| $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 -q "INSERT INTO test.json_each_row_nested FORMAT JSONEachRow" | $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 -q "INSERT INTO test.json_each_row_nested FORMAT JSONEachRow"
$CLICKHOUSE_CLIENT --max_threads=1 -q "SELECT * FROM test.json_each_row_nested" $CLICKHOUSE_CLIENT --max_threads=1 -q "SELECT * FROM test.json_each_row_nested"
test_nested_json()
{
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.json_each_row_nested" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.json_each_row_nested"
$CLICKHOUSE_CLIENT -q "CREATE TABLE test.json_each_row_nested (d1 UInt8, d2 String, n Nested (s String, i Int32) ) ENGINE = Memory" $CLICKHOUSE_CLIENT -q "CREATE TABLE test.json_each_row_nested (d1 UInt8, d2 String, n Nested (s String, i Int32) ) ENGINE = Memory"
@ -25,7 +28,11 @@ echo '{"d1" : 1, "d2" : "ok", "n" : { "s" : ["abc", "def"], "i" : [1, 23]} }
{"t1" : 0, "n.t2":true,"n" : {"i":[45, 67, 8]}, "t4":null,"t5":[],"t6":"trash" } {"t1" : 0, "n.t2":true,"n" : {"i":[45, 67, 8]}, "t4":null,"t5":[],"t6":"trash" }
{"d2":"ok","n" : {"s":["dog", "cat", "pig"], "x":[["1","2"]]}, "d1":"1", "n.i":[3, 3, 3]} {"d2":"ok","n" : {"s":["dog", "cat", "pig"], "x":[["1","2"]]}, "d1":"1", "n.i":[3, 3, 3]}
{"t0" : -0.1, "n": {"s" : ["zero","negative one"], "i" : [0, -1]}, "d2" : "ok", "d1" : 1}' \ {"t0" : -0.1, "n": {"s" : ["zero","negative one"], "i" : [0, -1]}, "d2" : "ok", "d1" : 1}' \
| $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 -q "INSERT INTO test.json_each_row_nested FORMAT JSONEachRow" | $CLICKHOUSE_CLIENT "$@" --input_format_skip_unknown_fields=1 -q "INSERT INTO test.json_each_row_nested FORMAT JSONEachRow"
$CLICKHOUSE_CLIENT --max_threads=1 -q "SELECT * FROM test.json_each_row_nested" $CLICKHOUSE_CLIENT --max_threads=1 -q "SELECT * FROM test.json_each_row_nested"
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.json_each_row_nested" $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test.json_each_row_nested"
}
test_nested_json
test_nested_json --input_format_import_nested_json=1