mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-19 21:03:51 +00:00
Improve json read by ignore key case
This commit is contained in:
parent
2ff562ebe2
commit
7c9dbdbd9c
@ -1057,6 +1057,7 @@ class IColumn;
|
||||
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
|
||||
M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \
|
||||
M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \
|
||||
M(Bool, input_format_json_ignore_key_case, false, "Ignore json key case while read json field from string", 0) \
|
||||
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
|
||||
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
|
||||
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
|
||||
|
@ -145,6 +145,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
|
||||
format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects;
|
||||
format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence;
|
||||
format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields;
|
||||
format_settings.json.ignore_key_case = settings.input_format_json_ignore_key_case;
|
||||
format_settings.null_as_default = settings.input_format_null_as_default;
|
||||
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
|
||||
format_settings.parquet.row_group_rows = settings.output_format_parquet_row_group_size;
|
||||
|
@ -224,6 +224,7 @@ struct FormatSettings
|
||||
bool infer_incomplete_types_as_strings = true;
|
||||
bool throw_on_bad_escape_sequence = true;
|
||||
bool ignore_unnecessary_fields = true;
|
||||
bool ignore_key_case = false;
|
||||
} json{};
|
||||
|
||||
struct
|
||||
|
@ -45,6 +45,14 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat(
|
||||
{
|
||||
const auto & header = getPort().getHeader();
|
||||
name_map = header.getNamesToIndexesMap();
|
||||
if (format_settings_.json.ignore_key_case)
|
||||
{
|
||||
for (auto it = name_map.begin(); it != name_map.end(); ++it)
|
||||
{
|
||||
StringRef key = it->first;
|
||||
transformFieldNameToLowerCase(key);
|
||||
}
|
||||
}
|
||||
if (format_settings_.import_nested_json)
|
||||
{
|
||||
for (size_t i = 0; i != header.columns(); ++i)
|
||||
@ -169,6 +177,10 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns)
|
||||
JSONUtils::skipColon(*in);
|
||||
skipUnknownField(name_ref);
|
||||
continue;
|
||||
}
|
||||
if (format_settings.json.ignore_key_case)
|
||||
{
|
||||
transformFieldNameToLowerCase(name_ref);
|
||||
}
|
||||
const size_t column_index = columnIndex(name_ref, key_index);
|
||||
|
||||
|
@ -55,7 +55,13 @@ private:
|
||||
|
||||
virtual void readRowStart(MutableColumns &) {}
|
||||
virtual void skipRowStart() {}
|
||||
|
||||
/// Lower-cases every byte of `field_name` in place using std::tolower.
/// Does nothing when `field_name.data` is null.
/// NOTE: the bytes are written through a const_cast, so the view must refer to writable memory
/// owned by the caller.
void transformFieldNameToLowerCase(StringRef & field_name)
{
    if (!field_name.data)
        return;

    char * name_data = const_cast<char *>(field_name.data);
    for (size_t i = 0; i < field_name.size; ++i)
    {
        /// std::tolower has undefined behaviour for arguments that are neither EOF nor representable
        /// as unsigned char, so plain (possibly negative) char values must be converted first.
        name_data[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(name_data[i])));
    }
}
|
||||
/// Buffer for the read from the stream field name. Used when you have to copy it.
|
||||
/// Also, if processing of Nested data is in progress, it holds the common prefix
|
||||
/// of the nested column names (so that appending the field name to it produces
|
||||
|
@ -0,0 +1,3 @@
|
||||
1 77328912 Ben
|
||||
2 77328913 Jim
|
||||
3 77328914 Bill
|
18
tests/queries/0_stateless/03013_json_key_ignore_case.sh
Normal file
18
tests/queries/0_stateless/03013_json_key_ignore_case.sh
Normal file
@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash

# NOTE: this sh wrapper is required because of shell_config

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Query a non-existent file on purpose: the FILE_DOESNT_EXIST error message contains the
# absolute path the server looked at, from which the user_files directory is extracted.
USER_FILES_PATH=$($CLICKHOUSE_CLIENT --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep -E '^Code: 107.*FILE_DOESNT_EXIST' | head -1 | awk '{gsub("/nonexist.txt","",$9); print $9}')

# Quote the destination so a path containing spaces or glob characters is not split or expanded.
cp "$CURDIR"/data_json/key_ignore_case.json "$USER_FILES_PATH"/

$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl"
$CLICKHOUSE_CLIENT -q "create table test_tbl (id UInt16, reqid UInt32, name String) engine=MergeTree order by id"
$CLICKHOUSE_CLIENT -q "INSERT INTO test_tbl SELECT * FROM file('key_ignore_case.json', 'JSONEachRow') SETTINGS input_format_json_ignore_key_case=true"
$CLICKHOUSE_CLIENT -q "select * from test_tbl"
$CLICKHOUSE_CLIENT -q "drop table test_tbl"

# Remove the fixture copied above so it does not linger in user_files after the test.
rm -f "$USER_FILES_PATH"/key_ignore_case.json
|
BIN
tests/queries/0_stateless/data_json/key_ignore_case.json
Normal file
BIN
tests/queries/0_stateless/data_json/key_ignore_case.json
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user