mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Merge pull request #45231 from Avogar/json-tuples
Insert default values in case of missing tuple elements in JSONEachRow
This commit is contained in:
commit
bdb3517512
@ -1203,12 +1203,14 @@ SELECT * FROM json_each_row_nested
|
||||
- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`.
|
||||
- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`.
|
||||
- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`.
|
||||
- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
|
||||
- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
|
||||
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
|
||||
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
|
||||
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
|
||||
- [output_format_json_quote_decimals](/docs/en/operations/settings/settings.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`.
|
||||
- [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`.
|
||||
- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `false`.
|
||||
- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `true`.
|
||||
- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`.
|
||||
- [output_format_json_validate_utf8](/docs/en/operations/settings/settings.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`.
|
||||
|
||||
|
@ -4122,7 +4122,20 @@ Enabled by default.
|
||||
|
||||
Serialize named tuple columns as JSON objects.
|
||||
|
||||
Disabled by default.
|
||||
Enabled by default.
|
||||
|
||||
### input_format_json_named_tuples_as_objects {#input_format_json_named_tuples_as_objects}
|
||||
|
||||
Parse named tuple columns as JSON objects.
|
||||
|
||||
Enabled by default.
|
||||
|
||||
### input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple}
|
||||
|
||||
Insert default values for missing elements in JSON object while parsing named tuple.
|
||||
This setting works only when setting `input_format_json_named_tuples_as_objects` is enabled.
|
||||
|
||||
Enabled by default.
|
||||
|
||||
### output_format_json_array_of_rows {#output_format_json_array_of_rows}
|
||||
|
||||
|
@ -773,6 +773,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
M(Bool, input_format_json_validate_types_from_metadata, true, "For JSON/JSONCompact/JSONColumnsWithMetadata input formats this controls whether format parser should check if data types from input metadata match data types of the corresponding columns from the table", 0) \
|
||||
M(Bool, input_format_json_read_numbers_as_strings, false, "Allow to parse numbers as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_read_objects_as_strings, true, "Allow to parse JSON objects as strings in JSON input formats", 0) \
|
||||
M(Bool, input_format_json_named_tuples_as_objects, true, "Deserialize named tuple columns as JSON objects", 0) \
|
||||
M(Bool, input_format_json_defaults_for_missing_elements_in_named_tuple, true, "Insert default value in named tuple element if it's missing in json object", 0) \
|
||||
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
|
||||
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
|
||||
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \
|
||||
|
@ -80,7 +80,8 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"}}},
|
||||
{"23.1", {{"input_format_json_read_objects_as_strings", 0, 1, "Enable reading nested json objects as strings while object type is experimental"},
|
||||
{"input_format_json_defaults_for_missing_elements_in_named_tuple", false, true, "Allow missing elements in JSON objects while reading named tuples by default"}}},
|
||||
{"22.12", {{"max_size_to_preallocate_for_aggregation", 10'000'000, 100'000'000, "This optimizes performance"},
|
||||
{"query_plan_aggregation_in_order", 0, 1, "Enable some refactoring around query plan"},
|
||||
{"format_binary_max_string_size", 0, 1_GiB, "Prevent allocating large amount of memory"}}},
|
||||
|
@ -16,6 +16,7 @@ namespace ErrorCodes
|
||||
{
|
||||
extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
|
||||
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
|
||||
extern const int INCORRECT_DATA;
|
||||
}
|
||||
|
||||
|
||||
@ -154,7 +155,7 @@ void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, co
|
||||
|
||||
void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
|
||||
{
|
||||
if (settings.json.named_tuples_as_objects
|
||||
if (settings.json.write_named_tuples_as_objects
|
||||
&& have_explicit_names)
|
||||
{
|
||||
writeChar('{', ostr);
|
||||
@ -185,7 +186,7 @@ void SerializationTuple::serializeTextJSON(const IColumn & column, size_t row_nu
|
||||
|
||||
void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
|
||||
{
|
||||
if (settings.json.named_tuples_as_objects
|
||||
if (settings.json.read_named_tuples_as_objects
|
||||
&& have_explicit_names)
|
||||
{
|
||||
skipWhitespaceIfAny(istr);
|
||||
@ -194,12 +195,15 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
|
||||
|
||||
addElementSafe(elems.size(), column, [&]
|
||||
{
|
||||
// Require all elements but in arbitrary order.
|
||||
for (size_t i = 0; i < elems.size(); ++i)
|
||||
std::vector<UInt8> seen_elements(elems.size(), 0);
|
||||
size_t i = 0;
|
||||
while (!istr.eof() && *istr.position() != '}')
|
||||
{
|
||||
if (i == elems.size())
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected number of elements in named tuple. Expected no more than {}", elems.size());
|
||||
|
||||
if (i > 0)
|
||||
{
|
||||
skipWhitespaceIfAny(istr);
|
||||
assertChar(',', istr);
|
||||
skipWhitespaceIfAny(istr);
|
||||
}
|
||||
@ -211,12 +215,35 @@ void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr
|
||||
skipWhitespaceIfAny(istr);
|
||||
|
||||
const size_t element_pos = getPositionByName(name);
|
||||
seen_elements[element_pos] = 1;
|
||||
auto & element_column = extractElementColumn(column, element_pos);
|
||||
elems[element_pos]->deserializeTextJSON(element_column, istr, settings);
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
++i;
|
||||
}
|
||||
|
||||
skipWhitespaceIfAny(istr);
|
||||
assertChar('}', istr);
|
||||
|
||||
/// Check if we have missing elements.
|
||||
if (i != elems.size())
|
||||
{
|
||||
for (size_t element_pos = 0; element_pos != seen_elements.size(); ++element_pos)
|
||||
{
|
||||
if (seen_elements[element_pos])
|
||||
continue;
|
||||
|
||||
if (!settings.json.defaults_for_missing_elements_in_named_tuple)
|
||||
throw Exception(
|
||||
ErrorCodes::INCORRECT_DATA,
|
||||
"JSON object doesn't contain tuple element {}. If you want to insert defaults in case of missing elements, "
|
||||
"enable setting input_format_json_defaults_for_missing_elements_in_named_tuple",
|
||||
elems[element_pos]->getElementName());
|
||||
|
||||
auto & element_column = extractElementColumn(column, element_pos);
|
||||
element_column.insertDefault();
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
else
|
||||
|
@ -90,7 +90,9 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
|
||||
format_settings.json.array_of_rows = settings.output_format_json_array_of_rows;
|
||||
format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
|
||||
format_settings.json.named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
|
||||
format_settings.json.write_named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
|
||||
format_settings.json.read_named_tuples_as_objects = settings.input_format_json_named_tuples_as_objects;
|
||||
format_settings.json.defaults_for_missing_elements_in_named_tuple = settings.input_format_json_defaults_for_missing_elements_in_named_tuple;
|
||||
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
|
||||
format_settings.json.quote_64bit_floats = settings.output_format_json_quote_64bit_floats;
|
||||
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
|
||||
|
@ -153,7 +153,9 @@ struct FormatSettings
|
||||
bool quote_denormals = true;
|
||||
bool quote_decimals = false;
|
||||
bool escape_forward_slashes = true;
|
||||
bool named_tuples_as_objects = false;
|
||||
bool read_named_tuples_as_objects = false;
|
||||
bool write_named_tuples_as_objects = false;
|
||||
bool defaults_for_missing_elements_in_named_tuple = false;
|
||||
bool serialize_as_strings = false;
|
||||
bool read_bools_as_numbers = true;
|
||||
bool read_numbers_as_strings = true;
|
||||
|
@ -0,0 +1,6 @@
|
||||
(1,2,NULL)
|
||||
(1,2,NULL)
|
||||
(NULL,NULL,NULL)
|
||||
1
|
||||
1
|
||||
1
|
19
tests/queries/0_stateless/02532_json_missing_named_tuple_elements.sh
Executable file
19
tests/queries/0_stateless/02532_json_missing_named_tuple_elements.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: long
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
echo '{"t" : {"a" : 1, "b" : 2}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table"
|
||||
|
||||
echo '{"t" : { "a" : 1 , "b" : 2 } }' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table"
|
||||
|
||||
echo '{"t" : {}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table"
|
||||
|
||||
echo '{"t" : {"a" : 1, "b" : 2}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" --input_format_json_defaults_for_missing_elements_in_named_tuple=0 2>&1 | grep -F "INCORRECT_DATA" -c
|
||||
|
||||
echo '{"t" : {"a" : 1, "d" : 2}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" 2>&1 | grep -F "NOT_FOUND_COLUMN_IN_BLOCK" -c
|
||||
|
||||
echo '{"t" : {"a" : 1, "b" : 2, "c" : 3, "d" : 4}}' | $CLICKHOUSE_LOCAL --input-format=NDJSON --structure='t Tuple(a Nullable(UInt32), b Nullable(UInt32), c Nullable(UInt32))' -q "select * from table" 2>&1 | grep -F "INCORRECT_DATA" -c
|
||||
|
Loading…
Reference in New Issue
Block a user