diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index df96b8129f1..4cf8b8bd1c5 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1396,6 +1396,7 @@ SELECT * FROM json_each_row_nested - [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - ignore unknown keys in json object for named tuples. Default value - `false`. - [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`. - [input_format_json_throw_on_bad_escape_sequence](/docs/en/operations/settings/settings-formats.md/#input_format_json_throw_on_bad_escape_sequence) - throw an exception if JSON string contains bad escape sequence. If disabled, bad escape sequences will remain as is in the data. Default value - `true`. +- [input_format_json_empty_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_json_empty_as_default) - treat empty fields in JSON input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) must be enabled too. - [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. - [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. 
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 5aad8db2809..c012d065574 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -752,6 +752,17 @@ Possible values: Default value: 0. +### input_format_json_empty_as_default {#input_format_json_empty_as_default} + +When enabled, replace empty input fields in JSON with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Possible values: + ++ 0 — Disable. ++ 1 — Enable. + +Default value: 0. + ## TSV format settings {#tsv-format-settings} ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 3a2c7b12b13..1cd977f6725 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1144,6 +1144,7 @@ class IColumn; M(Bool, input_format_try_infer_variants, false, "Try to infer the Variant type in text formats when there is more than one possible type for column/array elements", 0) \ M(Bool, type_json_skip_duplicated_paths, false, "When enabled, during parsing JSON object into JSON type duplicated paths will be ignored and only the first one will be inserted instead of an exception", 0) \ M(UInt64, input_format_json_max_depth, 1000, "Maximum depth of a field in JSON. 
This is not a strict limit, it does not have to be applied precisely.", 0) \ + M(Bool, input_format_json_empty_as_default, false, "Treat empty fields in JSON input as default values.", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 6bd354ce05b..eb47c221c3d 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -71,6 +71,7 @@ static std::initializer_list #include +#include namespace DB { @@ -615,28 +616,49 @@ void SerializationArray::serializeTextJSONPretty(const IColumn & column, size_t } -void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +ReturnType SerializationArray::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, istr, - [&](IColumn & nested_column) + auto deserialize_nested = [&settings, this](IColumn & nested_column, ReadBuffer & buf) -> ReturnType + { + if constexpr (std::is_same_v) { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); else - nested->deserializeTextJSON(nested_column, istr, settings); - }, false); + nested->deserializeTextJSON(nested_column, buf, settings); + } + else + { + if (settings.null_as_default && 
!isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); + return nested->tryDeserializeTextJSON(nested_column, buf, settings); + } + }; + + if (settings.json.empty_as_default) + return deserializeTextImpl(column, istr, + [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(nested_column, istr, deserialize_nested); + }, false); + else + return deserializeTextImpl(column, istr, + [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType + { + return deserialize_nested(nested_column, istr); + }, false); +} + + +void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextJSONImpl(column, istr, settings); } bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto read_nested = [&](IColumn & nested_column) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); - return nested->tryDeserializeTextJSON(nested_column, istr, settings); - }; - - return deserializeTextImpl(column, istr, std::move(read_nested), false); + return deserializeTextJSONImpl(column, istr, settings); } diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index c3353f0c251..7e34abfac90 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -82,6 +82,10 @@ public: SerializationPtr create(const SerializationPtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; }; + +private: + template + ReturnType deserializeTextJSONImpl(IColumn & column, 
ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index c722b3ac7a1..ae864cbf7b4 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -316,28 +317,52 @@ void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t ro } -void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +ReturnType SerializationMap::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, istr, - [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) + auto deserialize_nested = [&settings](IColumn & subcolumn, ReadBuffer & buf, const SerializationPtr & subcolumn_serialization) -> ReturnType + { + if constexpr (std::is_same_v) { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); else subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); - }); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); + return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); + } + }; + + if (settings.json.empty_as_default) + return deserializeTextImpl(column, istr, + [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(subcolumn, buf, + 
[&deserialize_nested, &subcolumn_serialization](IColumn & subcolumn_, ReadBuffer & buf_) -> ReturnType + { + return deserialize_nested(subcolumn_, buf_, subcolumn_serialization); + }); + }); + else + return deserializeTextImpl(column, istr, + [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + return deserialize_nested(subcolumn, buf, subcolumn_serialization); + }); +} + +void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextJSONImpl(column, istr, settings); } bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto reader = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); - return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); - }; - - return deserializeTextImpl(column, istr, reader); + return deserializeTextJSONImpl(column, istr, settings); } void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index cfcde445c1f..007d153ec7e 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -74,6 +74,9 @@ private: template ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const; + + template + ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git 
a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 594a23ab507..e1fcb1a8d48 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -313,27 +314,9 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t } template -ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +ReturnType SerializationTuple::deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const { - static constexpr bool throw_exception = std::is_same_v; - - auto deserialize_element = [&](IColumn & element_column, size_t element_pos) - { - if constexpr (throw_exception) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]); - else - elems[element_pos]->deserializeTextJSON(element_column, istr, settings); - return true; - } - else - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]); - return elems[element_pos]->tryDeserializeTextJSON(element_column, istr, settings); - } - }; + static constexpr auto throw_exception = std::is_same_v; if (settings.json.read_named_tuples_as_objects && have_explicit_names) @@ -506,12 +489,51 @@ ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuf } } -void SerializationTuple::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +template +ReturnType 
SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextJSONImpl(column, istr, settings); + auto deserialize_nested = [&settings](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType + { + if constexpr (std::is_same_v) + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); + else + nested_column_serialization->deserializeTextJSON(nested_column, buf, settings); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); + else + return nested_column_serialization->tryDeserializeTextJSON(nested_column, buf, settings); + } + }; + + if (settings.json.empty_as_default) + return deserializeTupleJSONImpl(column, istr, settings, + [&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(nested_column, istr, + [&deserialize_nested, element_pos, this](IColumn & nested_column_, ReadBuffer & buf) -> ReturnType + { + return deserialize_nested(nested_column_, buf, elems[element_pos]); + }); + }); + else + return deserializeTupleJSONImpl(column, istr, settings, + [&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType + { + return deserialize_nested(nested_column, istr, elems[element_pos]); + }); } -bool SerializationTuple::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + 
deserializeTextJSONImpl(column, istr, settings); +} + +bool SerializationTuple::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { return deserializeTextJSONImpl(column, istr, settings); } diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index 810673d8b21..c51adb6e536 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -81,7 +81,10 @@ private: template ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const; - template + template + ReturnType deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const; + + template ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; template diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index f9eb586d647..5a7ed523192 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -152,6 +152,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects; format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; + format_settings.json.empty_as_default = settings.input_format_json_empty_as_default; format_settings.json.type_json_skip_duplicated_paths = settings.type_json_skip_duplicated_paths; format_settings.null_as_default = settings.input_format_null_as_default; format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields; diff --git a/src/Formats/FormatSettings.h 
b/src/Formats/FormatSettings.h index 8b489812662..00b32ae172f 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -237,6 +237,7 @@ struct FormatSettings bool infer_incomplete_types_as_strings = true; bool throw_on_bad_escape_sequence = true; bool ignore_unnecessary_fields = true; + bool empty_as_default = false; bool type_json_skip_duplicated_paths = false; } json{}; diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 123f2e4f608..e4d43140ca0 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -2,12 +2,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include @@ -286,11 +288,19 @@ namespace JSONUtils return true; } - if (as_nullable) - return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization); + auto deserialize = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf) -> bool + { + if (as_nullable) + return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf, format_settings, serialization); - serialization->deserializeTextJSON(column, in, format_settings); - return true; + serialization->deserializeTextJSON(column_, buf, format_settings); + return true; + }; + + if (format_settings.json.empty_as_default) + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, in, deserialize); + else + return deserialize(column, in); } catch (Exception & e) { @@ -920,6 +930,78 @@ namespace JSONUtils } } + template + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested) + { + static constexpr auto throw_exception = std::is_same_v; + + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + if (istr.eof() || *istr.position() != EMPTY_STRING[0]) + return deserialize_nested(column, 
istr); + + auto do_deserialize = [](IColumn & column_, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType + { + if (check_for_empty_string(buf)) + { + column_.insertDefault(); + return ReturnType(default_column_return_value); + } + return deserialize(column_, buf); + }; + + if (istr.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf) -> bool + { + auto * pos = buf.position(); + if (checkString(EMPTY_STRING, buf)) + return true; + buf.position() = pos; + return false; + }; + + return do_deserialize(column, istr, check_for_empty_string, deserialize_nested); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and roll back if the check fails. + + auto check_for_empty_string = [](ReadBuffer & buf) -> bool + { + auto & peekable_buf = assert_cast(buf); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + peekable_buf.rollbackToCheckpoint(); + return false; + }; + + auto deserialize_nested_with_check = [&deserialize_nested](IColumn & column_, ReadBuffer & buf) -> ReturnType + { + auto & peekable_buf = assert_cast(buf); + if constexpr (throw_exception) + deserialize_nested(column_, peekable_buf); + else if (!deserialize_nested(column_, peekable_buf)) + return ReturnType(false); + + if (unlikely(peekable_buf.hasUnreadData())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available())); + + return ReturnType(true); + }; + + PeekableReadBuffer peekable_buf(istr, true); + return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_nested_with_check); + } + + template 
void deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); } } diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index 622703947b9..492da52eb7e 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace DB @@ -146,6 +147,16 @@ namespace JSONUtils bool skipUntilFieldInObject(ReadBuffer & in, const String & desired_field_name, const FormatSettings::JSON & settings); void skipTheRestOfObject(ReadBuffer & in, const FormatSettings::JSON & settings); + + template + using NestedDeserialize = std::function; + + template + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + + extern template void deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + extern template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + extern template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); } } diff --git a/tests/queries/0_stateless/03222_json_empty_as_default.reference b/tests/queries/0_stateless/03222_json_empty_as_default.reference new file mode 100644 index 00000000000..1a98f45577a --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default.reference @@ -0,0 +1,47 @@ +-- Simple types +-- { echoOn } +SELECT x FROM format(JSONEachRow, 'x Date', '{"x":""}'); +1970-01-01 +SELECT x FROM format(JSONEachRow, 'x Date32', '{"x":""}'); 
+1970-01-01 +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime', '{"x":""}'); +1970-01-01 00:00:00 +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime64', '{"x":""}'); +1970-01-01 00:00:00.000 +SELECT x FROM format(JSONEachRow, 'x IPv4', '{"x":""}'); +0.0.0.0 +SELECT x FROM format(JSONEachRow, 'x IPv6', '{"x":""}'); +:: +SELECT x FROM format(JSONEachRow, 'x UUID', '{"x":""}'); +00000000-0000-0000-0000-000000000000 +-- { echoOn } +SELECT COUNT(DISTINCT col) FROM table1; +1 +-- { echoOn } +SELECT * FROM table1 ORDER BY address ASC; +:: +2001:db8:3333:4444:5555:6666:7777:8888 +-- Nullable +-- { echoOn } +SELECT x FROM format(JSONEachRow, 'x Nullable(IPv6)', '{"x":""}'); +\N +-- Compound types +SELECT x FROM format(JSONEachRow, 'x Array(UUID)', '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}'); +['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e','00000000-0000-0000-0000-000000000000'] +SELECT x FROM format(JSONEachRow, 'x Array(Nullable(IPv6))', '{"x":["",""]}'); +[NULL,NULL] +SELECT x FROM format(JSONEachRow, 'x Tuple(Date, IPv4, String)', '{"x":["", "", "abc"]}'); +('1970-01-01','0.0.0.0','abc') +SELECT x FROM format(JSONEachRow, 'x Map(String, IPv6)', '{"x":{"abc": ""}}'); +{'abc':'::'} +SELECT x FROM format(JSONEachRow, 'x Variant(Date, UUID)', '{"x":""}'); +\N +-- Deep composition +SELECT x FROM format(JSONEachRow, 'x Array(Array(IPv6))', '{"x":[["2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF", ""], ["", "2001:db8:3333:4444:5555:6666:7777:8888"]]}'); +[['2001:db8:3333:4444:cccc:dddd:eeee:ffff','::'],['::','2001:db8:3333:4444:5555:6666:7777:8888']] +SELECT x FROM format(JSONEachRow, 'x Variant(Date, Array(UUID))', '{"x":["", "b15f852c-c41a-4fd6-9247-1929c841715e"]}'); +['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e'] +SELECT x FROM format(JSONEachRow, 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))', '{"x":[[""], ["",{"abc":""}]]}'); 
+(['00000000-0000-0000-0000-000000000000'],('00000000-0000-0000-0000-000000000000',{'abc':'::'})) +SELECT x FROM format(JSONEachRow, 'x Map(Tuple(Date,IPv4), Variant(UUID,IPv6))', '{"x":{["",""]:""}}'); +{('1970-01-01','0.0.0.0'):NULL} diff --git a/tests/queries/0_stateless/03222_json_empty_as_default.sql b/tests/queries/0_stateless/03222_json_empty_as_default.sql new file mode 100644 index 00000000000..1243d450c2e --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default.sql @@ -0,0 +1,60 @@ +SET input_format_json_empty_as_default = 1, allow_experimental_variant_type = 1; + +-- Simple types +-- { echoOn } +SELECT x FROM format(JSONEachRow, 'x Date', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x Date32', '{"x":""}'); +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime', '{"x":""}'); +SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime64', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x IPv4', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x IPv6', '{"x":""}'); +SELECT x FROM format(JSONEachRow, 'x UUID', '{"x":""}'); +-- { echoOff } + +-- Simple type AggregateFunction +DROP TABLE IF EXISTS table1; +CREATE TABLE table1(col AggregateFunction(uniq, UInt64)) ENGINE=Memory(); +DROP TABLE IF EXISTS table2; +CREATE TABLE table2(UserID UInt64) ENGINE=Memory(); + +INSERT INTO table1 SELECT uniqState(UserID) FROM table2; +INSERT INTO table1 SELECT x FROM format(JSONEachRow, 'x AggregateFunction(uniq, UInt64)' AS T, '{"x":""}'); + +-- { echoOn } +SELECT COUNT(DISTINCT col) FROM table1; +-- { echoOff } + +DROP TABLE table1; +DROP TABLE table2; + +-- The setting input_format_defaults_for_omitted_fields determines the default value if enabled. 
+CREATE TABLE table1(address IPv6 DEFAULT toIPv6('2001:db8:3333:4444:5555:6666:7777:8888')) ENGINE=Memory(); + +SET input_format_defaults_for_omitted_fields = 0; +INSERT INTO table1 FORMAT JSONEachRow {"address":""}; + +SET input_format_defaults_for_omitted_fields = 1; +INSERT INTO table1 FORMAT JSONEachRow {"address":""}; + +-- { echoOn } +SELECT * FROM table1 ORDER BY address ASC; +-- { echoOff } + +DROP TABLE table1; + +-- Nullable +-- { echoOn } +SELECT x FROM format(JSONEachRow, 'x Nullable(IPv6)', '{"x":""}'); + +-- Compound types +SELECT x FROM format(JSONEachRow, 'x Array(UUID)', '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}'); +SELECT x FROM format(JSONEachRow, 'x Array(Nullable(IPv6))', '{"x":["",""]}'); +SELECT x FROM format(JSONEachRow, 'x Tuple(Date, IPv4, String)', '{"x":["", "", "abc"]}'); +SELECT x FROM format(JSONEachRow, 'x Map(String, IPv6)', '{"x":{"abc": ""}}'); +SELECT x FROM format(JSONEachRow, 'x Variant(Date, UUID)', '{"x":""}'); + +-- Deep composition +SELECT x FROM format(JSONEachRow, 'x Array(Array(IPv6))', '{"x":[["2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF", ""], ["", "2001:db8:3333:4444:5555:6666:7777:8888"]]}'); +SELECT x FROM format(JSONEachRow, 'x Variant(Date, Array(UUID))', '{"x":["", "b15f852c-c41a-4fd6-9247-1929c841715e"]}'); +SELECT x FROM format(JSONEachRow, 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))', '{"x":[[""], ["",{"abc":""}]]}'); +SELECT x FROM format(JSONEachRow, 'x Map(Tuple(Date,IPv4), Variant(UUID,IPv6))', '{"x":{["",""]:""}}'); diff --git a/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference new file mode 100644 index 00000000000..8176d7895d8 --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference @@ -0,0 +1,8 @@ +Array(UUID) 
+{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6))) +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} diff --git a/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh new file mode 100755 index 00000000000..6b69fb2e9dc --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.json + +# Wrapper for clickhouse-local to always output in JSONEachRow format, that +# way format settings will not affect output. +function clickhouse_local() +{ + $CLICKHOUSE_LOCAL --output-format JSONEachRow "$@" +} + +echo 'Array(UUID)' +echo '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}' > $DATA_FILE +# Use increasingly smaller read buffers. 
+clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=4" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=2" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=1" + +echo 'Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))' +echo '{"x":[[""], ["",{"abc":""}]]}' > $DATA_FILE +# Use increasingly smaller read buffers. +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=16" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=8" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=1" + +rm $DATA_FILE