diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index e385f72fb38..ae08566ebd2 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -88,7 +88,6 @@ static std::initializer_list -ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const SerializationPtr & nested, const FormatSettings & settings) +ReturnType SerializationArray::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto deserializer = [&nested](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType + auto deserialize_nested = [&settings, this](IColumn & nested_column, ReadBuffer & buf) -> ReturnType { - auto adapter = [&deserialize_nested, &istr_, &nested](IColumn & nested_column) -> ReturnType + if constexpr (std::is_same_v) { - return deserialize_nested(nested_column, istr_, nested); - }; - return deserializeTextImpl(column_, istr_, std::move(adapter), false); + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); + else + nested->deserializeTextJSON(nested_column, buf, settings); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); + return nested->tryDeserializeTextJSON(nested_column, buf, settings); + } }; - return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, std::move(deserializer)); -} + if (settings.json.empty_as_default) + return deserializeTextImpl(column, istr, + [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(nested_column, istr, deserialize_nested); + }, false); + else + return 
deserializeTextImpl(column, istr, + [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType + { + return deserialize_nested(nested_column, istr); + }, false); } + void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - if (settings.json.empty_as_default) - deserializeEmpyStringAsDefaultOrNested(column, istr, nested, settings); - else - deserializeTextImpl(column, istr, - [&settings, &istr, this](IColumn & nested_column) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); - else - nested->deserializeTextJSON(nested_column, istr, settings); - }, false); + deserializeTextJSONImpl(column, istr, settings); } bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - if (settings.json.empty_as_default) - return deserializeEmpyStringAsDefaultOrNested(column, istr, nested, settings); - - return deserializeTextImpl(column, istr, - [&settings, &istr, this](IColumn & nested_column) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); - return nested->tryDeserializeTextJSON(nested_column, istr, settings); - }, false); + return deserializeTextJSONImpl(column, istr, settings); } diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index c3353f0c251..7e34abfac90 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -82,6 +82,10 @@ public: SerializationPtr create(const SerializationPtr & prev) const override; ColumnPtr create(const ColumnPtr & prev) const override; }; + +private: + template + ReturnType 
deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index 96c21f19805..ae864cbf7b4 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -316,48 +316,53 @@ void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t ro writeChar('}', ostr); } + template -ReturnType SerializationMap::deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +ReturnType SerializationMap::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto deserializer = [this](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType + auto deserialize_nested = [&settings](IColumn & subcolumn, ReadBuffer & buf, const SerializationPtr & subcolumn_serialization) -> ReturnType { - auto adapter = [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & nested_column_serialization, IColumn & nested_column) -> ReturnType + if constexpr (std::is_same_v) { - return deserialize_nested(nested_column, buf, nested_column_serialization); - }; - return this->deserializeTextImpl(column_, istr_, std::move(adapter)); - }; - - return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, std::move(deserializer)); -} - -void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const -{ - if (settings.json.empty_as_default) - deserializeEmpyStringAsDefaultOrNested(column, istr, settings); - else - deserializeTextImpl(column, istr, - [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) - 
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); - else - subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); - }); -} - -bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const -{ - if (settings.json.empty_as_default) - return deserializeEmpyStringAsDefaultOrNested(column, istr, settings); - - return deserializeTextImpl(column, istr, - [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); + else + subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); + } + else { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); - }); + } + }; + + if (settings.json.empty_as_default) + return deserializeTextImpl(column, istr, + [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(subcolumn, buf, + [&deserialize_nested, &subcolumn_serialization](IColumn & subcolumn_, ReadBuffer & buf_) -> ReturnType + { + return deserialize_nested(subcolumn_, buf_, subcolumn_serialization); + }); + }); + else + return deserializeTextImpl(column, istr, + [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + return deserialize_nested(subcolumn, buf, subcolumn_serialization); + }); +} + +void SerializationMap::deserializeTextJSON(IColumn & 
column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextJSONImpl(column, istr, settings); +} + +bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + return deserializeTextJSONImpl(column, istr, settings); } void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index ddf8047f061..007d153ec7e 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -76,7 +76,7 @@ private: ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const; template - ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; + ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index f8cb894c19b..e1fcb1a8d48 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -314,7 +314,7 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t } template -ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const +ReturnType SerializationTuple::deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const { static constexpr auto throw_exception = std::is_same_v; @@ -490,48 +490,52 @@ ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuf } template -ReturnType 
SerializationTuple::deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto deserializer = [&settings, this](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType + auto deserialize_nested = [&settings](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType { - auto adapter = [&deserialize_nested, &istr_, this](IColumn & nested_column, size_t element_pos) -> ReturnType + if constexpr (std::is_same_v) { - return deserialize_nested(nested_column, istr_, elems[element_pos]); - }; - - return deserializeTextJSONImpl(column_, istr_, settings, std::move(adapter)); + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); + else + nested_column_serialization->deserializeTextJSON(nested_column, buf, settings); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); + else + return nested_column_serialization->tryDeserializeTextJSON(nested_column, buf, settings); + } }; - return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, istr, settings, std::move(deserializer)); -} - -void SerializationTuple::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const -{ if (settings.json.empty_as_default) - deserializeEmpyStringAsDefaultOrNested(column, istr, settings); + return deserializeTupleJSONImpl(column, istr, settings, + [&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> 
ReturnType + { + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(nested_column, istr, + [&deserialize_nested, element_pos, this](IColumn & nested_column_, ReadBuffer & buf) -> ReturnType + { + return deserialize_nested(nested_column_, buf, elems[element_pos]); + }); + }); else - deserializeTextJSONImpl(column, istr, settings, - [&settings, &istr, this](IColumn & nested_column, size_t element_pos) -> void - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, elems[element_pos]); - else - elems[element_pos]->deserializeTextJSON(nested_column, istr, settings); - }); + return deserializeTupleJSONImpl(column, istr, settings, + [&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType + { + return deserialize_nested(nested_column, istr, elems[element_pos]); + }); } -bool SerializationTuple::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - if (settings.json.empty_as_default) - return deserializeEmpyStringAsDefaultOrNested(column, istr, settings); + deserializeTextJSONImpl(column, istr, settings); +} - return deserializeTextJSONImpl(column, istr, settings, - [&settings, &istr, this](IColumn & nested_column, size_t element_pos) -> bool - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, elems[element_pos]); - return elems[element_pos]->tryDeserializeTextJSON(nested_column, istr, settings); - }); +bool SerializationTuple::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + return deserializeTextJSONImpl(column, 
istr, settings); } diff --git a/src/DataTypes/Serializations/SerializationTuple.h b/src/DataTypes/Serializations/SerializationTuple.h index 54084617d3b..c51adb6e536 100644 --- a/src/DataTypes/Serializations/SerializationTuple.h +++ b/src/DataTypes/Serializations/SerializationTuple.h @@ -82,10 +82,10 @@ private: ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const; template - ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const; + ReturnType deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const; - template - ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; + template + ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; template ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 4f0187b4d87..d85c9898074 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -2,11 +2,14 @@ #include #include #include +#include #include #include +#include #include #include #include +#include #include @@ -267,9 +270,6 @@ namespace JSONUtils const FormatSettings & format_settings, bool yield_strings) { - static constexpr auto EMPTY_STRING = "\"\""; - static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); - try { bool as_nullable = format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type); @@ -288,70 +288,19 @@ namespace JSONUtils return true; } - auto do_deserialize = [](IColumn & column_, ReadBuffer & buf_, auto && check_for_empty_string, auto && deserialize) -> bool - { - if (check_for_empty_string(buf_)) - { - 
column_.insertDefault(); - return false; - } - else - return deserialize(column_, buf_); - }; - - auto deserialize_impl = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf_) -> bool + auto deserialize = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf) -> bool { if (as_nullable) - return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf_, format_settings, serialization); + return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf, format_settings, serialization); - serialization->deserializeTextJSON(column_, buf_, format_settings); + serialization->deserializeTextJSON(column_, buf, format_settings); return true; }; - if (!format_settings.json.empty_as_default || in.eof() || *in.position() != EMPTY_STRING[0]) - return deserialize_impl(column, in); - - if (in.available() >= EMPTY_STRING_LENGTH) - { - /// We have enough data in buffer to check if we have an empty string. - auto check_for_empty_string = [](ReadBuffer & buf_) - { - auto * pos = buf_.position(); - if (checkString(EMPTY_STRING, buf_)) - return true; - buf_.position() = pos; - return false; - }; - - return do_deserialize(column, in, check_for_empty_string, deserialize_impl); - } - - /// We don't have enough data in buffer to check if we have an empty string. - /// Use PeekableReadBuffer to make a checkpoint before checking for an - /// empty string and rollback if check was failed. 
- - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto & peekable_buf = assert_cast(buf_); - peekable_buf.setCheckpoint(); - SCOPE_EXIT(peekable_buf.dropCheckpoint()); - if (checkString(EMPTY_STRING, peekable_buf)) - return true; - peekable_buf.rollbackToCheckpoint(); - return false; - }; - - auto deserialize_impl_with_check = [&deserialize_impl](IColumn & column_, ReadBuffer & buf_) -> bool - { - auto & peekable_buf = assert_cast(buf_); - bool res = deserialize_impl(column_, peekable_buf); - if (unlikely(peekable_buf.hasUnreadData())) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available())); - return res; - }; - - PeekableReadBuffer peekable_buf(in, true); - return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_impl_with_check); + if (format_settings.json.empty_as_default) + return JSONUtils::deserializeEmpyStringAsDefaultOrNested(column, in, deserialize); + else + return deserialize(column, in); } catch (Exception & e) { @@ -915,6 +864,78 @@ namespace JSONUtils } } + template + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested) + { + static constexpr auto throw_exception = std::is_same_v; + + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + if (istr.eof() || *istr.position() != EMPTY_STRING[0]) + return deserialize_nested(column, istr); + + auto do_deserialize = [](IColumn & column_, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType + { + if (check_for_empty_string(buf)) + { + column_.insertDefault(); + return ReturnType(default_column_return_value); + } + return deserialize(column_, buf); + }; + + if (istr.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer 
to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf) -> bool + { + auto * pos = buf.position(); + if (checkString(EMPTY_STRING, buf)) + return true; + buf.position() = pos; + return false; + }; + + return do_deserialize(column, istr, check_for_empty_string, deserialize_nested); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf) -> bool + { + auto & peekable_buf = assert_cast(buf); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + peekable_buf.rollbackToCheckpoint(); + return false; + }; + + auto deserialize_nested_with_check = [&deserialize_nested](IColumn & column_, ReadBuffer & buf) -> ReturnType + { + auto & peekable_buf = assert_cast(buf); + if constexpr (throw_exception) + deserialize_nested(column_, peekable_buf); + else if (!deserialize_nested(column_, peekable_buf)) + return ReturnType(false); + + if (unlikely(peekable_buf.hasUnreadData())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available())); + + return ReturnType(true); + }; + + PeekableReadBuffer peekable_buf(istr, true); + return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_nested_with_check); + } + + template void deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & 
deserialize_nested); } } diff --git a/src/Formats/JSONUtils.h b/src/Formats/JSONUtils.h index a8e7113388a..50e182c7d27 100644 --- a/src/Formats/JSONUtils.h +++ b/src/Formats/JSONUtils.h @@ -2,16 +2,13 @@ #include #include -#include #include #include -#include #include -#include #include #include -#include #include +#include #include namespace DB @@ -20,11 +17,6 @@ namespace DB class Block; struct JSONInferenceInfo; -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - namespace JSONUtils { std::pair fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows); @@ -147,104 +139,14 @@ namespace JSONUtils void skipTheRestOfObject(ReadBuffer & in, const FormatSettings::JSON & settings); template - ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserializer) - { - static constexpr auto throw_exception = std::is_same_v; + using NestedDeserialize = std::function; - static constexpr auto EMPTY_STRING = "\"\""; - static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + template + ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); - auto do_deserialize_nested = [](IColumn & nested_column, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize, const SerializationPtr & nested_column_serialization) -> ReturnType - { - if (check_for_empty_string(buf)) - { - nested_column.insertDefault(); - return ReturnType(true); - } - return deserialize(nested_column, buf, nested_column_serialization); - }; - - auto deserialize_nested_impl = [&settings](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType - { - if constexpr (throw_exception) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - 
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); - else - nested_column_serialization->deserializeTextJSON(nested_column, buf, settings); - } - else - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization); - return nested_column_serialization->tryDeserializeTextJSON(nested_column, buf, settings); - } - }; - - auto deserialize_nested = [&do_deserialize_nested, &deserialize_nested_impl](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType - { - if (buf.eof() || *buf.position() != EMPTY_STRING[0]) - return deserialize_nested_impl(nested_column, buf, nested_column_serialization); - - if (buf.available() >= EMPTY_STRING_LENGTH) - { - /// We have enough data in buffer to check if we have an empty string. - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto * pos = buf_.position(); - if (checkString(EMPTY_STRING, buf_)) - return true; - buf_.position() = pos; - return false; - }; - - return do_deserialize_nested(nested_column, buf, check_for_empty_string, deserialize_nested_impl, nested_column_serialization); - } - - /// We don't have enough data in buffer to check if we have an empty string. - /// Use PeekableReadBuffer to make a checkpoint before checking for an - /// empty string and rollback if check was failed. 
- - auto check_for_empty_string = [](ReadBuffer & buf_) -> bool - { - auto & peekable_buf = assert_cast(buf_); - peekable_buf.setCheckpoint(); - SCOPE_EXIT(peekable_buf.dropCheckpoint()); - if (checkString(EMPTY_STRING, peekable_buf)) - return true; - peekable_buf.rollbackToCheckpoint(); - return false; - }; - - auto deserialize_nested_impl_with_check = [&deserialize_nested_impl](IColumn & nested_column_, ReadBuffer & buf_, const SerializationPtr & nested_column_serialization_) -> ReturnType - { - auto & peekable_buf = assert_cast(buf_); - - auto enforceNoUnreadData = [&peekable_buf]() -> void - { - if (unlikely(peekable_buf.hasUnreadData())) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available())); - }; - - if constexpr (throw_exception) - { - deserialize_nested_impl(nested_column_, peekable_buf, nested_column_serialization_); - enforceNoUnreadData(); - } - else - { - bool res = deserialize_nested_impl(nested_column_, peekable_buf, nested_column_serialization_); - enforceNoUnreadData(); - return res; - } - }; - - PeekableReadBuffer peekable_buf(buf, true); - return do_deserialize_nested(nested_column, peekable_buf, check_for_empty_string, deserialize_nested_impl_with_check, nested_column_serialization); - }; - - return deserializer(column, istr, deserialize_nested); - } + extern template void deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + extern template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); + extern template bool deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize & deserialize_nested); } } diff --git a/tests/queries/0_stateless/03215_json_empty_as_default.reference 
b/tests/queries/0_stateless/03222_json_empty_as_default.reference similarity index 100% rename from tests/queries/0_stateless/03215_json_empty_as_default.reference rename to tests/queries/0_stateless/03222_json_empty_as_default.reference diff --git a/tests/queries/0_stateless/03215_json_empty_as_default.sql b/tests/queries/0_stateless/03222_json_empty_as_default.sql similarity index 100% rename from tests/queries/0_stateless/03215_json_empty_as_default.sql rename to tests/queries/0_stateless/03222_json_empty_as_default.sql diff --git a/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference new file mode 100644 index 00000000000..8176d7895d8 --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.reference @@ -0,0 +1,8 @@ +Array(UUID) +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]} +Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6))) +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} +{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]} diff --git a/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh new file mode 100755 index 00000000000..6b69fb2e9dc --- /dev/null +++ b/tests/queries/0_stateless/03222_json_empty_as_default_small_read_buffer.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Tags: 
no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.json + +# Wrapper for clickhouse-local to always output in JSONEachRow format, that +# way format settings will not affect output. +function clickhouse_local() +{ + $CLICKHOUSE_LOCAL --output-format JSONEachRow "$@" +} + +echo 'Array(UUID)' +echo '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}' > $DATA_FILE +# Use increasingly smaller read buffers. +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=4" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=2" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=1" + +echo 'Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))' +echo '{"x":[[""], ["",{"abc":""}]]}' > $DATA_FILE +# Use increasingly smaller read buffers. 
+clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=16" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=8" +clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=1" + +rm $DATA_FILE