From 0ff90e4a5c222caa93863dc5e4dcfa2816a64d20 Mon Sep 17 00:00:00 2001 From: Alexis Arnaud Date: Thu, 1 Aug 2024 01:31:31 +0200 Subject: [PATCH] post-review changes --- src/DataTypes/DataTypeAggregateFunction.h | 2 - src/DataTypes/DataTypeDate.h | 1 - src/DataTypes/DataTypeDate32.h | 1 - src/DataTypes/DataTypeDateTime.h | 1 - src/DataTypes/DataTypeDateTime64.h | 2 - src/DataTypes/DataTypeIPv4andIPv6.h | 2 - src/DataTypes/DataTypeUUID.h | 1 - src/DataTypes/IDataType.h | 3 - src/DataTypes/Serializations/ISerialization.h | 11 +- .../SerializationAggregateFunction.cpp | 2 +- .../SerializationAggregateFunction.h | 5 +- .../Serializations/SerializationArray.cpp | 128 +++++++++++++++--- .../SerializationAsStringNonTrivialJSON.h | 61 --------- .../Serializations/SerializationDate.cpp | 4 +- .../Serializations/SerializationDate.h | 7 +- .../Serializations/SerializationDate32.cpp | 4 +- .../Serializations/SerializationDate32.h | 7 +- .../Serializations/SerializationDateTime.cpp | 4 +- .../Serializations/SerializationDateTime.h | 7 +- .../SerializationDateTime64.cpp | 6 +- .../Serializations/SerializationDateTime64.h | 7 +- .../SerializationIPv4andIPv6.cpp | 4 +- .../Serializations/SerializationIPv4andIPv6.h | 7 +- .../Serializations/SerializationMap.cpp | 114 ++++++++++++++-- .../Serializations/SerializationMap.h | 3 + .../Serializations/SerializationNullable.cpp | 54 ++------ .../Serializations/SerializationNullable.h | 1 - .../Serializations/SerializationTuple.cpp | 89 +++++++++++- .../Serializations/SerializationUUID.cpp | 4 +- .../Serializations/SerializationUUID.h | 7 +- src/Formats/JSONUtils.cpp | 84 +++++++++--- src/IO/ReadHelpers.cpp | 17 --- src/IO/ReadHelpers.h | 4 - ... => 03215_json_empty_as_default.reference} | 8 +- ...lt.sql => 03215_json_empty_as_default.sql} | 0 35 files changed, 415 insertions(+), 247 deletions(-) delete mode 100644 src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h rename tests/queries/0_stateless/{03210_json_empty_as_default.reference => 03215_json_empty_as_default.reference} (95%) rename tests/queries/0_stateless/{03210_json_empty_as_default.sql => 03215_json_empty_as_default.sql} (100%) diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 2ab2e53100b..e3a4f9726d9 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -64,8 +64,6 @@ public: SerializationPtr doGetDefaultSerialization() const override; bool supportsSparseSerialization() const override { return false; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } - bool isVersioned() const; /// Version is not empty only if it was parsed from AST or implicitly cast to 0 or version according diff --git a/src/DataTypes/DataTypeDate.h b/src/DataTypes/DataTypeDate.h index cb7a603705d..0e08b9ba2ca 100644 --- a/src/DataTypes/DataTypeDate.h +++ b/src/DataTypes/DataTypeDate.h @@ -17,7 +17,6 @@ public: bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeDate32.h b/src/DataTypes/DataTypeDate32.h index 53bd010b7cf..65633e7a228 100644 --- a/src/DataTypes/DataTypeDate32.h +++ b/src/DataTypes/DataTypeDate32.h @@ -18,7 +18,6 @@ public: bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 11b579920ba..5519240dee1 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -44,7 +44,6 @@ public: bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } bool equals(const IDataType & rhs) const override; diff --git a/src/DataTypes/DataTypeDateTime64.h b/src/DataTypes/DataTypeDateTime64.h index dd5ff7e6550..64cedd798d1 100644 --- a/src/DataTypes/DataTypeDateTime64.h +++ b/src/DataTypes/DataTypeDateTime64.h @@ -39,8 +39,6 @@ public: bool isSummable() const override { return false; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } - protected: SerializationPtr doGetDefaultSerialization() const override; }; diff --git a/src/DataTypes/DataTypeIPv4andIPv6.h b/src/DataTypes/DataTypeIPv4andIPv6.h index 520af4f21e0..5aea55751a7 100644 --- a/src/DataTypes/DataTypeIPv4andIPv6.h +++ b/src/DataTypes/DataTypeIPv4andIPv6.h @@ -46,7 +46,6 @@ public: size_t getSizeOfValueInMemory() const override { return sizeof(IPv4); } bool isCategorial() const override { return true; } bool canBeInsideLowCardinality() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } SerializationPtr doGetDefaultSerialization() const override { return std::make_shared>(); } }; @@ -85,7 +84,6 @@ public: size_t getSizeOfValueInMemory() const override { return sizeof(IPv6); } bool isCategorial() const override { return true; } bool canBeInsideLowCardinality() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } SerializationPtr doGetDefaultSerialization() const override { return std::make_shared>(); } }; diff --git a/src/DataTypes/DataTypeUUID.h b/src/DataTypes/DataTypeUUID.h index 1aeab1b78ba..90cdd90d68d 100644 --- a/src/DataTypes/DataTypeUUID.h +++ b/src/DataTypes/DataTypeUUID.h @@ -42,7 +42,6 @@ public: size_t getSizeOfValueInMemory() const override { return sizeof(UUID); } bool isCategorial() const override { return true; } bool canBeInsideLowCardinality() const override { return true; } - bool isNonTriviallySerializedAsStringJSON() const override { return true; } SerializationPtr doGetDefaultSerialization() const override; }; diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 174531cad27..397ae3d8be9 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -328,9 +328,6 @@ public: /// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column. static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint); - /// Checks if this is a non-numeric non-string type which is serialized as a JSON string. - virtual bool isNonTriviallySerializedAsStringJSON() const { return false; } - protected: friend class DataTypeFactory; friend class AggregateFunctionSimpleState; diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index 3f4f4b8f875..5d0bf60c59f 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int NOT_IMPLEMENTED; } class IDataType; @@ -399,20 +398,12 @@ public: virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; virtual bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; - /// The following two methods are implemented only for non-numeric non-string simple data types. - virtual void deserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method deserializeTextNoEmptyCheckJSON is not supported"); - } - virtual bool tryDeserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryDeserializeTextNoEmptyCheckJSON is not supported"); - } virtual void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t /*indent*/) const { serializeTextJSON(column, row_num, ostr, settings); } + /** Text serialization for putting into the XML format. */ virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 39c3f389619..41b198890e4 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -182,7 +182,7 @@ void SerializationAggregateFunction::serializeTextJSON(const IColumn & column, s } -void SerializationAggregateFunction::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void SerializationAggregateFunction::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { String s; readJSONString(s, istr, settings.json); diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.h b/src/DataTypes/Serializations/SerializationAggregateFunction.h index 6afa1cd4e97..c45fc79f714 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.h +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.h @@ -3,13 +3,12 @@ #include #include -#include namespace DB { -class SerializationAggregateFunction final : public SerializationAsStringNonTrivialJSON +class SerializationAggregateFunction final : public ISerialization { private: AggregateFunctionPtr function; @@ -38,7 +37,7 @@ public: void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationArray.cpp b/src/DataTypes/Serializations/SerializationArray.cpp index 0a9c4529e23..eb24307d9e9 100644 --- a/src/DataTypes/Serializations/SerializationArray.cpp +++ b/src/DataTypes/Serializations/SerializationArray.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -614,29 +615,124 @@ void SerializationArray::serializeTextJSONPretty(const IColumn & column, size_t writeChar(']', ostr); } +namespace +{ +template +ReturnType deserializeTextJSONArrayImpl(IColumn & column, ReadBuffer & istr, const SerializationPtr & nested, const FormatSettings & settings) +{ + static constexpr auto throw_exception = std::is_same_v; + + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + auto do_deserialize_nested = [](IColumn & nested_column, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType + { + if (check_for_empty_string(buf)) + { + nested_column.insertDefault(); + return ReturnType(true); + } + return deserialize(nested_column, buf); + }; + + auto deserialize_nested_impl = [&settings, &nested](IColumn & nested_column, ReadBuffer & buf) -> ReturnType + { + if constexpr (throw_exception) + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); + else + nested->deserializeTextJSON(nested_column, buf, settings); + return; + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested); + else + return nested->tryDeserializeTextJSON(nested_column, buf, settings); + } + }; + + auto deserialize_nested = [&settings, &do_deserialize_nested, &deserialize_nested_impl](IColumn & nested_column, ReadBuffer & buf) -> ReturnType + { + if (!settings.json.empty_as_default || buf.eof() || *buf.position() != EMPTY_STRING[0]) + return deserialize_nested_impl(nested_column, buf); + + if (buf.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto * pos = buf_.position(); + if (checkString(EMPTY_STRING, buf_)) + return true; + else + { + buf_.position() = pos; + return false; + } + }; + + return do_deserialize_nested(nested_column, buf, check_for_empty_string, deserialize_nested_impl); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + else + { + peekable_buf.rollbackToCheckpoint(); + return false; + } + }; + + auto deserialize_nested_impl_with_check = [&deserialize_nested_impl](IColumn & nested_column_, ReadBuffer & buf_) -> ReturnType + { + auto & peekable_buf = assert_cast(buf_); + if constexpr (throw_exception) + { + deserialize_nested_impl(nested_column_, peekable_buf); + assert(!peekable_buf.hasUnreadData()); + } + else + { + if (!deserialize_nested_impl(nested_column_, peekable_buf)) + return false; + if (likely(!peekable_buf.hasUnreadData())) + return true; + return false; + } + }; + + PeekableReadBuffer peekable_buf(buf, true); + return do_deserialize_nested(nested_column, peekable_buf, check_for_empty_string, deserialize_nested_impl_with_check); + }; + + return deserializeTextImpl(column, istr, + [&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType + { + return deserialize_nested(nested_column, istr); + }, false); +} +} void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, istr, - [&](IColumn & nested_column) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); - else - nested->deserializeTextJSON(nested_column, istr, settings); - }, false); + deserializeTextJSONArrayImpl(column, istr, nested, settings); } bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto read_nested = [&](IColumn & nested_column) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested); - return nested->tryDeserializeTextJSON(nested_column, istr, settings); - }; - - return deserializeTextImpl(column, istr, std::move(read_nested), false); + return deserializeTextJSONArrayImpl(column, istr, nested, settings); } diff --git a/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h b/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h deleted file mode 100644 index f52e1982830..00000000000 --- a/src/DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h +++ /dev/null @@ -1,61 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -/** Serialization for non-numeric non-string data types serialized as JSON strings - * For these data types, we support an option, input_format_json_empty_as_default, which, when set to 1, - * allows for JSON deserialization to treat an encountered empty string as a default value for the specified type. - * Derived classes must implement the following methods: - * deserializeTextNoEmptyCheckJSON() and tryDeserializeTextNoEmptyCheckJSON() - * instead of deserializeTextJSON() and tryDeserializeTextJSON() respectively. - */ -template -requires std::derived_from -class SerializationAsStringNonTrivialJSON : public T -{ -public: - using T::T; - - void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & format_settings) const override - { - if (format_settings.json.empty_as_default && tryMatchEmptyString(istr)) - column.insertDefault(); - else - deserializeTextNoEmptyCheckJSON(column, istr, format_settings); - } - - bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & format_settings) const override - { - if (format_settings.json.empty_as_default && tryMatchEmptyString(istr)) - { - column.insertDefault(); - return true; - } - else - return tryDeserializeTextNoEmptyCheckJSON(column, istr, format_settings); - } - - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override = 0; - - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryDeserializeTextNoEmptyCheckJSON is not supported"); - } -}; - -} diff --git a/src/DataTypes/Serializations/SerializationDate.cpp b/src/DataTypes/Serializations/SerializationDate.cpp index 3f122189c22..38e1bb87b6d 100644 --- a/src/DataTypes/Serializations/SerializationDate.cpp +++ b/src/DataTypes/Serializations/SerializationDate.cpp @@ -85,7 +85,7 @@ void SerializationDate::serializeTextJSON(const IColumn & column, size_t row_num writeChar('"', ostr); } -void SerializationDate::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { DayNum x; assertChar('"', istr); @@ -94,7 +94,7 @@ void SerializationDate::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBu assert_cast(column).getData().push_back(x); } -bool SerializationDate::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +bool SerializationDate::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { DayNum x; if (!checkChar('"', istr) || !tryReadDateText(x, istr, time_zone) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDate.h b/src/DataTypes/Serializations/SerializationDate.h index 10c83171527..dcf79eb49da 100644 --- a/src/DataTypes/Serializations/SerializationDate.h +++ b/src/DataTypes/Serializations/SerializationDate.h @@ -1,13 +1,12 @@ #pragma once #include -#include #include namespace DB { -class SerializationDate final : public SerializationAsStringNonTrivialJSON> +class SerializationDate final : public SerializationNumber { public: explicit SerializationDate(const DateLUTImpl & time_zone_ = DateLUT::instance()); @@ -22,8 +21,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDate32.cpp b/src/DataTypes/Serializations/SerializationDate32.cpp index 8ad07b534ce..70a22d59e42 100644 --- a/src/DataTypes/Serializations/SerializationDate32.cpp +++ b/src/DataTypes/Serializations/SerializationDate32.cpp @@ -83,7 +83,7 @@ void SerializationDate32::serializeTextJSON(const IColumn & column, size_t row_n writeChar('"', ostr); } -void SerializationDate32::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { ExtendedDayNum x; assertChar('"', istr); @@ -92,7 +92,7 @@ void SerializationDate32::deserializeTextNoEmptyCheckJSON(IColumn & column, Read assert_cast(column).getData().push_back(x); } -bool SerializationDate32::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +bool SerializationDate32::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { ExtendedDayNum x; if (!checkChar('"', istr) || !tryReadDateText(x, istr, time_zone) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDate32.h b/src/DataTypes/Serializations/SerializationDate32.h index ac6239fbc2b..be2e2b76c1d 100644 --- a/src/DataTypes/Serializations/SerializationDate32.h +++ b/src/DataTypes/Serializations/SerializationDate32.h @@ -1,12 +1,11 @@ #pragma once #include -#include #include namespace DB { -class SerializationDate32 final : public SerializationAsStringNonTrivialJSON> +class SerializationDate32 final : public SerializationNumber { public: explicit SerializationDate32(const DateLUTImpl & time_zone_ = DateLUT::instance()); @@ -21,8 +20,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp index 80b2d51140d..c5c819ce7fa 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime.cpp @@ -180,7 +180,7 @@ void SerializationDateTime::serializeTextJSON(const IColumn & column, size_t row writeChar('"', ostr); } -void SerializationDateTime::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void SerializationDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { time_t x = 0; if (checkChar('"', istr)) @@ -196,7 +196,7 @@ void SerializationDateTime::deserializeTextNoEmptyCheckJSON(IColumn & column, Re assert_cast(column).getData().push_back(static_cast(x)); } -bool SerializationDateTime::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +bool SerializationDateTime::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { time_t x = 0; if (checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDateTime.h b/src/DataTypes/Serializations/SerializationDateTime.h index 0041f221ccf..584b0c4116b 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.h +++ b/src/DataTypes/Serializations/SerializationDateTime.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include class DateLUTImpl; @@ -9,7 +8,7 @@ class DateLUTImpl; namespace DB { -class SerializationDateTime final : public SerializationAsStringNonTrivialJSON>, public TimezoneMixin +class SerializationDateTime final : public SerializationNumber, public TimezoneMixin { public: explicit SerializationDateTime(const TimezoneMixin & time_zone_); @@ -24,8 +23,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp index cdac7f785d1..442e29edd52 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp @@ -15,7 +15,7 @@ namespace DB SerializationDateTime64::SerializationDateTime64( UInt32 scale_, const TimezoneMixin & time_zone_) - : SerializationAsStringNonTrivialJSON>(DecimalUtils::max_precision, scale_) + : SerializationDecimalBase(DecimalUtils::max_precision, scale_) , TimezoneMixin(time_zone_) { } @@ -170,7 +170,7 @@ void SerializationDateTime64::serializeTextJSON(const IColumn & column, size_t r writeChar('"', ostr); } -void SerializationDateTime64::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void SerializationDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { DateTime64 x = 0; if (checkChar('"', istr)) @@ -185,7 +185,7 @@ void SerializationDateTime64::deserializeTextNoEmptyCheckJSON(IColumn & column, assert_cast(column).getData().push_back(x); } -bool SerializationDateTime64::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +bool SerializationDateTime64::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { DateTime64 x = 0; if (checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationDateTime64.h b/src/DataTypes/Serializations/SerializationDateTime64.h index a3bc4d1ad4e..b49bd1e9098 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.h +++ b/src/DataTypes/Serializations/SerializationDateTime64.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include class DateLUTImpl; @@ -9,7 +8,7 @@ class DateLUTImpl; namespace DB { -class SerializationDateTime64 final : public SerializationAsStringNonTrivialJSON>, public TimezoneMixin +class SerializationDateTime64 final : public SerializationDecimalBase, public TimezoneMixin { public: SerializationDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_); @@ -26,8 +25,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp index ecd50a0b9b8..c1beceb4533 100644 --- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.cpp @@ -69,7 +69,7 @@ void SerializationIP::serializeTextJSON(const DB::IColumn & column, size_t } template -void SerializationIP::deserializeTextNoEmptyCheckJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const +void SerializationIP::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const { IPv x; assertChar('"', istr); @@ -84,7 +84,7 @@ void SerializationIP::deserializeTextNoEmptyCheckJSON(DB::IColumn & column, } template -bool SerializationIP::tryDeserializeTextNoEmptyCheckJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const +bool SerializationIP::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const { IPv x; if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h index 44f36252741..a53f257646b 100644 --- a/src/DataTypes/Serializations/SerializationIPv4andIPv6.h +++ b/src/DataTypes/Serializations/SerializationIPv4andIPv6.h @@ -4,14 +4,13 @@ #include #include #include -#include #include namespace DB { template -class SerializationIP : public SerializationAsStringNonTrivialJSON +class SerializationIP : public SimpleTextSerialization { public: void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -23,8 +22,8 @@ public: bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override; diff --git a/src/DataTypes/Serializations/SerializationMap.cpp b/src/DataTypes/Serializations/SerializationMap.cpp index c722b3ac7a1..4a9f701c5c1 100644 --- a/src/DataTypes/Serializations/SerializationMap.cpp +++ b/src/DataTypes/Serializations/SerializationMap.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -315,29 +316,116 @@ void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t ro writeChar('}', ostr); } - -void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +ReturnType SerializationMap::deserializeTextJSONMapImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, istr, - [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) + static constexpr auto throw_exception = std::is_same_v; + + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + auto do_deserialize_subcolumn = [](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn, auto && check_for_empty_string, auto && deserialize) -> ReturnType + { + if (check_for_empty_string(buf)) + { + subcolumn.insertDefault(); + return ReturnType(true); + } + return deserialize(buf, subcolumn_serialization, subcolumn); + }; + + auto deserialize_subcolumn_impl = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + if constexpr (throw_exception) { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); else subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings); - }); + } + else + { + if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); + else + return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); + } + }; + + auto deserialize_subcolumn = [&settings, &do_deserialize_subcolumn, &deserialize_subcolumn_impl](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType + { + if (!settings.json.empty_as_default || buf.eof() || *buf.position() != EMPTY_STRING[0]) + return do_deserialize_subcolumn(buf, subcolumn_serialization, subcolumn, [](ReadBuffer &){ return false; }, deserialize_subcolumn_impl); + + if (buf.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto * pos = buf_.position(); + if (checkString(EMPTY_STRING, buf_)) + return true; + else + { + buf_.position() = pos; + return false; + } + }; + + return do_deserialize_subcolumn(buf, subcolumn_serialization, subcolumn, check_for_empty_string, deserialize_subcolumn_impl); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + else + { + peekable_buf.rollbackToCheckpoint(); + return false; + } + }; + + auto deserialize_subcolumn_impl_with_check = [&deserialize_subcolumn_impl](ReadBuffer & buf_, const SerializationPtr & subcolumn_serialization_, IColumn & subcolumn_) -> ReturnType + { + auto & peekable_buf = assert_cast(buf_); + if constexpr (throw_exception) + { + deserialize_subcolumn_impl(peekable_buf, subcolumn_serialization_, subcolumn_); + assert(!peekable_buf.hasUnreadData()); + } + else + { + if (!deserialize_subcolumn_impl(peekable_buf, subcolumn_serialization_, subcolumn_)) + return false; + if (likely(!peekable_buf.hasUnreadData())) + return true; + return false; + } + }; + + PeekableReadBuffer peekable_buf(buf, true); + return do_deserialize_subcolumn(peekable_buf, subcolumn_serialization, subcolumn, check_for_empty_string, deserialize_subcolumn_impl_with_check); + }; + + return deserializeTextImpl(column, istr, deserialize_subcolumn); +} + +void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextJSONMapImpl(column, istr, settings); } bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - auto reader = [&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) - { - if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization); - return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings); - }; - - return deserializeTextImpl(column, istr, reader); + return deserializeTextJSONMapImpl(column, istr, settings); } void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationMap.h b/src/DataTypes/Serializations/SerializationMap.h index cfcde445c1f..9bdc110c445 100644 --- a/src/DataTypes/Serializations/SerializationMap.h +++ b/src/DataTypes/Serializations/SerializationMap.h @@ -74,6 +74,9 @@ private: template ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const; + + template + ReturnType deserializeTextJSONMapImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const; }; } diff --git a/src/DataTypes/Serializations/SerializationNullable.cpp b/src/DataTypes/Serializations/SerializationNullable.cpp index 3cd767e58eb..e72dd3a42f5 100644 --- a/src/DataTypes/Serializations/SerializationNullable.cpp +++ b/src/DataTypes/Serializations/SerializationNullable.cpp @@ -844,52 +844,25 @@ bool SerializationNullable::tryDeserializeNullJSON(DB::ReadBuffer & istr) return checkString("null", istr); } -namespace -{ - -enum class Strategy : uint8_t -{ - Deserialize, - DeserializeNoEmptyCheck, - TryDeserialize -}; - -template struct ReturnTypeImpl; -template <> struct ReturnTypeImpl { using Type = void; }; -template <> struct ReturnTypeImpl { using Type = bool; }; -template <> struct ReturnTypeImpl { using Type = void; }; - -template -using ReturnType = typename ReturnTypeImpl::Type; - -template struct AlwaysFalse : std::false_type {}; - -template -ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null) +template +ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null) { auto check_for_null = [](ReadBuffer & buf){ return checkStringByFirstCharacterAndAssertTheRest("null", buf); }; - auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf) -> ReturnType + auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf) { - if constexpr (strategy == Strategy::TryDeserialize) + if constexpr (std::is_same_v) return nested->tryDeserializeTextJSON(nested_column, buf, settings); - else if constexpr (strategy == Strategy::Deserialize) - nested->deserializeTextJSON(nested_column, buf, settings); - else if constexpr (strategy == Strategy::DeserializeNoEmptyCheck) - nested->deserializeTextNoEmptyCheckJSON(nested_column, buf, settings); - else - static_assert(AlwaysFalse::value); + nested->deserializeTextJSON(nested_column, buf, settings); }; - return deserializeImpl>(column, istr, check_for_null, deserialize_nested, is_null); -} - + return deserializeImpl(column, istr, check_for_null, deserialize_nested, is_null); } void SerializationNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { ColumnNullable & col = assert_cast(column); bool is_null; - deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null); + deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null); safeAppendToNullMap(col, is_null); } @@ -897,27 +870,20 @@ bool SerializationNullable::tryDeserializeTextJSON(IColumn & column, ReadBuffer { ColumnNullable & col = assert_cast(column); bool is_null; - return deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null) && safeAppendToNullMap(col, is_null); + return deserializeTextJSONImpl(col.getNestedColumn(), istr, settings, nested, is_null) && safeAppendToNullMap(col, is_null); } bool SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) { bool is_null; - deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); - return !is_null; -} - -bool SerializationNullable::deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) -{ - bool is_null; - deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); + deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); return !is_null; } bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) { bool is_null; - return deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); + return deserializeTextJSONImpl(nested_column, istr, settings, nested_serialization, is_null); } void SerializationNullable::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index c5215e2a39f..f7d2d2eadf0 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -88,7 +88,6 @@ public: static bool deserializeNullAsDefaultOrNestedTextQuoted(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextCSV(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); - static bool deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization); /// If Check for NULL and deserialize value into non-nullable column or insert default value of nested type. diff --git a/src/DataTypes/Serializations/SerializationTuple.cpp b/src/DataTypes/Serializations/SerializationTuple.cpp index 594a23ab507..7238c2bdbd7 100644 --- a/src/DataTypes/Serializations/SerializationTuple.cpp +++ b/src/DataTypes/Serializations/SerializationTuple.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -317,24 +318,100 @@ ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuf { static constexpr bool throw_exception = std::is_same_v; - auto deserialize_element = [&](IColumn & element_column, size_t element_pos) + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + + auto do_deserialize_element = [](IColumn & element_column, size_t element_pos, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType + { + if (check_for_empty_string(buf)) + { + element_column.insertDefault(); + return ReturnType(true); + } + + return deserialize(element_column, element_pos, buf); + }; + + auto deserialize_element_impl = [&settings, this](IColumn & element_column, size_t element_pos, ReadBuffer & buf) -> ReturnType { if constexpr (throw_exception) { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) - SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]); + SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(element_column, buf, settings, elems[element_pos]); else - elems[element_pos]->deserializeTextJSON(element_column, istr, settings); - return true; + elems[element_pos]->deserializeTextJSON(element_column, buf, settings); } else { if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(element_column)) - return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, istr, settings, elems[element_pos]); - return elems[element_pos]->tryDeserializeTextJSON(element_column, istr, settings); + return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(element_column, buf, settings, elems[element_pos]); + return elems[element_pos]->tryDeserializeTextJSON(element_column, buf, settings); } }; + auto deserialize_element = [&settings, &do_deserialize_element, &deserialize_element_impl, &istr](IColumn & element_column, size_t element_pos) -> ReturnType + { + if (!settings.json.empty_as_default || istr.eof() || *istr.position() != EMPTY_STRING[0]) + return do_deserialize_element(element_column, element_pos, istr, [](ReadBuffer &) { return false; }, deserialize_element_impl); + + if (istr.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto * pos = buf_.position(); + if (checkString(EMPTY_STRING, buf_)) + return true; + else + { + buf_.position() = pos; + return false; + } + }; + + return do_deserialize_element(element_column, element_pos, istr, check_for_empty_string, deserialize_element_impl); + } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + else + { + peekable_buf.rollbackToCheckpoint(); + return false; + } + }; + + auto deserialize_element_impl_with_check = [&deserialize_element_impl](IColumn & element_column_, size_t element_pos_, ReadBuffer & buf_) -> ReturnType + { + auto & peekable_buf = assert_cast(buf_); + if constexpr (throw_exception) + { + deserialize_element_impl(element_column_, element_pos_, peekable_buf); + assert(!peekable_buf.hasUnreadData()); + } + else + { + if (!deserialize_element_impl(element_column_, element_pos_, peekable_buf)) + return false; + if (likely(!peekable_buf.hasUnreadData())) + return true; + return false; + } + }; + + PeekableReadBuffer peekable_buf(istr, true); + return do_deserialize_element(element_column, element_pos, peekable_buf, check_for_empty_string, deserialize_element_impl_with_check); + }; + if (settings.json.read_named_tuples_as_objects && have_explicit_names) { diff --git a/src/DataTypes/Serializations/SerializationUUID.cpp b/src/DataTypes/Serializations/SerializationUUID.cpp index 21a0ccf676c..f18466ad8ad 100644 --- a/src/DataTypes/Serializations/SerializationUUID.cpp +++ b/src/DataTypes/Serializations/SerializationUUID.cpp @@ -94,7 +94,7 @@ void SerializationUUID::serializeTextJSON(const IColumn & column, size_t row_num writeChar('"', ostr); } -void SerializationUUID::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +void SerializationUUID::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { UUID x; assertChar('"', istr); @@ -103,7 +103,7 @@ void SerializationUUID::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBu assert_cast(column).getData().push_back(x); } -bool SerializationUUID::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +bool SerializationUUID::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const { UUID x; if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr)) diff --git a/src/DataTypes/Serializations/SerializationUUID.h b/src/DataTypes/Serializations/SerializationUUID.h index 185d1d44c16..458504f8f42 100644 --- a/src/DataTypes/Serializations/SerializationUUID.h +++ b/src/DataTypes/Serializations/SerializationUUID.h @@ -1,12 +1,11 @@ #pragma once #include -#include namespace DB { -class SerializationUUID : public SerializationAsStringNonTrivialJSON +class SerializationUUID : public SimpleTextSerialization { public: void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -16,8 +15,8 @@ public: void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; - bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 73189e81f97..34f55ef00d8 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include #include #include @@ -268,6 +270,9 @@ namespace JSONUtils const FormatSettings & format_settings, bool yield_strings) { + static constexpr auto EMPTY_STRING = "\"\""; + static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length(); + try { bool as_nullable = format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type); @@ -286,33 +291,78 @@ namespace JSONUtils return true; } - if (format_settings.json.empty_as_default && type->isNonTriviallySerializedAsStringJSON()) + auto do_deserialize = [](IColumn & column_, ReadBuffer & buf_, auto && check_for_empty_string, auto && deserialize) -> bool { - /// We have a non-numeric non-string data type at the top level. - /// At first glance, it looks like we sort of duplicate the work done in - /// SerializationAsStringNonTrivialJSON. Actually we need to proceed as - /// done here because we want to return false if we inserted a default - /// value on purpose, which the ISerialization interface does not allow for. - if (tryMatchEmptyString(in)) + if (check_for_empty_string(buf_)) { - column.insertDefault(); + column_.insertDefault(); return false; } + else + return deserialize(column_, buf_); + }; - if (as_nullable) - return SerializationNullable::deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(column, in, format_settings, serialization); - - serialization->deserializeTextNoEmptyCheckJSON(column, in, format_settings); - return true; - } - else + auto deserialize_impl = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf_) -> bool { if (as_nullable) - return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization); + return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf_, format_settings, serialization); - serialization->deserializeTextJSON(column, in, format_settings); + serialization->deserializeTextJSON(column_, buf_, format_settings); return true; + }; + + if (!format_settings.json.empty_as_default || in.eof() || *in.position() != EMPTY_STRING[0]) + return do_deserialize(column, in, [](ReadBuffer &) { return false; }, deserialize_impl); + + if (in.available() >= EMPTY_STRING_LENGTH) + { + /// We have enough data in buffer to check if we have an empty string. + auto check_for_empty_string = [](ReadBuffer & buf_) + { + auto * pos = buf_.position(); + if (checkString(EMPTY_STRING, buf_)) + return true; + else + { + buf_.position() = pos; + return false; + } + }; + + return do_deserialize(column, in, check_for_empty_string, deserialize_impl); } + + /// We don't have enough data in buffer to check if we have an empty string. + /// Use PeekableReadBuffer to make a checkpoint before checking for an + /// empty string and rollback if check was failed. + + auto check_for_empty_string = [](ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + peekable_buf.setCheckpoint(); + SCOPE_EXIT(peekable_buf.dropCheckpoint()); + if (checkString(EMPTY_STRING, peekable_buf)) + return true; + else + { + peekable_buf.rollbackToCheckpoint(); + return false; + } + }; + + auto deserialize_impl_with_check = [&deserialize_impl](IColumn & column_, ReadBuffer & buf_) -> bool + { + auto & peekable_buf = assert_cast(buf_); + + if (!deserialize_impl(column_, peekable_buf)) + return false; + if (likely(!peekable_buf.hasUnreadData())) + return true; + return false; + }; + + PeekableReadBuffer peekable_buf(in, true); + return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_impl_with_check); } catch (Exception & e) { diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index e8ef667a4e3..9559462e62b 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1137,23 +1137,6 @@ template void readCSVStringInto(String & s, ReadBuffer & b template void readCSVStringInto(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings); template void readCSVStringInto, false, false>(PaddedPODArray & s, ReadBuffer & buf, const FormatSettings::CSV & settings); -bool tryMatchEmptyString(ReadBuffer & buf) -{ - if (buf.eof() || *buf.position() != '"') - return false; - - ++buf.position(); - - if (buf.eof() || *buf.position() != '"') - { - --buf.position(); - return false; - } - - ++buf.position(); - - return true; -} template ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::JSON & settings) diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 59c1923e02a..ffba4fafb5c 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -665,10 +665,6 @@ void readStringUntilEOFInto(Vector & s, ReadBuffer & buf); template void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings); -/// Consumes the current token if it is an empty string, i.e. two consecutive double quotes, -/// Returns true if consumed. -bool tryMatchEmptyString(ReadBuffer & buf); - /// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception. template ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::JSON & settings); diff --git a/tests/queries/0_stateless/03210_json_empty_as_default.reference b/tests/queries/0_stateless/03215_json_empty_as_default.reference similarity index 95% rename from tests/queries/0_stateless/03210_json_empty_as_default.reference rename to tests/queries/0_stateless/03215_json_empty_as_default.reference index 1c689228cdf..1a98f45577a 100644 --- a/tests/queries/0_stateless/03210_json_empty_as_default.reference +++ b/tests/queries/0_stateless/03215_json_empty_as_default.reference @@ -24,18 +24,18 @@ SELECT * FROM table1 ORDER BY address ASC; -- Nullable -- { echoOn } SELECT x FROM format(JSONEachRow, 'x Nullable(IPv6)', '{"x":""}'); -:: +\N -- Compound types SELECT x FROM format(JSONEachRow, 'x Array(UUID)', '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}'); ['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e','00000000-0000-0000-0000-000000000000'] SELECT x FROM format(JSONEachRow, 'x Array(Nullable(IPv6))', '{"x":["",""]}'); -['::','::'] +[NULL,NULL] SELECT x FROM format(JSONEachRow, 'x Tuple(Date, IPv4, String)', '{"x":["", "", "abc"]}'); ('1970-01-01','0.0.0.0','abc') SELECT x FROM format(JSONEachRow, 'x Map(String, IPv6)', '{"x":{"abc": ""}}'); {'abc':'::'} SELECT x FROM format(JSONEachRow, 'x Variant(Date, UUID)', '{"x":""}'); -00000000-0000-0000-0000-000000000000 +\N -- Deep composition SELECT x FROM format(JSONEachRow, 'x Array(Array(IPv6))', '{"x":[["2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF", ""], ["", "2001:db8:3333:4444:5555:6666:7777:8888"]]}'); [['2001:db8:3333:4444:cccc:dddd:eeee:ffff','::'],['::','2001:db8:3333:4444:5555:6666:7777:8888']] @@ -44,4 +44,4 @@ SELECT x FROM format(JSONEachRow, 'x Variant(Date, Array(UUID))', '{"x":["", "b1 SELECT x FROM format(JSONEachRow, 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))', '{"x":[[""], ["",{"abc":""}]]}'); (['00000000-0000-0000-0000-000000000000'],('00000000-0000-0000-0000-000000000000',{'abc':'::'})) SELECT x FROM format(JSONEachRow, 'x Map(Tuple(Date,IPv4), Variant(UUID,IPv6))', '{"x":{["",""]:""}}'); -{('1970-01-01','0.0.0.0'):'00000000-0000-0000-0000-000000000000'} +{('1970-01-01','0.0.0.0'):NULL} diff --git a/tests/queries/0_stateless/03210_json_empty_as_default.sql b/tests/queries/0_stateless/03215_json_empty_as_default.sql similarity index 100% rename from tests/queries/0_stateless/03210_json_empty_as_default.sql rename to tests/queries/0_stateless/03215_json_empty_as_default.sql