Support for the input_format_json_empty_as_default setting

This commit is contained in:
Alexis Arnaud 2024-07-17 03:19:07 +02:00
parent dd3fa7c3b6
commit d36176ad85
36 changed files with 300 additions and 48 deletions

View File

@ -1342,6 +1342,7 @@ SELECT * FROM json_each_row_nested
- [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - ignore unknown keys in json object for named tuples. Default value - `false`. - [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - ignore unknown keys in json object for named tuples. Default value - `false`.
- [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`. - [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`.
- [input_format_json_throw_on_bad_escape_sequence](/docs/en/operations/settings/settings-formats.md/#input_format_json_throw_on_bad_escape_sequence) - throw an exception if JSON string contains bad escape sequence. If disabled, bad escape sequences will remain as is in the data. Default value - `true`. - [input_format_json_throw_on_bad_escape_sequence](/docs/en/operations/settings/settings-formats.md/#input_format_json_throw_on_bad_escape_sequence) - throw an exception if JSON string contains bad escape sequence. If disabled, bad escape sequences will remain as is in the data. Default value - `true`.
- [input_format_json_empty_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_json_empty_as_default) - treat empty fields in JSON input as default values. Default value - `false`. For complex default expressions, [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) must be enabled too.
- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. - [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. - [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. - [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.

View File

@ -741,6 +741,17 @@ Possible values:
Default value: 0. Default value: 0.
### input_format_json_empty_as_default {#input_format_json_empty_as_default}
When enabled, replace empty input fields in JSON with default values. For complex default expressions, `input_format_defaults_for_omitted_fields` must be enabled too.
Possible values:
+ 0 — Disable.
+ 1 — Enable.
Default value: 0.
## TSV format settings {#tsv-format-settings} ## TSV format settings {#tsv-format-settings}
### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}

View File

@ -1133,6 +1133,7 @@ class IColumn;
M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \ M(Bool, input_format_json_throw_on_bad_escape_sequence, true, "Throw an exception if JSON string contains bad escape sequence in JSON input formats. If disabled, bad escape sequences will remain as is in the data", 0) \
M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \ M(Bool, input_format_json_ignore_unnecessary_fields, true, "Ignore unnecessary fields and not parse them. Enabling this may not throw exceptions on json strings of invalid format or with duplicated fields", 0) \
M(UInt64, input_format_json_max_depth, 1000, "Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely.", 0) \ M(UInt64, input_format_json_max_depth, 1000, "Maximum depth of a field in JSON. This is not a strict limit, it does not have to be applied precisely.", 0) \
M(Bool, input_format_json_empty_as_default, false, "Treat empty fields in JSON input as default values.", 0) \
M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \
M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \

View File

@ -64,6 +64,8 @@ public:
SerializationPtr doGetDefaultSerialization() const override; SerializationPtr doGetDefaultSerialization() const override;
bool supportsSparseSerialization() const override { return false; } bool supportsSparseSerialization() const override { return false; }
/// Flags this data type as a non-numeric, non-string type that is serialized as a JSON string
/// (overrides the IDataType default, which returns false).
bool isNonTriviallySerializedAsStringJSON() const override { return true; }
bool isVersioned() const; bool isVersioned() const;
/// Version is not empty only if it was parsed from AST or implicitly cast to 0 or version according /// Version is not empty only if it was parsed from AST or implicitly cast to 0 or version according

View File

@ -17,6 +17,7 @@ public:
bool canBeUsedAsVersion() const override { return true; } bool canBeUsedAsVersion() const override { return true; }
bool canBeInsideNullable() const override { return true; } bool canBeInsideNullable() const override { return true; }
/// Flags this data type as a non-numeric, non-string type that is serialized as a JSON string
/// (overrides the IDataType default, which returns false).
bool isNonTriviallySerializedAsStringJSON() const override { return true; }
bool equals(const IDataType & rhs) const override; bool equals(const IDataType & rhs) const override;

View File

@ -18,6 +18,7 @@ public:
bool canBeUsedAsVersion() const override { return true; } bool canBeUsedAsVersion() const override { return true; }
bool canBeInsideNullable() const override { return true; } bool canBeInsideNullable() const override { return true; }
/// Flags this data type as a non-numeric, non-string type that is serialized as a JSON string
/// (overrides the IDataType default, which returns false).
bool isNonTriviallySerializedAsStringJSON() const override { return true; }
bool equals(const IDataType & rhs) const override; bool equals(const IDataType & rhs) const override;

View File

@ -44,6 +44,7 @@ public:
bool canBeUsedAsVersion() const override { return true; } bool canBeUsedAsVersion() const override { return true; }
bool canBeInsideNullable() const override { return true; } bool canBeInsideNullable() const override { return true; }
/// Flags this data type as a non-numeric, non-string type that is serialized as a JSON string
/// (overrides the IDataType default, which returns false).
bool isNonTriviallySerializedAsStringJSON() const override { return true; }
bool equals(const IDataType & rhs) const override; bool equals(const IDataType & rhs) const override;

View File

@ -39,6 +39,8 @@ public:
bool isSummable() const override { return false; } bool isSummable() const override { return false; }
/// Flags this data type as a non-numeric, non-string type that is serialized as a JSON string
/// (overrides the IDataType default, which returns false).
bool isNonTriviallySerializedAsStringJSON() const override { return true; }
protected: protected:
SerializationPtr doGetDefaultSerialization() const override; SerializationPtr doGetDefaultSerialization() const override;
}; };

View File

@ -46,6 +46,7 @@ public:
size_t getSizeOfValueInMemory() const override { return sizeof(IPv4); } size_t getSizeOfValueInMemory() const override { return sizeof(IPv4); }
bool isCategorial() const override { return true; } bool isCategorial() const override { return true; }
bool canBeInsideLowCardinality() const override { return true; } bool canBeInsideLowCardinality() const override { return true; }
/// Flags this data type as a non-numeric, non-string type that is serialized as a JSON string
/// (overrides the IDataType default, which returns false).
bool isNonTriviallySerializedAsStringJSON() const override { return true; }
SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv4>>(); } SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv4>>(); }
}; };
@ -84,6 +85,7 @@ public:
size_t getSizeOfValueInMemory() const override { return sizeof(IPv6); } size_t getSizeOfValueInMemory() const override { return sizeof(IPv6); }
bool isCategorial() const override { return true; } bool isCategorial() const override { return true; }
bool canBeInsideLowCardinality() const override { return true; } bool canBeInsideLowCardinality() const override { return true; }
/// Flags this data type as a non-numeric, non-string type that is serialized as a JSON string
/// (overrides the IDataType default, which returns false).
bool isNonTriviallySerializedAsStringJSON() const override { return true; }
SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv6>>(); } SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv6>>(); }
}; };

View File

@ -42,6 +42,7 @@ public:
size_t getSizeOfValueInMemory() const override { return sizeof(UUID); } size_t getSizeOfValueInMemory() const override { return sizeof(UUID); }
bool isCategorial() const override { return true; } bool isCategorial() const override { return true; }
bool canBeInsideLowCardinality() const override { return true; } bool canBeInsideLowCardinality() const override { return true; }
/// Flags this data type as a non-numeric, non-string type that is serialized as a JSON string
/// (overrides the IDataType default, which returns false).
bool isNonTriviallySerializedAsStringJSON() const override { return true; }
SerializationPtr doGetDefaultSerialization() const override; SerializationPtr doGetDefaultSerialization() const override;
}; };

View File

@ -328,6 +328,9 @@ public:
/// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column. /// Updates avg_value_size_hint for newly read column. Uses to optimize deserialization. Zero expected for first column.
static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint); static void updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint);
/// Whether this is a non-numeric, non-string data type that is serialized as a JSON string.
/// Returns false by default; data types with that property override it to return true.
virtual bool isNonTriviallySerializedAsStringJSON() const { return false; }
protected: protected:
friend class DataTypeFactory; friend class DataTypeFactory;
friend class AggregateFunctionSimpleState; friend class AggregateFunctionSimpleState;

View File

@ -398,12 +398,20 @@ public:
virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0; virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0; virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
virtual bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const; virtual bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const;
/// The following two methods are implemented only for non-numeric non-string simple data types.
/// They carry the type-specific JSON parsing that runs without an empty-value check:
/// SerializationAsStringNonTrivialJSON calls them from deserializeTextJSON() / tryDeserializeTextJSON()
/// when the value was not replaced by a default.
/// Default implementation: always throws NOT_IMPLEMENTED.
virtual void deserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method deserializeTextNoEmptyCheckJSON is not supported");
}
/// "Try" counterpart of deserializeTextNoEmptyCheckJSON().
/// Default implementation: always throws NOT_IMPLEMENTED (it never returns false).
virtual bool tryDeserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryDeserializeTextNoEmptyCheckJSON is not supported");
}
virtual void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t /*indent*/) const virtual void serializeTextJSONPretty(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings, size_t /*indent*/) const
{ {
serializeTextJSON(column, row_num, ostr, settings); serializeTextJSON(column, row_num, ostr, settings);
} }
/** Text serialization for putting into the XML format. /** Text serialization for putting into the XML format.
*/ */
virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const

View File

@ -182,7 +182,7 @@ void SerializationAggregateFunction::serializeTextJSON(const IColumn & column, s
} }
void SerializationAggregateFunction::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const void SerializationAggregateFunction::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{ {
String s; String s;
readJSONString(s, istr, settings.json); readJSONString(s, istr, settings.json);

View File

@ -3,12 +3,13 @@
#include <AggregateFunctions/IAggregateFunction_fwd.h> #include <AggregateFunctions/IAggregateFunction_fwd.h>
#include <DataTypes/Serializations/ISerialization.h> #include <DataTypes/Serializations/ISerialization.h>
#include <DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h>
namespace DB namespace DB
{ {
class SerializationAggregateFunction final : public ISerialization class SerializationAggregateFunction final : public SerializationAsStringNonTrivialJSON<ISerialization>
{ {
private: private:
AggregateFunctionPtr function; AggregateFunctionPtr function;
@ -37,7 +38,7 @@ public:
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

View File

@ -0,0 +1,56 @@
#pragma once
#include <Core/Types_fwd.h>
#include <Columns/IColumn.h>
#include <DataTypes/Serializations/ISerialization.h>
#include <Formats/FormatSettings.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <concepts>
namespace DB
{
/** Mixin for serializations of data types that are neither numeric nor string
  * but are nevertheless represented as strings in JSON.
  *
  * It adds support for the input_format_json_empty_as_default setting: when
  * format_settings.json.empty_as_default is set and tryMatchEmptyString() succeeds on the input,
  * a default value is inserted into the column instead of parsing the field.
  * (Presumably tryMatchEmptyString() recognises and consumes an empty JSON string; its
  * definition is not in this header — confirm in IO/ReadHelpers.h.)
  *
  * Derived classes put their type-specific parsing into
  *   deserializeTextNoEmptyCheckJSON() and tryDeserializeTextNoEmptyCheckJSON()
  * rather than overriding deserializeTextJSON() and tryDeserializeTextJSON() directly.
  *
  * T is the serialization being wrapped; it must derive from ISerialization.
  * The constructors of T are inherited unchanged.
  */
template <typename T>
requires std::derived_from<T, ISerialization>
class SerializationAsStringNonTrivialJSON : public T
{
public:
    using T::T;

    /// Inserts a default value if the setting is on and the input matches an empty string;
    /// otherwise forwards to the type-specific parser.
    void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & format_settings) const override
    {
        /// The buffer is probed only when the setting is enabled (short-circuit evaluation).
        const bool use_default = format_settings.json.empty_as_default && tryMatchEmptyString(istr);
        if (use_default)
        {
            column.insertDefault();
            return;
        }

        deserializeTextNoEmptyCheckJSON(column, istr, format_settings);
    }

    /// Same as deserializeTextJSON(), but returns true after inserting a default and otherwise
    /// returns whatever the type-specific "try" parser returns.
    bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & format_settings) const override
    {
        const bool use_default = format_settings.json.empty_as_default && tryMatchEmptyString(istr);
        if (use_default)
        {
            column.insertDefault();
            return true;
        }

        return tryDeserializeTextNoEmptyCheckJSON(column, istr, format_settings);
    }

    /// Type-specific JSON parsing; every derived class must provide it.
    void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override = 0;

    /// Optional type-specific "try" parsing; throws NOT_IMPLEMENTED unless a derived class overrides it.
    bool tryDeserializeTextNoEmptyCheckJSON(IColumn & /*column*/, ReadBuffer & /*istr*/, const FormatSettings &) const override
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryDeserializeTextNoEmptyCheckJSON is not supported");
    }
};
}

View File

@ -85,7 +85,7 @@ void SerializationDate::serializeTextJSON(const IColumn & column, size_t row_num
writeChar('"', ostr); writeChar('"', ostr);
} }
void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const void SerializationDate::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{ {
DayNum x; DayNum x;
assertChar('"', istr); assertChar('"', istr);
@ -94,7 +94,7 @@ void SerializationDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr,
assert_cast<ColumnUInt16 &>(column).getData().push_back(x); assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
} }
bool SerializationDate::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const bool SerializationDate::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{ {
DayNum x; DayNum x;
if (!checkChar('"', istr) || !tryReadDateText(x, istr, time_zone) || !checkChar('"', istr)) if (!checkChar('"', istr) || !tryReadDateText(x, istr, time_zone) || !checkChar('"', istr))

View File

@ -1,12 +1,13 @@
#pragma once #pragma once
#include <DataTypes/Serializations/SerializationNumber.h> #include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h>
#include <Common/DateLUT.h> #include <Common/DateLUT.h>
namespace DB namespace DB
{ {
class SerializationDate final : public SerializationNumber<UInt16> class SerializationDate final : public SerializationAsStringNonTrivialJSON<SerializationNumber<UInt16>>
{ {
public: public:
explicit SerializationDate(const DateLUTImpl & time_zone_ = DateLUT::instance()); explicit SerializationDate(const DateLUTImpl & time_zone_ = DateLUT::instance());
@ -21,8 +22,8 @@ public:
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

View File

@ -83,7 +83,7 @@ void SerializationDate32::serializeTextJSON(const IColumn & column, size_t row_n
writeChar('"', ostr); writeChar('"', ostr);
} }
void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const void SerializationDate32::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{ {
ExtendedDayNum x; ExtendedDayNum x;
assertChar('"', istr); assertChar('"', istr);
@ -92,7 +92,7 @@ void SerializationDate32::deserializeTextJSON(IColumn & column, ReadBuffer & ist
assert_cast<ColumnInt32 &>(column).getData().push_back(x); assert_cast<ColumnInt32 &>(column).getData().push_back(x);
} }
bool SerializationDate32::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const bool SerializationDate32::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{ {
ExtendedDayNum x; ExtendedDayNum x;
if (!checkChar('"', istr) || !tryReadDateText(x, istr, time_zone) || !checkChar('"', istr)) if (!checkChar('"', istr) || !tryReadDateText(x, istr, time_zone) || !checkChar('"', istr))

View File

@ -1,11 +1,12 @@
#pragma once #pragma once
#include <DataTypes/Serializations/SerializationNumber.h> #include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h>
#include <Common/DateLUT.h> #include <Common/DateLUT.h>
namespace DB namespace DB
{ {
class SerializationDate32 final : public SerializationNumber<Int32> class SerializationDate32 final : public SerializationAsStringNonTrivialJSON<SerializationNumber<Int32>>
{ {
public: public:
explicit SerializationDate32(const DateLUTImpl & time_zone_ = DateLUT::instance()); explicit SerializationDate32(const DateLUTImpl & time_zone_ = DateLUT::instance());
@ -20,8 +21,8 @@ public:
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

View File

@ -180,7 +180,7 @@ void SerializationDateTime::serializeTextJSON(const IColumn & column, size_t row
writeChar('"', ostr); writeChar('"', ostr);
} }
void SerializationDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const void SerializationDateTime::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{ {
time_t x = 0; time_t x = 0;
if (checkChar('"', istr)) if (checkChar('"', istr))
@ -196,7 +196,7 @@ void SerializationDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & i
assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x)); assert_cast<ColumnType &>(column).getData().push_back(static_cast<UInt32>(x));
} }
bool SerializationDateTime::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const bool SerializationDateTime::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{ {
time_t x = 0; time_t x = 0;
if (checkChar('"', istr)) if (checkChar('"', istr))

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <DataTypes/Serializations/SerializationNumber.h> #include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h>
#include <DataTypes/TimezoneMixin.h> #include <DataTypes/TimezoneMixin.h>
class DateLUTImpl; class DateLUTImpl;
@ -8,7 +9,7 @@ class DateLUTImpl;
namespace DB namespace DB
{ {
class SerializationDateTime final : public SerializationNumber<UInt32>, public TimezoneMixin class SerializationDateTime final : public SerializationAsStringNonTrivialJSON<SerializationNumber<UInt32>>, public TimezoneMixin
{ {
public: public:
explicit SerializationDateTime(const TimezoneMixin & time_zone_); explicit SerializationDateTime(const TimezoneMixin & time_zone_);
@ -23,8 +24,8 @@ public:
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

View File

@ -15,7 +15,7 @@ namespace DB
SerializationDateTime64::SerializationDateTime64( SerializationDateTime64::SerializationDateTime64(
UInt32 scale_, const TimezoneMixin & time_zone_) UInt32 scale_, const TimezoneMixin & time_zone_)
: SerializationDecimalBase<DateTime64>(DecimalUtils::max_precision<DateTime64>, scale_) : SerializationAsStringNonTrivialJSON<SerializationDecimalBase<DateTime64>>(DecimalUtils::max_precision<DateTime64>, scale_)
, TimezoneMixin(time_zone_) , TimezoneMixin(time_zone_)
{ {
} }
@ -170,7 +170,7 @@ void SerializationDateTime64::serializeTextJSON(const IColumn & column, size_t r
writeChar('"', ostr); writeChar('"', ostr);
} }
void SerializationDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const void SerializationDateTime64::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{ {
DateTime64 x = 0; DateTime64 x = 0;
if (checkChar('"', istr)) if (checkChar('"', istr))
@ -185,7 +185,7 @@ void SerializationDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer &
assert_cast<ColumnType &>(column).getData().push_back(x); assert_cast<ColumnType &>(column).getData().push_back(x);
} }
bool SerializationDateTime64::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const bool SerializationDateTime64::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{ {
DateTime64 x = 0; DateTime64 x = 0;
if (checkChar('"', istr)) if (checkChar('"', istr))

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include <DataTypes/Serializations/SerializationDecimalBase.h> #include <DataTypes/Serializations/SerializationDecimalBase.h>
#include <DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h>
#include <DataTypes/TimezoneMixin.h> #include <DataTypes/TimezoneMixin.h>
class DateLUTImpl; class DateLUTImpl;
@ -8,7 +9,7 @@ class DateLUTImpl;
namespace DB namespace DB
{ {
class SerializationDateTime64 final : public SerializationDecimalBase<DateTime64>, public TimezoneMixin class SerializationDateTime64 final : public SerializationAsStringNonTrivialJSON<SerializationDecimalBase<DateTime64>>, public TimezoneMixin
{ {
public: public:
SerializationDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_); SerializationDateTime64(UInt32 scale_, const TimezoneMixin & time_zone_);
@ -25,8 +26,8 @@ public:
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

View File

@ -69,7 +69,7 @@ void SerializationIP<IPv>::serializeTextJSON(const DB::IColumn & column, size_t
} }
template <typename IPv> template <typename IPv>
void SerializationIP<IPv>::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const void SerializationIP<IPv>::deserializeTextNoEmptyCheckJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{ {
IPv x; IPv x;
assertChar('"', istr); assertChar('"', istr);
@ -84,7 +84,7 @@ void SerializationIP<IPv>::deserializeTextJSON(DB::IColumn & column, DB::ReadBuf
} }
template <typename IPv> template <typename IPv>
bool SerializationIP<IPv>::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const bool SerializationIP<IPv>::tryDeserializeTextNoEmptyCheckJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings &) const
{ {
IPv x; IPv x;
if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr)) if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr))

View File

@ -4,13 +4,14 @@
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Columns/ColumnsNumber.h> #include <Columns/ColumnsNumber.h>
#include <DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h>
#include <DataTypes/Serializations/SimpleTextSerialization.h> #include <DataTypes/Serializations/SimpleTextSerialization.h>
namespace DB namespace DB
{ {
template <typename IPv> template <typename IPv>
class SerializationIP : public SimpleTextSerialization class SerializationIP : public SerializationAsStringNonTrivialJSON<SimpleTextSerialization>
{ {
public: public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
@ -22,8 +23,8 @@ public:
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &/* settings*/) const override;

View File

@ -844,25 +844,52 @@ bool SerializationNullable::tryDeserializeNullJSON(DB::ReadBuffer & istr)
return checkString("null", istr); return checkString("null", istr);
} }
template<typename ReturnType> namespace
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null) {
/// Selects which JSON deserialization entry point of the nested serialization
/// is invoked by deserializeTextJSONImpl.
enum class Strategy : uint8_t
{
/// Calls deserializeTextJSON on the nested serialization; the result type is void.
Deserialize,
/// Calls deserializeTextNoEmptyCheckJSON on the nested serialization; the result type is void.
DeserializeNoEmptyCheck,
/// Calls tryDeserializeTextJSON on the nested serialization; the result type is bool.
TryDeserialize
};
/// Maps a Strategy to the return type used for it. Only the specializations below exist,
/// so using a Strategy without a specialization is a compile error.
template <Strategy> struct ReturnTypeImpl;
/// Deserialize and DeserializeNoEmptyCheck map to void, TryDeserialize maps to bool.
template <> struct ReturnTypeImpl<Strategy::Deserialize> { using Type = void; };
template <> struct ReturnTypeImpl<Strategy::TryDeserialize> { using Type = bool; };
template <> struct ReturnTypeImpl<Strategy::DeserializeNoEmptyCheck> { using Type = void; };
/// Convenience alias: ReturnType<strategy> is the type selected by ReturnTypeImpl for that strategy.
template <Strategy strategy>
using ReturnType = typename ReturnTypeImpl<strategy>::Type;
/// Always false, but dependent on the template argument, so that a static_assert on it
/// is only evaluated for a branch that is actually instantiated.
template <Strategy> struct AlwaysFalse : std::false_type {};
template <Strategy strategy>
ReturnType<strategy> deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested, bool & is_null)
{ {
auto check_for_null = [](ReadBuffer & buf){ return checkStringByFirstCharacterAndAssertTheRest("null", buf); }; auto check_for_null = [](ReadBuffer & buf){ return checkStringByFirstCharacterAndAssertTheRest("null", buf); };
auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf) auto deserialize_nested = [&nested, &settings](IColumn & nested_column, ReadBuffer & buf)
{ {
if constexpr (std::is_same_v<ReturnType, bool>) if constexpr (strategy == Strategy::TryDeserialize)
return nested->tryDeserializeTextJSON(nested_column, buf, settings); return nested->tryDeserializeTextJSON(nested_column, buf, settings);
nested->deserializeTextJSON(nested_column, buf, settings); else if constexpr (strategy == Strategy::Deserialize)
nested->deserializeTextJSON(nested_column, buf, settings);
else if constexpr (strategy == Strategy::DeserializeNoEmptyCheck)
nested->deserializeTextNoEmptyCheckJSON(nested_column, buf, settings);
else
static_assert(AlwaysFalse<strategy>::value);
}; };
return deserializeImpl<ReturnType>(column, istr, check_for_null, deserialize_nested, is_null); return deserializeImpl<ReturnType<strategy>>(column, istr, check_for_null, deserialize_nested, is_null);
}
} }
void SerializationNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const void SerializationNullable::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{ {
ColumnNullable & col = assert_cast<ColumnNullable &>(column); ColumnNullable & col = assert_cast<ColumnNullable &>(column);
bool is_null; bool is_null;
deserializeTextJSONImpl<void>(col.getNestedColumn(), istr, settings, nested, is_null); deserializeTextJSONImpl<Strategy::Deserialize>(col.getNestedColumn(), istr, settings, nested, is_null);
safeAppendToNullMap<void>(col, is_null); safeAppendToNullMap<void>(col, is_null);
} }
@ -870,20 +897,27 @@ bool SerializationNullable::tryDeserializeTextJSON(IColumn & column, ReadBuffer
{ {
ColumnNullable & col = assert_cast<ColumnNullable &>(column); ColumnNullable & col = assert_cast<ColumnNullable &>(column);
bool is_null; bool is_null;
return deserializeTextJSONImpl<bool>(col.getNestedColumn(), istr, settings, nested, is_null) && safeAppendToNullMap<bool>(col, is_null); return deserializeTextJSONImpl<Strategy::TryDeserialize>(col.getNestedColumn(), istr, settings, nested, is_null) && safeAppendToNullMap<bool>(col, is_null);
} }
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) bool SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{ {
bool is_null; bool is_null;
deserializeTextJSONImpl<void>(nested_column, istr, settings, nested_serialization, is_null); deserializeTextJSONImpl<Strategy::Deserialize>(nested_column, istr, settings, nested_serialization, is_null);
return !is_null;
}
bool SerializationNullable::deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{
bool is_null;
deserializeTextJSONImpl<Strategy::DeserializeNoEmptyCheck>(nested_column, istr, settings, nested_serialization, is_null);
return !is_null; return !is_null;
} }
bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization) bool SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(DB::IColumn & nested_column, DB::ReadBuffer & istr, const DB::FormatSettings & settings, const DB::SerializationPtr & nested_serialization)
{ {
bool is_null; bool is_null;
return deserializeTextJSONImpl<bool>(nested_column, istr, settings, nested_serialization, is_null); return deserializeTextJSONImpl<Strategy::TryDeserialize>(nested_column, istr, settings, nested_serialization, is_null);
} }
void SerializationNullable::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const void SerializationNullable::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const

View File

@ -88,6 +88,7 @@ public:
static bool deserializeNullAsDefaultOrNestedTextQuoted(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextQuoted(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
static bool deserializeNullAsDefaultOrNestedTextCSV(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextCSV(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
static bool deserializeNullAsDefaultOrNestedTextJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
static bool deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(IColumn & nested_column, ReadBuffer & istr, const FormatSettings &, const SerializationPtr & nested_serialization);
static bool deserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization); static bool deserializeNullAsDefaultOrNestedTextRaw(IColumn & nested_column, ReadBuffer & istr, const FormatSettings & settings, const SerializationPtr & nested_serialization);
/// If Check for NULL and deserialize value into non-nullable column or insert default value of nested type. /// If Check for NULL and deserialize value into non-nullable column or insert default value of nested type.

View File

@ -94,7 +94,7 @@ void SerializationUUID::serializeTextJSON(const IColumn & column, size_t row_num
writeChar('"', ostr); writeChar('"', ostr);
} }
void SerializationUUID::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const void SerializationUUID::deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{ {
UUID x; UUID x;
assertChar('"', istr); assertChar('"', istr);
@ -103,7 +103,7 @@ void SerializationUUID::deserializeTextJSON(IColumn & column, ReadBuffer & istr,
assert_cast<ColumnUUID &>(column).getData().push_back(x); assert_cast<ColumnUUID &>(column).getData().push_back(x);
} }
bool SerializationUUID::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const bool SerializationUUID::tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{ {
UUID x; UUID x;
if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr)) if (!checkChar('"', istr) || !tryReadText(x, istr) || !checkChar('"', istr))

View File

@ -1,11 +1,12 @@
#pragma once #pragma once
#include <DataTypes/Serializations/SerializationNumber.h> #include <DataTypes/Serializations/SerializationNumber.h>
#include <DataTypes/Serializations/SerializationAsStringNonTrivialJSON.h>
namespace DB namespace DB
{ {
class SerializationUUID : public SimpleTextSerialization class SerializationUUID : public SerializationAsStringNonTrivialJSON<SimpleTextSerialization>
{ {
public: public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
@ -15,8 +16,8 @@ public:
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; void deserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
bool tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; bool tryDeserializeTextNoEmptyCheckJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; bool tryDeserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;

View File

@ -151,6 +151,8 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects; format_settings.json.try_infer_objects_as_tuples = settings.input_format_json_try_infer_named_tuples_from_objects;
format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence; format_settings.json.throw_on_bad_escape_sequence = settings.input_format_json_throw_on_bad_escape_sequence;
format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields; format_settings.json.ignore_unnecessary_fields = settings.input_format_json_ignore_unnecessary_fields;
format_settings.json.case_insensitive_column_matching = settings.input_format_json_case_insensitive_column_matching;
format_settings.json.empty_as_default = settings.input_format_json_empty_as_default;
format_settings.null_as_default = settings.input_format_null_as_default; format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields; format_settings.force_null_for_omitted_fields = settings.input_format_force_null_for_omitted_fields;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;

View File

@ -234,6 +234,8 @@ struct FormatSettings
bool infer_incomplete_types_as_strings = true; bool infer_incomplete_types_as_strings = true;
bool throw_on_bad_escape_sequence = true; bool throw_on_bad_escape_sequence = true;
bool ignore_unnecessary_fields = true; bool ignore_unnecessary_fields = true;
bool case_insensitive_column_matching = false;
bool empty_as_default = false;
} json{}; } json{};
struct struct

View File

@ -286,11 +286,33 @@ namespace JSONUtils
return true; return true;
} }
if (as_nullable) if (format_settings.json.empty_as_default && type->isNonTriviallySerializedAsStringJSON())
return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization); {
/// We have a non-numeric non-string data type at the top level.
/// At first glance, this looks like it duplicates the work done in
/// SerializationAsStringNonTrivialJSON. However, the empty string must be
/// handled here because we want to return false if we inserted a default
/// value on purpose, which the ISerialization interface does not allow for.
if (tryMatchEmptyString(in))
{
column.insertDefault();
return false;
}
serialization->deserializeTextJSON(column, in, format_settings); if (as_nullable)
return true; return SerializationNullable::deserializeNullAsDefaultOrNestedTextNoEmptyCheckJSON(column, in, format_settings, serialization);
serialization->deserializeTextNoEmptyCheckJSON(column, in, format_settings);
return true;
}
else
{
if (as_nullable)
return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column, in, format_settings, serialization);
serialization->deserializeTextJSON(column, in, format_settings);
return true;
}
} }
catch (Exception & e) catch (Exception & e)
{ {

View File

@ -1137,6 +1137,23 @@ template void readCSVStringInto<String, false, false>(String & s, ReadBuffer & b
template void readCSVStringInto<String, true, false>(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings); template void readCSVStringInto<String, true, false>(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
template void readCSVStringInto<PaddedPODArray<UInt8>, false, false>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings); template void readCSVStringInto<PaddedPODArray<UInt8>, false, false>(PaddedPODArray<UInt8> & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
/// Tries to consume an empty JSON string, i.e. exactly two consecutive double quotes, from buf.
/// Returns true on a match and leaves the position just past the closing quote.
/// Returns false otherwise; in that case the position is stepped back to where it started
/// (see the review note below).
bool tryMatchEmptyString(ReadBuffer & buf)
{
/// Nothing to match at end of input or when the next character is not an opening quote.
if (buf.eof() || *buf.position() != '"')
return false;
/// Step over the opening quote to inspect the character that follows it.
++buf.position();
if (buf.eof() || *buf.position() != '"')
{
/// Not an empty string: un-consume the opening quote so the caller can parse the value normally.
/// NOTE(review): this step back assumes the eof() call above did not replace the buffer contents.
/// If eof() can load the next chunk when the opening quote was the last available byte,
/// the decremented position would no longer point at that quote - TODO confirm against ReadBuffer.
--buf.position();
return false;
}
/// Step over the closing quote: the empty string has been consumed.
++buf.position();
return true;
}
template <typename Vector, typename ReturnType> template <typename Vector, typename ReturnType>
ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::JSON & settings) ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::JSON & settings)

View File

@ -665,6 +665,10 @@ void readStringUntilEOFInto(Vector & s, ReadBuffer & buf);
template <typename Vector, bool include_quotes = false, bool allow_throw = true> template <typename Vector, bool include_quotes = false, bool allow_throw = true>
void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings); void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings);
/// Consumes the current token if it is an empty string, i.e. two consecutive double quotes.
/// Returns true if consumed.
bool tryMatchEmptyString(ReadBuffer & buf);
/// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception. /// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception.
template <typename Vector, typename ReturnType = void> template <typename Vector, typename ReturnType = void>
ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::JSON & settings); ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::JSON & settings);

View File

@ -0,0 +1,20 @@
1970-01-01
1970-01-01
1970-01-01 00:00:00
1970-01-01 00:00:00.000
0.0.0.0
::
00000000-0000-0000-0000-000000000000
1
::
2001:db8:3333:4444:5555:6666:7777:8888
::
['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e','00000000-0000-0000-0000-000000000000']
['::','::']
('1970-01-01','0.0.0.0','abc')
{'abc':'::'}
00000000-0000-0000-0000-000000000000
[['2001:db8:3333:4444:cccc:dddd:eeee:ffff','::'],['::','2001:db8:3333:4444:5555:6666:7777:8888']]
['00000000-0000-0000-0000-000000000000','b15f852c-c41a-4fd6-9247-1929c841715e']
(['00000000-0000-0000-0000-000000000000'],('00000000-0000-0000-0000-000000000000',{'abc':'::'}))
{('1970-01-01','0.0.0.0'):'00000000-0000-0000-0000-000000000000'}

View File

@ -0,0 +1,53 @@
-- Checks input_format_json_empty_as_default: with the setting enabled, an empty JSON
-- string ("") supplied for the types exercised below is expected to be read as the
-- default value of the type (see the reference output).
SET input_format_json_empty_as_default = 1;
-- The Variant(...) columns used further down rely on this experimental type.
SET allow_experimental_variant_type = 1;

-- Simple types
SELECT x FROM format(JSONEachRow, 'x Date', '{"x":""}');
SELECT x FROM format(JSONEachRow, 'x Date32', '{"x":""}');
-- DateTime values are converted to UTC so the output does not depend on the server time zone.
SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime', '{"x":""}');
SELECT toTimeZone(x, 'UTC') FROM format(JSONEachRow, 'x DateTime64', '{"x":""}');
SELECT x FROM format(JSONEachRow, 'x IPv4', '{"x":""}');
SELECT x FROM format(JSONEachRow, 'x IPv6', '{"x":""}');
SELECT x FROM format(JSONEachRow, 'x UUID', '{"x":""}');

-- Simple type AggregateFunction
-- table1 first receives the uniq state aggregated over the empty table2, then the state
-- parsed from "". The reference output expects exactly one distinct state.
DROP TABLE IF EXISTS table1;
CREATE TABLE table1(col AggregateFunction(uniq, UInt64)) ENGINE=Memory();
DROP TABLE IF EXISTS table2;
CREATE TABLE table2(UserID UInt64) ENGINE=Memory();
INSERT INTO table1 SELECT uniqState(UserID) FROM table2;
INSERT INTO table1 SELECT x FROM format(JSONEachRow, 'x AggregateFunction(uniq, UInt64)' AS T, '{"x":""}');
SELECT COUNT(DISTINCT col) FROM table1;
DROP TABLE table1;
DROP TABLE table2;

-- The setting input_format_defaults_for_omitted_fields determines the default value if enabled:
-- the same empty string is inserted once with the setting off and once with it on, and the
-- reference output expects two different addresses.
CREATE TABLE table1(address IPv6 DEFAULT toIPv6('2001:db8:3333:4444:5555:6666:7777:8888')) ENGINE=Memory();
SET input_format_defaults_for_omitted_fields = 0;
INSERT INTO table1 FORMAT JSONEachRow {"address":""};
SET input_format_defaults_for_omitted_fields = 1;
INSERT INTO table1 FORMAT JSONEachRow {"address":""};
-- Explicit column and ordering keep the two-row output deterministic.
SELECT address FROM table1 ORDER BY address ASC;
DROP TABLE table1;

-- Nullable
SELECT x FROM format(JSONEachRow, 'x Nullable(IPv6)', '{"x":""}');

-- Compound types
SELECT x FROM format(JSONEachRow, 'x Array(UUID)', '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}');
SELECT x FROM format(JSONEachRow, 'x Array(Nullable(IPv6))', '{"x":["",""]}');
SELECT x FROM format(JSONEachRow, 'x Tuple(Date, IPv4, String)', '{"x":["", "", "abc"]}');
SELECT x FROM format(JSONEachRow, 'x Map(String, IPv6)', '{"x":{"abc": ""}}');
SELECT x FROM format(JSONEachRow, 'x Variant(Date, UUID)', '{"x":""}');

-- Deep composition
SELECT x FROM format(JSONEachRow, 'x Array(Array(IPv6))', '{"x":[["2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF", ""], ["", "2001:db8:3333:4444:5555:6666:7777:8888"]]}');
SELECT x FROM format(JSONEachRow, 'x Variant(Date, Array(UUID))', '{"x":["", "b15f852c-c41a-4fd6-9247-1929c841715e"]}');
SELECT x FROM format(JSONEachRow, 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))', '{"x":[[""], ["",{"abc":""}]]}');
SELECT x FROM format(JSONEachRow, 'x Map(Tuple(Date,IPv4), Variant(UUID,IPv6))', '{"x":{["",""]:""}}');