post-review changes

This commit is contained in:
Alexis Arnaud 2024-08-13 14:50:37 +02:00
parent a39a4b1080
commit 29bc7cf5d5
13 changed files with 241 additions and 268 deletions

View File

@ -88,7 +88,6 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
{"optimize_functions_to_subcolumns", false, true, "Enabled settings by default"},
{"allow_experimental_vector_similarity_index", false, false, "Added new setting to allow experimental vector similarity indexes"},
{"input_format_json_empty_as_default", true, false, "Added new setting to allow to treat empty fields in JSON input as default values."}
{"input_format_json_empty_as_default", false, false, "Added new setting to allow to treat empty fields in JSON input as default values."}
}
},

View File

@ -615,51 +615,50 @@ void SerializationArray::serializeTextJSONPretty(const IColumn & column, size_t
writeChar(']', ostr);
}
namespace
{
template <typename ReturnType>
ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const SerializationPtr & nested, const FormatSettings & settings)
ReturnType SerializationArray::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
auto deserializer = [&nested](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType
auto deserialize_nested = [&settings, this](IColumn & nested_column, ReadBuffer & buf) -> ReturnType
{
auto adapter = [&deserialize_nested, &istr_, &nested](IColumn & nested_column) -> ReturnType
if constexpr (std::is_same_v<ReturnType, void>)
{
return deserialize_nested(nested_column, istr_, nested);
};
return deserializeTextImpl<ReturnType>(column_, istr_, std::move(adapter), false);
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested);
else
nested->deserializeTextJSON(nested_column, buf, settings);
}
else
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested);
return nested->tryDeserializeTextJSON(nested_column, buf, settings);
}
};
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<ReturnType>(column, istr, settings, std::move(deserializer));
}
if (settings.json.empty_as_default)
return deserializeTextImpl<ReturnType>(column, istr,
[&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType
{
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<ReturnType>(nested_column, istr, deserialize_nested);
}, false);
else
return deserializeTextImpl<ReturnType>(column, istr,
[&deserialize_nested, &istr](IColumn & nested_column) -> ReturnType
{
return deserialize_nested(nested_column, istr);
}, false);
}
void SerializationArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.json.empty_as_default)
deserializeEmpyStringAsDefaultOrNested<void>(column, istr, nested, settings);
else
deserializeTextImpl(column, istr,
[&settings, &istr, this](IColumn & nested_column)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested);
else
nested->deserializeTextJSON(nested_column, istr, settings);
}, false);
deserializeTextJSONImpl<void>(column, istr, settings);
}
bool SerializationArray::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.json.empty_as_default)
return deserializeEmpyStringAsDefaultOrNested<bool>(column, istr, nested, settings);
return deserializeTextImpl<bool>(column, istr,
[&settings, &istr, this](IColumn & nested_column)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, nested);
return nested->tryDeserializeTextJSON(nested_column, istr, settings);
}, false);
return deserializeTextJSONImpl<bool>(column, istr, settings);
}

View File

@ -82,6 +82,10 @@ public:
SerializationPtr create(const SerializationPtr & prev) const override;
ColumnPtr create(const ColumnPtr & prev) const override;
};
private:
template <typename ReturnType>
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
};
}

View File

@ -316,48 +316,53 @@ void SerializationMap::serializeTextJSONPretty(const IColumn & column, size_t ro
writeChar('}', ostr);
}
template <typename ReturnType>
ReturnType SerializationMap::deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
ReturnType SerializationMap::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
auto deserializer = [this](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType
auto deserialize_nested = [&settings](IColumn & subcolumn, ReadBuffer & buf, const SerializationPtr & subcolumn_serialization) -> ReturnType
{
auto adapter = [&deserialize_nested](ReadBuffer & buf, const SerializationPtr & nested_column_serialization, IColumn & nested_column) -> ReturnType
if constexpr (std::is_same_v<ReturnType, void>)
{
return deserialize_nested(nested_column, buf, nested_column_serialization);
};
return this->deserializeTextImpl<ReturnType>(column_, istr_, std::move(adapter));
};
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<ReturnType>(column, istr, settings, std::move(deserializer));
}
void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.json.empty_as_default)
deserializeEmpyStringAsDefaultOrNested<void>(column, istr, settings);
else
deserializeTextImpl(column, istr,
[&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization);
else
subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings);
});
}
bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.json.empty_as_default)
return deserializeEmpyStringAsDefaultOrNested<bool>(column, istr, settings);
return deserializeTextImpl<bool>(column, istr,
[&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization);
else
subcolumn_serialization->deserializeTextJSON(subcolumn, buf, settings);
}
else
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(subcolumn))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(subcolumn, buf, settings, subcolumn_serialization);
return subcolumn_serialization->tryDeserializeTextJSON(subcolumn, buf, settings);
});
}
};
if (settings.json.empty_as_default)
return deserializeTextImpl<ReturnType>(column, istr,
[&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType
{
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<ReturnType>(subcolumn, buf,
[&deserialize_nested, &subcolumn_serialization](IColumn & subcolumn_, ReadBuffer & buf_) -> ReturnType
{
return deserialize_nested(subcolumn_, buf_, subcolumn_serialization);
});
});
else
return deserializeTextImpl<ReturnType>(column, istr,
[&deserialize_nested](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn) -> ReturnType
{
return deserialize_nested(subcolumn, buf, subcolumn_serialization);
});
}
/// JSON text deserialization entry point for Map columns.
/// Forwards to the shared deserializeTextJSONImpl instantiated with ReturnType = void; nothing is returned.
void SerializationMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextJSONImpl<void>(column, istr, settings);
}
/// "try" counterpart of deserializeTextJSON for Map columns.
/// Forwards to the shared deserializeTextJSONImpl instantiated with ReturnType = bool and returns its result.
bool SerializationMap::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return deserializeTextJSONImpl<bool>(column, istr, settings);
}
void SerializationMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const

View File

@ -76,7 +76,7 @@ private:
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && reader) const;
template <typename ReturnType>
ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
};
}

View File

@ -314,7 +314,7 @@ void SerializationTuple::serializeTextJSONPretty(const IColumn & column, size_t
}
template <typename ReturnType>
ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const
ReturnType SerializationTuple::deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const
{
static constexpr auto throw_exception = std::is_same_v<ReturnType, void>;
@ -490,48 +490,52 @@ ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuf
}
template <typename ReturnType>
ReturnType SerializationTuple::deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
ReturnType SerializationTuple::deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
auto deserializer = [&settings, this](IColumn & column_, ReadBuffer & istr_, auto && deserialize_nested) -> ReturnType
auto deserialize_nested = [&settings](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType
{
auto adapter = [&deserialize_nested, &istr_, this](IColumn & nested_column, size_t element_pos) -> ReturnType
if constexpr (std::is_same_v<ReturnType, void>)
{
return deserialize_nested(nested_column, istr_, elems[element_pos]);
};
return deserializeTextJSONImpl<ReturnType>(column_, istr_, settings, std::move(adapter));
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization);
else
nested_column_serialization->deserializeTextJSON(nested_column, buf, settings);
}
else
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization);
else
return nested_column_serialization->tryDeserializeTextJSON(nested_column, buf, settings);
}
};
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<ReturnType>(column, istr, settings, std::move(deserializer));
}
void SerializationTuple::deserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
{
if (settings.json.empty_as_default)
deserializeEmpyStringAsDefaultOrNested(column, istr, settings);
return deserializeTupleJSONImpl<ReturnType>(column, istr, settings,
[&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType
{
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<ReturnType>(nested_column, istr,
[&deserialize_nested, element_pos, this](IColumn & nested_column_, ReadBuffer & buf) -> ReturnType
{
return deserialize_nested(nested_column_, buf, elems[element_pos]);
});
});
else
deserializeTextJSONImpl<void>(column, istr, settings,
[&settings, &istr, this](IColumn & nested_column, size_t element_pos) -> void
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, elems[element_pos]);
else
elems[element_pos]->deserializeTextJSON(nested_column, istr, settings);
});
return deserializeTupleJSONImpl<ReturnType>(column, istr, settings,
[&deserialize_nested, &istr, this](IColumn & nested_column, size_t element_pos) -> ReturnType
{
return deserialize_nested(nested_column, istr, elems[element_pos]);
});
}
bool SerializationTuple::tryDeserializeTextJSON(DB::IColumn & column, DB::ReadBuffer & istr, const DB::FormatSettings & settings) const
void SerializationTuple::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.json.empty_as_default)
return deserializeEmpyStringAsDefaultOrNested<bool>(column, istr, settings);
deserializeTextJSONImpl<void>(column, istr, settings);
}
return deserializeTextJSONImpl<bool>(column, istr, settings,
[&settings, &istr, this](IColumn & nested_column, size_t element_pos) -> bool
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, istr, settings, elems[element_pos]);
return elems[element_pos]->tryDeserializeTextJSON(nested_column, istr, settings);
});
/// "try" JSON text deserialization entry point for Tuple columns.
/// Forwards to the three-argument deserializeTextJSONImpl instantiated with ReturnType = bool and returns its result.
bool SerializationTuple::tryDeserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
return deserializeTextJSONImpl<bool>(column, istr, settings);
}

View File

@ -82,10 +82,10 @@ private:
ReturnType deserializeTextImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const;
template <typename ReturnType>
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const;
ReturnType deserializeTupleJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserialize_element) const;
template <typename ReturnType = void>
ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
template <typename ReturnType>
ReturnType deserializeTextJSONImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;
template <typename ReturnType = void>
ReturnType deserializeTextCSVImpl(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const;

View File

@ -2,11 +2,14 @@
#include <Formats/JSONUtils.h>
#include <Formats/ReadSchemaUtils.h>
#include <Formats/EscapingRuleUtils.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferValidUTF8.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/DataTypeFactory.h>
#include <Common/assert_cast.h>
#include <base/find_symbols.h>
@ -267,9 +270,6 @@ namespace JSONUtils
const FormatSettings & format_settings,
bool yield_strings)
{
static constexpr auto EMPTY_STRING = "\"\"";
static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length();
try
{
bool as_nullable = format_settings.null_as_default && !isNullableOrLowCardinalityNullable(type);
@ -288,70 +288,19 @@ namespace JSONUtils
return true;
}
auto do_deserialize = [](IColumn & column_, ReadBuffer & buf_, auto && check_for_empty_string, auto && deserialize) -> bool
{
if (check_for_empty_string(buf_))
{
column_.insertDefault();
return false;
}
else
return deserialize(column_, buf_);
};
auto deserialize_impl = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf_) -> bool
auto deserialize = [as_nullable, &format_settings, &serialization](IColumn & column_, ReadBuffer & buf) -> bool
{
if (as_nullable)
return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf_, format_settings, serialization);
return SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(column_, buf, format_settings, serialization);
serialization->deserializeTextJSON(column_, buf_, format_settings);
serialization->deserializeTextJSON(column_, buf, format_settings);
return true;
};
if (!format_settings.json.empty_as_default || in.eof() || *in.position() != EMPTY_STRING[0])
return deserialize_impl(column, in);
if (in.available() >= EMPTY_STRING_LENGTH)
{
/// We have enough data in buffer to check if we have an empty string.
auto check_for_empty_string = [](ReadBuffer & buf_)
{
auto * pos = buf_.position();
if (checkString(EMPTY_STRING, buf_))
return true;
buf_.position() = pos;
return false;
};
return do_deserialize(column, in, check_for_empty_string, deserialize_impl);
}
/// We don't have enough data in buffer to check if we have an empty string.
/// Use PeekableReadBuffer to make a checkpoint before checking for an
/// empty string and rollback if check was failed.
auto check_for_empty_string = [](ReadBuffer & buf_) -> bool
{
auto & peekable_buf = assert_cast<PeekableReadBuffer &>(buf_);
peekable_buf.setCheckpoint();
SCOPE_EXIT(peekable_buf.dropCheckpoint());
if (checkString(EMPTY_STRING, peekable_buf))
return true;
peekable_buf.rollbackToCheckpoint();
return false;
};
auto deserialize_impl_with_check = [&deserialize_impl](IColumn & column_, ReadBuffer & buf_) -> bool
{
auto & peekable_buf = assert_cast<PeekableReadBuffer &>(buf_);
bool res = deserialize_impl(column_, peekable_buf);
if (unlikely(peekable_buf.hasUnreadData()))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available()));
return res;
};
PeekableReadBuffer peekable_buf(in, true);
return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_impl_with_check);
if (format_settings.json.empty_as_default)
return JSONUtils::deserializeEmpyStringAsDefaultOrNested<bool, false>(column, in, deserialize);
else
return deserialize(column, in);
}
catch (Exception & e)
{
@ -915,6 +864,78 @@ namespace JSONUtils
}
}
/// Reads one JSON value from `istr` into `column`, treating an empty JSON string (`""`) as "use the default value".
///
/// - If the input is at EOF or does not start with `"`, the call is delegated to `deserialize_nested` unchanged.
/// - If the next two characters are exactly `""`, a default value is inserted into `column` and
///   `ReturnType(default_column_return_value)` is returned; `deserialize_nested` is not called.
/// - Otherwise the read position is restored and `deserialize_nested(column, buffer)` parses the value.
///
/// Template parameters:
///   ReturnType                  - `void` or `bool`, following the caller's nested deserializer.
///   default_column_return_value - value returned (converted to ReturnType) when the default was inserted.
///
/// Throws LOGICAL_ERROR if, on the PeekableReadBuffer path, the nested deserializer leaves unread data
/// in the peekable buffer's own memory.
template <typename ReturnType, bool default_column_return_value>
ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize<ReturnType> & deserialize_nested)
{
/// True when the nested deserializer returns nothing (ReturnType is void).
static constexpr auto throw_exception = std::is_same_v<ReturnType, void>;
/// The token being detected: an opening quote immediately followed by a closing quote.
static constexpr auto EMPTY_STRING = "\"\"";
static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length();
/// Cheap rejection: anything that does not begin with a quote cannot be an empty string.
if (istr.eof() || *istr.position() != EMPTY_STRING[0])
return deserialize_nested(column, istr);
/// Shared tail for both paths below: insert a default if the empty-string check succeeds,
/// otherwise hand the (restored) buffer to the supplied deserializer.
auto do_deserialize = [](IColumn & column_, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize) -> ReturnType
{
if (check_for_empty_string(buf))
{
column_.insertDefault();
return ReturnType(default_column_return_value);
}
return deserialize(column_, buf);
};
if (istr.available() >= EMPTY_STRING_LENGTH)
{
/// We have enough data in the buffer to check whether we have an empty string.
/// The position is saved up front and restored if the check does not match.
auto check_for_empty_string = [](ReadBuffer & buf) -> bool
{
auto * pos = buf.position();
if (checkString(EMPTY_STRING, buf))
return true;
buf.position() = pos;
return false;
};
return do_deserialize(column, istr, check_for_empty_string, deserialize_nested);
}
/// We don't have enough data in the buffer to check whether we have an empty string.
/// Use PeekableReadBuffer to make a checkpoint before checking for an
/// empty string and roll back to it if the check fails.
auto check_for_empty_string = [](ReadBuffer & buf) -> bool
{
auto & peekable_buf = assert_cast<PeekableReadBuffer &>(buf);
peekable_buf.setCheckpoint();
/// The checkpoint is dropped on every exit from this lambda, whether or not the check matched.
SCOPE_EXIT(peekable_buf.dropCheckpoint());
if (checkString(EMPTY_STRING, peekable_buf))
return true;
peekable_buf.rollbackToCheckpoint();
return false;
};
/// Runs the nested deserializer on the peekable buffer and then verifies the buffer was fully drained.
auto deserialize_nested_with_check = [&deserialize_nested](IColumn & column_, ReadBuffer & buf) -> ReturnType
{
auto & peekable_buf = assert_cast<PeekableReadBuffer &>(buf);
if constexpr (throw_exception)
deserialize_nested(column_, peekable_buf);
else if (!deserialize_nested(column_, peekable_buf))
/// A false result from the nested deserializer is returned as-is, before the unread-data check.
return ReturnType(false);
if (unlikely(peekable_buf.hasUnreadData()))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available()));
return ReturnType(true);
};
/// NOTE(review): the meaning of the second constructor argument (`true`) is not visible here - confirm against PeekableReadBuffer.
PeekableReadBuffer peekable_buf(istr, true);
return do_deserialize(column, peekable_buf, check_for_empty_string, deserialize_nested_with_check);
}
/// Explicit instantiations for the three (ReturnType, default_column_return_value) combinations provided by this translation unit.
template void deserializeEmpyStringAsDefaultOrNested<void, true>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<void> & deserialize_nested);
template bool deserializeEmpyStringAsDefaultOrNested<bool, true>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<bool> & deserialize_nested);
template bool deserializeEmpyStringAsDefaultOrNested<bool, false>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<bool> & deserialize_nested);
}
}

View File

@ -2,16 +2,13 @@
#include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/ISerialization.h>
#include <DataTypes/Serializations/SerializationNullable.h>
#include <Formats/FormatSettings.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/PeekableReadBuffer.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/Progress.h>
#include <Core/NamesAndTypes.h>
#include <Common/assert_cast.h>
#include <Common/Stopwatch.h>
#include <functional>
#include <utility>
namespace DB
@ -20,11 +17,6 @@ namespace DB
class Block;
struct JSONInferenceInfo;
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace JSONUtils
{
std::pair<bool, size_t> fileSegmentationEngineJSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows);
@ -147,104 +139,14 @@ namespace JSONUtils
void skipTheRestOfObject(ReadBuffer & in, const FormatSettings::JSON & settings);
template <typename ReturnType>
ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, auto && deserializer)
{
static constexpr auto throw_exception = std::is_same_v<ReturnType, void>;
using NestedDeserialize = std::function<ReturnType(IColumn &, ReadBuffer &)>;
static constexpr auto EMPTY_STRING = "\"\"";
static constexpr auto EMPTY_STRING_LENGTH = std::string_view(EMPTY_STRING).length();
template <typename ReturnType, bool default_column_return_value = true>
ReturnType deserializeEmpyStringAsDefaultOrNested(IColumn & column, ReadBuffer & istr, const NestedDeserialize<ReturnType> & deserialize_nested);
auto do_deserialize_nested = [](IColumn & nested_column, ReadBuffer & buf, auto && check_for_empty_string, auto && deserialize, const SerializationPtr & nested_column_serialization) -> ReturnType
{
if (check_for_empty_string(buf))
{
nested_column.insertDefault();
return ReturnType(true);
}
return deserialize(nested_column, buf, nested_column_serialization);
};
auto deserialize_nested_impl = [&settings](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType
{
if constexpr (throw_exception)
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
SerializationNullable::deserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization);
else
nested_column_serialization->deserializeTextJSON(nested_column, buf, settings);
}
else
{
if (settings.null_as_default && !isColumnNullableOrLowCardinalityNullable(nested_column))
return SerializationNullable::tryDeserializeNullAsDefaultOrNestedTextJSON(nested_column, buf, settings, nested_column_serialization);
return nested_column_serialization->tryDeserializeTextJSON(nested_column, buf, settings);
}
};
auto deserialize_nested = [&do_deserialize_nested, &deserialize_nested_impl](IColumn & nested_column, ReadBuffer & buf, const SerializationPtr & nested_column_serialization) -> ReturnType
{
if (buf.eof() || *buf.position() != EMPTY_STRING[0])
return deserialize_nested_impl(nested_column, buf, nested_column_serialization);
if (buf.available() >= EMPTY_STRING_LENGTH)
{
/// We have enough data in buffer to check if we have an empty string.
auto check_for_empty_string = [](ReadBuffer & buf_) -> bool
{
auto * pos = buf_.position();
if (checkString(EMPTY_STRING, buf_))
return true;
buf_.position() = pos;
return false;
};
return do_deserialize_nested(nested_column, buf, check_for_empty_string, deserialize_nested_impl, nested_column_serialization);
}
/// We don't have enough data in buffer to check if we have an empty string.
/// Use PeekableReadBuffer to make a checkpoint before checking for an
/// empty string and rollback if check was failed.
auto check_for_empty_string = [](ReadBuffer & buf_) -> bool
{
auto & peekable_buf = assert_cast<PeekableReadBuffer &>(buf_);
peekable_buf.setCheckpoint();
SCOPE_EXIT(peekable_buf.dropCheckpoint());
if (checkString(EMPTY_STRING, peekable_buf))
return true;
peekable_buf.rollbackToCheckpoint();
return false;
};
auto deserialize_nested_impl_with_check = [&deserialize_nested_impl](IColumn & nested_column_, ReadBuffer & buf_, const SerializationPtr & nested_column_serialization_) -> ReturnType
{
auto & peekable_buf = assert_cast<PeekableReadBuffer &>(buf_);
auto enforceNoUnreadData = [&peekable_buf]() -> void
{
if (unlikely(peekable_buf.hasUnreadData()))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect state while parsing JSON: PeekableReadBuffer has unread data in own memory: {}", String(peekable_buf.position(), peekable_buf.available()));
};
if constexpr (throw_exception)
{
deserialize_nested_impl(nested_column_, peekable_buf, nested_column_serialization_);
enforceNoUnreadData();
}
else
{
bool res = deserialize_nested_impl(nested_column_, peekable_buf, nested_column_serialization_);
enforceNoUnreadData();
return res;
}
};
PeekableReadBuffer peekable_buf(buf, true);
return do_deserialize_nested(nested_column, peekable_buf, check_for_empty_string, deserialize_nested_impl_with_check, nested_column_serialization);
};
return deserializer(column, istr, deserialize_nested);
}
extern template void deserializeEmpyStringAsDefaultOrNested<void, true>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<void> & deserialize_nested);
extern template bool deserializeEmpyStringAsDefaultOrNested<bool, true>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<bool> & deserialize_nested);
extern template bool deserializeEmpyStringAsDefaultOrNested<bool, false>(IColumn & column, ReadBuffer & istr, const NestedDeserialize<bool> & deserialize_nested);
}
}

View File

@ -0,0 +1,8 @@
Array(UUID)
{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]}
{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]}
{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e","00000000-0000-0000-0000-000000000000"]}
Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))
{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]}
{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]}
{"x":[["00000000-0000-0000-0000-000000000000"],["00000000-0000-0000-0000-000000000000",{"abc":"::"}]]}

View File

@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Tags: no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.json
# Wrapper for clickhouse-local ($CLICKHOUSE_LOCAL) that always outputs in
# JSONEachRow format, so that format settings passed by the caller do not
# affect the output. All arguments are forwarded unchanged.
function clickhouse_local()
{
$CLICKHOUSE_LOCAL --output-format JSONEachRow "$@"
}
# Case 1: an Array(UUID) whose last element is an empty string. With
# input_format_json_empty_as_default=1 the empty string is expected to be read
# as the default (all-zero) UUID.
echo 'Array(UUID)'
echo '{"x":["00000000-0000-0000-0000-000000000000","b15f852c-c41a-4fd6-9247-1929c841715e",""]}' > "$DATA_FILE"
# Use increasingly smaller read buffers (presumably so that the two-character
# "" token can end up split across buffer refills).
clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=4"
clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=2"
clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Array(UUID)') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=1"
# Case 2: empty strings nested inside Tuple / Array / Map elements.
echo 'Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))'
echo '{"x":[[""], ["",{"abc":""}]]}' > "$DATA_FILE"
# Use increasingly smaller read buffers.
clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=16"
clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=8"
clickhouse_local -q "SELECT x FROM file('$DATA_FILE', 'JSONEachRow', 'x Tuple(Array(UUID), Tuple(UUID, Map(String, IPv6)))') SETTINGS input_format_json_empty_as_default=1, input_format_parallel_parsing=0, storage_file_read_method='read', max_read_buffer_size=1"
# Remove the temporary input file; quoted so the name is never word-split or glob-expanded.
rm "$DATA_FILE"