Merge pull request #61364 from ClickHouse/vdimir/fix_crash_objson_array_null

Fix crash in ObjectJson parsing array with nulls
This commit is contained in:
vdimir 2024-03-27 13:41:09 +01:00 committed by GitHub
commit 3fa6d23730
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 50 additions and 13 deletions

View File

@ -20,12 +20,12 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int ILLEGAL_COLUMN; extern const int ARGUMENT_OUT_OF_BOUND;
extern const int DUPLICATE_COLUMN; extern const int DUPLICATE_COLUMN;
extern const int EXPERIMENTAL_FEATURE_ERROR;
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_DIMENSIONS_MISMATCHED; extern const int NUMBER_OF_DIMENSIONS_MISMATCHED;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int EXPERIMENTAL_FEATURE_ERROR;
} }
namespace namespace
@ -334,7 +334,18 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
if (type_changed || info.need_convert) if (type_changed || info.need_convert)
field = convertFieldToTypeOrThrow(field, *least_common_type.get()); field = convertFieldToTypeOrThrow(field, *least_common_type.get());
data.back()->insert(field); if (!data.back()->tryInsert(field))
{
/** Normalization of the field above is pretty complicated (it uses several FieldVisitors),
* so in the case of a bug, we may get mismatched types.
* The `IColumn::insert` method does not check the type of the inserted field, and it can lead to a segmentation fault.
* Therefore, we use the safer `tryInsert` method to get an exception instead of a segmentation fault.
*/
throw Exception(ErrorCodes::EXPERIMENTAL_FEATURE_ERROR,
"Cannot insert field {} to column {}",
field.dump(), data.back()->dumpStructure());
}
++num_rows; ++num_rows;
} }

View File

@ -460,6 +460,28 @@ Float32 ColumnVector<T>::getFloat32(size_t n [[maybe_unused]]) const
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get the value of {} as Float32", TypeName<T>); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot get the value of {} as Float32", TypeName<T>);
} }
/// Attempts to append the value stored in `x` to this column.
/// Returns true and appends one element when `x` holds a NearestFieldType<T>
/// (or, for UInt8 columns only, a bool). Returns false without appending
/// anything when the field holds neither.
template <typename T>
bool ColumnVector<T>::tryInsert(const DB::Field & x)
{
    /// Primary path: the field holds the nearest field type for T.
    NearestFieldType<T> nearest;
    if (x.tryGet<NearestFieldType<T>>(nearest))
    {
        data.push_back(static_cast<T>(nearest));
        return true;
    }

    /// Fallback for UInt8 columns: a boolean field is accepted as well.
    if constexpr (std::is_same_v<T, UInt8>)
    {
        bool flag;
        if (x.tryGet<bool>(flag))
        {
            data.push_back(static_cast<T>(flag));
            return true;
        }
    }

    return false;
}
template <typename T> template <typename T>
void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length) void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{ {

View File

@ -224,14 +224,8 @@ public:
data.push_back(static_cast<T>(x.get<T>())); data.push_back(static_cast<T>(x.get<T>()));
} }
bool tryInsert(const DB::Field & x) override bool tryInsert(const DB::Field & x) override;
{
NearestFieldType<T> value;
if (!x.tryGet<NearestFieldType<T>>(value))
return false;
data.push_back(static_cast<T>(value));
return true;
}
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;

View File

@ -1054,6 +1054,14 @@ Field FieldVisitorFoldDimension::operator()(const Array & x) const
return res; return res;
} }
/// Visits a Null: while there are still dimensions to fold, an empty Array is
/// returned in its place; once none are left, the Null is returned unchanged.
Field FieldVisitorFoldDimension::operator()(const Null & x) const
{
    if (num_dimensions_to_fold != 0)
        return Array();

    return x;
}
void setAllObjectsToDummyTupleType(NamesAndTypesList & columns) void setAllObjectsToDummyTupleType(NamesAndTypesList & columns)
{ {
for (auto & column : columns) for (auto & column : columns)

View File

@ -149,7 +149,7 @@ public:
Field operator()(const Array & x) const; Field operator()(const Array & x) const;
Field operator()(const Null & x) const { return x; } Field operator()(const Null & x) const;
template <typename T> template <typename T>
Field operator()(const T & x) const Field operator()(const T & x) const

View File

@ -16,3 +16,4 @@
{"x":[[],[1,2]]} {"x":[[],[1,2]]}
{"x":[[],[[1],[2]]]} {"x":[[],[[1],[2]]]}
{"x":[[],[[],[2]]]} {"x":[[],[[],[2]]]}
{"a.a":[[1],[]]}

View File

@ -32,3 +32,4 @@ SELECT CAST('{"x" : [ 1 , [ 1 , 2] ]}', 'Object(\'json\')');
SELECT CAST('{"x" : [ {} , [ 1 , 2] ]}', 'Object(\'json\')'); SELECT CAST('{"x" : [ {} , [ 1 , 2] ]}', 'Object(\'json\')');
SELECT CAST('{"x" : [ {} , [ 1 , [2]] ]}', 'Object(\'json\')'); SELECT CAST('{"x" : [ {} , [ 1 , [2]] ]}', 'Object(\'json\')');
SELECT CAST('{"x" : [ {} , [ {} , [2]] ]}', 'Object(\'json\')'); SELECT CAST('{"x" : [ {} , [ {} , [2]] ]}', 'Object(\'json\')');
SELECT CAST(' {"a": { "a": [ [1], null ] } }', 'Object(Nullable(\'json\'))');