Merge pull request #68953 from Avogar/fix-dynamic-unitialized-value

Don't use serializations cache in const Dynamic column methods
This commit is contained in:
Kruglov Pavel 2024-08-28 11:39:00 +00:00 committed by GitHub
commit e7d63b5f79
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 19 additions and 13 deletions

View File

@ -300,7 +300,7 @@ void ColumnDynamic::get(size_t n, Field & res) const
auto value_data = shared_variant.getDataAt(variant_col.offsetAt(n)); auto value_data = shared_variant.getDataAt(variant_col.offsetAt(n));
ReadBufferFromMemory buf(value_data.data, value_data.size); ReadBufferFromMemory buf(value_data.data, value_data.size);
auto type = decodeDataType(buf); auto type = decodeDataType(buf);
getVariantSerialization(type)->deserializeBinary(res, buf, getFormatSettings()); type->getDefaultSerialization()->deserializeBinary(res, buf, getFormatSettings());
} }
@ -736,8 +736,7 @@ StringRef ColumnDynamic::serializeValueIntoArena(size_t n, Arena & arena, const
{ {
const auto & variant_type = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariant(discr); const auto & variant_type = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariant(discr);
encodeDataType(variant_type, buf); encodeDataType(variant_type, buf);
getVariantSerialization(variant_type, variant_info.variant_names[discr]) variant_type->getDefaultSerialization()->serializeBinary(variant_col.getVariantByGlobalDiscriminator(discr), variant_col.offsetAt(n), buf, getFormatSettings());
->serializeBinary(variant_col.getVariantByGlobalDiscriminator(discr), variant_col.offsetAt(n), buf, getFormatSettings());
type_and_value = buf.str(); type_and_value = buf.str();
} }
@ -870,7 +869,7 @@ int ColumnDynamic::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_
/// We have both values serialized in binary format, so we need to /// We have both values serialized in binary format, so we need to
/// create temporary column, insert both values into it and compare. /// create temporary column, insert both values into it and compare.
auto tmp_column = left_data_type->createColumn(); auto tmp_column = left_data_type->createColumn();
const auto & serialization = getVariantSerialization(left_data_type, left_data_type_name); const auto & serialization = left_data_type->getDefaultSerialization();
serialization->deserializeBinary(*tmp_column, buf_left, getFormatSettings()); serialization->deserializeBinary(*tmp_column, buf_left, getFormatSettings());
serialization->deserializeBinary(*tmp_column, buf_right, getFormatSettings()); serialization->deserializeBinary(*tmp_column, buf_right, getFormatSettings());
return tmp_column->compareAt(0, 1, *tmp_column, nan_direction_hint); return tmp_column->compareAt(0, 1, *tmp_column, nan_direction_hint);
@ -892,7 +891,7 @@ int ColumnDynamic::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_
/// We have left value serialized in binary format, we need to /// We have left value serialized in binary format, we need to
/// create temporary column, insert the value into it and compare. /// create temporary column, insert the value into it and compare.
auto tmp_column = left_data_type->createColumn(); auto tmp_column = left_data_type->createColumn();
getVariantSerialization(left_data_type, left_data_type_name)->deserializeBinary(*tmp_column, buf_left, getFormatSettings()); left_data_type->getDefaultSerialization()->deserializeBinary(*tmp_column, buf_left, getFormatSettings());
return tmp_column->compareAt(0, right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint); return tmp_column->compareAt(0, right_variant.offsetAt(m), right_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint);
} }
/// Check if only right value is in shared data. /// Check if only right value is in shared data.
@ -912,7 +911,7 @@ int ColumnDynamic::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_
/// We have right value serialized in binary format, we need to /// We have right value serialized in binary format, we need to
/// create temporary column, insert the value into it and compare. /// create temporary column, insert the value into it and compare.
auto tmp_column = right_data_type->createColumn(); auto tmp_column = right_data_type->createColumn();
getVariantSerialization(right_data_type, right_data_type_name)->deserializeBinary(*tmp_column, buf_right, getFormatSettings()); right_data_type->getDefaultSerialization()->deserializeBinary(*tmp_column, buf_right, getFormatSettings());
return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), 0, *tmp_column, nan_direction_hint); return left_variant.getVariantByGlobalDiscriminator(left_discr).compareAt(left_variant.offsetAt(n), 0, *tmp_column, nan_direction_hint);
} }
/// Otherwise both values are regular variants. /// Otherwise both values are regular variants.

View File

@ -414,7 +414,7 @@ public:
/// Insert value into shared variant. Also updates Variant discriminators and offsets. /// Insert value into shared variant. Also updates Variant discriminators and offsets.
void insertValueIntoSharedVariant(const IColumn & src, const DataTypePtr & type, const String & type_name, size_t n); void insertValueIntoSharedVariant(const IColumn & src, const DataTypePtr & type, const String & type_name, size_t n);
const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type, const String & variant_name) const const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type, const String & variant_name)
{ {
/// Get serialization for provided data type. /// Get serialization for provided data type.
/// To avoid calling type->getDefaultSerialization() every time we use simple cache with max size. /// To avoid calling type->getDefaultSerialization() every time we use simple cache with max size.
@ -428,7 +428,7 @@ public:
return serialization_cache.emplace(variant_name, variant_type->getDefaultSerialization()).first->second; return serialization_cache.emplace(variant_name, variant_type->getDefaultSerialization()).first->second;
} }
const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) const { return getVariantSerialization(variant_type, variant_type->getName()); } const SerializationPtr & getVariantSerialization(const DataTypePtr & variant_type) { return getVariantSerialization(variant_type, variant_type->getName()); }
private: private:
void createVariantInfo(const DataTypePtr & variant_type); void createVariantInfo(const DataTypePtr & variant_type);
@ -473,7 +473,7 @@ private:
/// We can use serializations of different data types to serialize values into shared variant. /// We can use serializations of different data types to serialize values into shared variant.
/// To avoid creating the same serialization multiple times, use simple cache. /// To avoid creating the same serialization multiple times, use simple cache.
static const size_t SERIALIZATION_CACHE_MAX_SIZE = 256; static const size_t SERIALIZATION_CACHE_MAX_SIZE = 256;
mutable std::unordered_map<String, SerializationPtr> serialization_cache; std::unordered_map<String, SerializationPtr> serialization_cache;
}; };
void extendVariantColumn( void extendVariantColumn(

View File

@ -185,7 +185,7 @@ std::unique_ptr<IDataType::SubstreamData> DataTypeDynamic::getDynamicSubcolumnDa
auto type = decodeDataType(buf); auto type = decodeDataType(buf);
if (type->getName() == subcolumn_type_name) if (type->getName() == subcolumn_type_name)
{ {
dynamic_column.getVariantSerialization(subcolumn_type, subcolumn_type_name)->deserializeBinary(*subcolumn, buf, format_settings); subcolumn_type->getDefaultSerialization()->deserializeBinary(*subcolumn, buf, format_settings);
null_map.push_back(0); null_map.push_back(0);
} }
else else

View File

@ -489,9 +489,8 @@ void SerializationDynamic::serializeBinary(const IColumn & column, size_t row_nu
} }
const auto & variant_type = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariant(global_discr); const auto & variant_type = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariant(global_discr);
const auto & variant_type_name = variant_info.variant_names[global_discr];
encodeDataType(variant_type, ostr); encodeDataType(variant_type, ostr);
dynamic_column.getVariantSerialization(variant_type, variant_type_name)->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings); variant_type->getDefaultSerialization()->serializeBinary(variant_column.getVariantByGlobalDiscriminator(global_discr), variant_column.offsetAt(row_num), ostr, settings);
} }
template <typename ReturnType = void, typename DeserializeFunc> template <typename ReturnType = void, typename DeserializeFunc>
@ -629,7 +628,7 @@ static void serializeTextImpl(
ReadBufferFromMemory buf(value.data, value.size); ReadBufferFromMemory buf(value.data, value.size);
auto variant_type = decodeDataType(buf); auto variant_type = decodeDataType(buf);
auto tmp_variant_column = variant_type->createColumn(); auto tmp_variant_column = variant_type->createColumn();
auto variant_serialization = dynamic_column.getVariantSerialization(variant_type); auto variant_serialization = variant_type->getDefaultSerialization();
variant_serialization->deserializeBinary(*tmp_variant_column, buf, settings); variant_serialization->deserializeBinary(*tmp_variant_column, buf, settings);
nested_serialize(*variant_serialization, *tmp_variant_column, 0, ostr); nested_serialize(*variant_serialization, *tmp_variant_column, 0, ostr);
} }

View File

@ -0,0 +1,4 @@
str 3 \N
str 3 \N
str 3 \N
str 3 \N

View File

@ -0,0 +1,4 @@
set allow_experimental_dynamic_type=1;
set cast_keep_nullable=1;
SELECT toFixedString('str', 3), 3, CAST(if(1 = 0, toInt8(3), NULL), 'Int32') AS x from numbers(10) GROUP BY GROUPING SETS ((CAST(toInt32(1), 'Int32')), ('str', 3), (CAST(toFixedString('str', 3), 'Dynamic')), (CAST(toFixedString(toFixedString('str', 3), 3), 'Dynamic')));