Address comments

This commit is contained in:
avogar 2024-01-18 15:15:57 +00:00
parent 9e639df12e
commit 8ac04c6dd8
4 changed files with 57 additions and 37 deletions

View File

@ -105,6 +105,15 @@ ColumnPtr DataTypeVariant::createColumnConst(size_t size, const DB::Field & fiel
}
else
{
/// We don't have exact mapping Field type -> Data type, so we cannot
/// always know in which variant we need to insert the field by its type.
/// Examples:
/// Field(42) and Variant(UInt16, String). Type of the Field - UInt64, but we can insert it in UInt16
/// Field(42) and Variant(Date, String). Type of the Field - UInt64, but we can insert it in Date
/// Let's first apply FieldToDataType visitor to find best Data type for this field.
/// If we have variant with such type, we will insert this field into it.
/// Otherwise we will try to find the first variant that has default Field value with the same type.
auto field_type = applyVisitor(FieldToDataType(), field);
auto discr = tryGetVariantDiscriminator(field_type);
if (!discr)

View File

@ -200,19 +200,12 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams(
for (size_t i = 0; i != limit; ++i)
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
/// Second, serialize variants in global order.
/// Second, serialize non-empty variant (other variants are empty and we can skip their serialization).
settings.path.push_back(Substream::VariantElements);
for (size_t i = 0; i != variants.size(); ++i)
{
addVariantElementToPath(settings.path, i);
/// For non empty variant use the same offset/limit as for whole Variant column
if (i == non_empty_global_discr)
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), offset, limit, settings, variant_state->states[i]);
/// For empty variants, use just 0/0, they won't serialize anything.
else
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]);
settings.path.pop_back();
}
addVariantElementToPath(settings.path, non_empty_global_discr);
/// We can use the same offset/limit as for whole Variant column
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]);
settings.path.pop_back();
settings.path.pop_back();
return;
}
@ -237,26 +230,22 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams(
}
}
/// If limit for some variant is 0, it means that we don't have its discriminator in the range.
/// Set offset to the size of column for such variants, so we won't serialize values from them.
for (size_t i = 0; i != variant_offsets_and_limits.size(); ++i)
{
if (!variant_offsets_and_limits[i].second)
variant_offsets_and_limits[i].first = col.getVariantByGlobalDiscriminator(i).size();
}
/// Serialize variants in global order.
settings.path.push_back(Substream::VariantElements);
for (size_t i = 0; i != variants.size(); ++i)
{
addVariantElementToPath(settings.path, i);
variants[i]->serializeBinaryBulkWithMultipleStreams(
col.getVariantByGlobalDiscriminator(i),
variant_offsets_and_limits[i].first,
variant_offsets_and_limits[i].second,
settings,
variant_state->states[i]);
settings.path.pop_back();
/// Serialize variant only if we have its discriminator in the range.
if (variant_offsets_and_limits[i].second)
{
addVariantElementToPath(settings.path, i);
variants[i]->serializeBinaryBulkWithMultipleStreams(
col.getVariantByGlobalDiscriminator(i),
variant_offsets_and_limits[i].first,
variant_offsets_and_limits[i].second,
settings,
variant_state->states[i]);
settings.path.pop_back();
}
}
settings.path.pop_back();
}
@ -564,9 +553,6 @@ std::vector<size_t> SerializationVariant::getVariantsDeserializeTextOrder(const
}
std::sort(order.begin(), order.end(), [&](size_t left, size_t right) { return priorities[left] > priorities[right]; });
String types_order;
for (auto i : order)
types_order += " " + variant_types[i]->getName();
return order;
}

View File

@ -6,6 +6,29 @@
namespace DB
{
/// Class for serializing/deserializing column with Variant type.
/// It supports both text and binary bulk serializations/deserializations.
///
/// During text serialization it checks discriminator of the current row and
/// uses corresponding text serialization of this variant.
///
/// During text deserialization it tries all variants deserializations
/// (using tryDeserializeText* methods of ISerialization) in predefined order
/// and inserts data in the first variant with succeeded deserialization.
///
/// During binary bulk serialization it transforms local discriminators
/// to global and serializes them into a separate stream VariantDiscriminators.
/// Each variant is serialized into a separate stream with path VariantElements/VariantElement
/// (VariantElements stream is needed for correct sub-columns creation). We store and serialize
/// variants in a sparse form (the size of a variant column equals the number of occurrences of its
/// discriminator in the discriminators column), so during deserialization the limit for each variant is
/// calculated according to the discriminators column.
/// The offsets column is not serialized; it is stored only in memory.
///
/// During binary bulk deserialization we first deserialize discriminators from corresponding stream
/// and use them to calculate the limit for each variant. Each variant is deserialized from
/// the corresponding stream using the calculated limit. The offsets column is not deserialized;
/// it is constructed according to the discriminators.
class SerializationVariant : public ISerialization
{
public:

View File

@ -149,19 +149,21 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
assert_cast<ColumnLowCardinality &>(*variant_element_state->variant->assumeMutable()).nestedRemoveNullable();
}
/// If nothing to deserialize, just insert defaults.
if (variant_limit == 0)
{
mutable_column->insertManyDefaults(limit);
return;
}
addVariantToPath(settings.path);
nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache);
removeVariantFromPath(settings.path);
size_t variant_offset = variant_element_state->variant->size() - variant_limit;
/// If we don't have our discriminator in range, just insert defaults.
if (variant_limit == 0)
{
mutable_column->insertManyDefaults(limit);
}
/// If we have only our discriminator in range, insert the whole range to result column.
else if (variant_limit == limit)
if (variant_limit == limit)
{
mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit);
}