mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-05 05:52:05 +00:00
Address comments
This commit is contained in:
parent
9e639df12e
commit
8ac04c6dd8
@ -105,6 +105,15 @@ ColumnPtr DataTypeVariant::createColumnConst(size_t size, const DB::Field & fiel
|
||||
}
|
||||
else
|
||||
{
|
||||
/// We don't have exact mapping Field type -> Data type, so we cannot
|
||||
/// always know in which variant we need to insert the field by its type.
|
||||
/// Examples:
|
||||
/// Field(42) and Variant(UInt16, String). Type of the Field - UInt64, but we can insert it in UInt16
|
||||
/// Field(42) and Variant(Date, String). Type of the Field - UInt64, but we can insert it in Date
|
||||
|
||||
/// Let's first apply FieldToDataType visitor to find best Data type for this field.
|
||||
/// If we have variant with such type, we will insert this field into it.
|
||||
/// Otherwise we will try to find the first variant that has default Field value with the same type.
|
||||
auto field_type = applyVisitor(FieldToDataType(), field);
|
||||
auto discr = tryGetVariantDiscriminator(field_type);
|
||||
if (!discr)
|
||||
|
@ -200,19 +200,12 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams(
|
||||
for (size_t i = 0; i != limit; ++i)
|
||||
writeBinaryLittleEndian(non_empty_global_discr, *discriminators_stream);
|
||||
|
||||
/// Second, serialize variants in global order.
|
||||
/// Second, serialize non-empty variant (other variants are empty and we can skip their serialization).
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
/// For non empty variant use the same offset/limit as for whole Variant column
|
||||
if (i == non_empty_global_discr)
|
||||
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), offset, limit, settings, variant_state->states[i]);
|
||||
/// For empty variants, use just 0/0, they won't serialize anything.
|
||||
else
|
||||
variants[i]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(i), 0, 0, settings, variant_state->states[i]);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
addVariantElementToPath(settings.path, non_empty_global_discr);
|
||||
/// We can use the same offset/limit as for whole Variant column
|
||||
variants[non_empty_global_discr]->serializeBinaryBulkWithMultipleStreams(col.getVariantByGlobalDiscriminator(non_empty_global_discr), offset, limit, settings, variant_state->states[non_empty_global_discr]);
|
||||
settings.path.pop_back();
|
||||
settings.path.pop_back();
|
||||
return;
|
||||
}
|
||||
@ -237,26 +230,22 @@ void SerializationVariant::serializeBinaryBulkWithMultipleStreams(
|
||||
}
|
||||
}
|
||||
|
||||
/// If limit for some variant is 0, it means that we don't have its discriminator in the range.
|
||||
/// Set offset to the size of column for such variants, so we won't serialize values from them.
|
||||
for (size_t i = 0; i != variant_offsets_and_limits.size(); ++i)
|
||||
{
|
||||
if (!variant_offsets_and_limits[i].second)
|
||||
variant_offsets_and_limits[i].first = col.getVariantByGlobalDiscriminator(i).size();
|
||||
}
|
||||
|
||||
/// Serialize variants in global order.
|
||||
settings.path.push_back(Substream::VariantElements);
|
||||
for (size_t i = 0; i != variants.size(); ++i)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkWithMultipleStreams(
|
||||
col.getVariantByGlobalDiscriminator(i),
|
||||
variant_offsets_and_limits[i].first,
|
||||
variant_offsets_and_limits[i].second,
|
||||
settings,
|
||||
variant_state->states[i]);
|
||||
settings.path.pop_back();
|
||||
/// Serialize variant only if we have its discriminator in the range.
|
||||
if (variant_offsets_and_limits[i].second)
|
||||
{
|
||||
addVariantElementToPath(settings.path, i);
|
||||
variants[i]->serializeBinaryBulkWithMultipleStreams(
|
||||
col.getVariantByGlobalDiscriminator(i),
|
||||
variant_offsets_and_limits[i].first,
|
||||
variant_offsets_and_limits[i].second,
|
||||
settings,
|
||||
variant_state->states[i]);
|
||||
settings.path.pop_back();
|
||||
}
|
||||
}
|
||||
settings.path.pop_back();
|
||||
}
|
||||
@ -564,9 +553,6 @@ std::vector<size_t> SerializationVariant::getVariantsDeserializeTextOrder(const
|
||||
}
|
||||
|
||||
std::sort(order.begin(), order.end(), [&](size_t left, size_t right) { return priorities[left] > priorities[right]; });
|
||||
String types_order;
|
||||
for (auto i : order)
|
||||
types_order += " " + variant_types[i]->getName();
|
||||
return order;
|
||||
}
|
||||
|
||||
|
@ -6,6 +6,29 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Class for serializing/deserializing column with Variant type.
|
||||
/// It supports both text and binary bulk serializations/deserializations.
|
||||
///
|
||||
/// During text serialization it checks discriminator of the current row and
|
||||
/// uses corresponding text serialization of this variant.
|
||||
///
|
||||
/// During text deserialization it tries all variants deserializations
|
||||
/// (using tryDeserializeText* methods of ISerialization) in predefined order
|
||||
/// and inserts data in the first variant with succeeded deserialization.
|
||||
///
|
||||
/// During binary bulk serialization it transforms local discriminators
|
||||
/// to global and serializes them into a separate stream VariantDiscriminators.
|
||||
/// Each variant is serialized into a separate stream with path VariantElements/VariantElement
|
||||
/// (VariantElements stream is needed for correct sub-columns creation). We store and serialize
|
||||
/// variants in a sparse form (the size of a variant column equals the number of occurrences of its discriminator
|
||||
/// in the discriminators column), so during deserialization the limit for each variant is
|
||||
/// calculated according to discriminators column.
|
||||
/// The offsets column is not serialized; it is stored only in memory.
|
||||
///
|
||||
/// During binary bulk deserialization we first deserialize discriminators from corresponding stream
|
||||
/// and use them to calculate the limit for each variant. Each variant is deserialized from
|
||||
/// corresponding stream using calculated limit. The offsets column is not deserialized; it is constructed
|
||||
/// according to discriminators.
|
||||
class SerializationVariant : public ISerialization
|
||||
{
|
||||
public:
|
||||
|
@ -149,19 +149,21 @@ void SerializationVariantElement::deserializeBinaryBulkWithMultipleStreams(
|
||||
assert_cast<ColumnLowCardinality &>(*variant_element_state->variant->assumeMutable()).nestedRemoveNullable();
|
||||
}
|
||||
|
||||
/// If nothing to deserialize, just insert defaults.
|
||||
if (variant_limit == 0)
|
||||
{
|
||||
mutable_column->insertManyDefaults(limit);
|
||||
return;
|
||||
}
|
||||
|
||||
addVariantToPath(settings.path);
|
||||
nested_serialization->deserializeBinaryBulkWithMultipleStreams(variant_element_state->variant, variant_limit, settings, variant_element_state->variant_element_state, cache);
|
||||
removeVariantFromPath(settings.path);
|
||||
|
||||
size_t variant_offset = variant_element_state->variant->size() - variant_limit;
|
||||
|
||||
/// If we don't have our discriminator in range, just insert defaults.
|
||||
if (variant_limit == 0)
|
||||
{
|
||||
mutable_column->insertManyDefaults(limit);
|
||||
}
|
||||
/// If we have only our discriminator in range, insert the whole range to result column.
|
||||
else if (variant_limit == limit)
|
||||
if (variant_limit == limit)
|
||||
{
|
||||
mutable_column->insertRangeFrom(*variant_element_state->variant, variant_offset, variant_limit);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user