mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Backport #70328 to 24.8: Fix ALTER of Dynamic type with reducing max_types parameter
This commit is contained in:
parent
fa3d99b00a
commit
87e3c55bca
@ -4487,26 +4487,124 @@ private:
|
|||||||
return [to_max_types]
|
return [to_max_types]
|
||||||
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
|
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
|
||||||
{
|
{
|
||||||
const auto & column_dynamic = assert_cast<const ColumnDynamic &>(*arguments[0].column);
|
const auto & dynamic_column = assert_cast<const ColumnDynamic &>(*arguments[0].column);
|
||||||
/// We should use the same limit as already used in column and change only global limit.
|
/// We should use the same limit as already used in column and change only global limit.
|
||||||
/// It's needed because shared variant should contain values only when limit is exceeded,
|
/// It's needed because shared variant should contain values only when limit is exceeded,
|
||||||
/// so if there are already some data, we cannot increase the limit.
|
/// so if there are already some data, we cannot increase the limit.
|
||||||
return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), column_dynamic.getMaxDynamicTypes(), to_max_types);
|
return ColumnDynamic::create(dynamic_column.getVariantColumnPtr(), dynamic_column.getVariantInfo(), dynamic_column.getMaxDynamicTypes(), to_max_types);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
return [to_max_types]
|
return [to_max_types]
|
||||||
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
|
(ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr
|
||||||
{
|
{
|
||||||
const auto & column_dynamic = assert_cast<const ColumnDynamic &>(*arguments[0].column);
|
const auto & dynamic_column = assert_cast<const ColumnDynamic &>(*arguments[0].column);
|
||||||
/// If real limit in the column is not greater than desired, just use the same variant column.
|
/// If real limit in the column is not greater than desired, just use the same variant column.
|
||||||
if (column_dynamic.getMaxDynamicTypes() <= to_max_types)
|
if (dynamic_column.getMaxDynamicTypes() <= to_max_types)
|
||||||
return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), column_dynamic.getMaxDynamicTypes(), to_max_types);
|
return ColumnDynamic::create(dynamic_column.getVariantColumnPtr(), dynamic_column.getVariantInfo(), dynamic_column.getMaxDynamicTypes(), to_max_types);
|
||||||
|
|
||||||
|
/// Otherwise some variants should go to the shared variant. We try to keep the most frequent variants.
|
||||||
|
const auto & variant_info = dynamic_column.getVariantInfo();
|
||||||
|
const auto & variants = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants();
|
||||||
|
const auto & statistics = dynamic_column.getStatistics();
|
||||||
|
const auto & variant_column = dynamic_column.getVariantColumn();
|
||||||
|
auto shared_variant_discr = dynamic_column.getSharedVariantDiscriminator();
|
||||||
|
std::vector<std::tuple<size_t, String, DataTypePtr>> variants_with_sizes;
|
||||||
|
variants_with_sizes.reserve(variant_info.variant_names.size());
|
||||||
|
for (const auto & [name, discr] : variant_info.variant_name_to_discriminator)
|
||||||
|
{
|
||||||
|
/// Don't include shared variant.
|
||||||
|
if (discr == shared_variant_discr)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
size_t size = variant_column.getVariantByGlobalDiscriminator(discr).size();
|
||||||
|
/// If column has statistics from the data part, use size from it for consistency.
|
||||||
|
/// It's important to keep the same dynamic structure of the result column during ALTER.
|
||||||
|
if (statistics)
|
||||||
|
{
|
||||||
|
auto statistics_it = statistics->variants_statistics.find(name);
|
||||||
|
if (statistics_it != statistics->variants_statistics.end())
|
||||||
|
size = statistics_it->second;
|
||||||
|
}
|
||||||
|
variants_with_sizes.emplace_back(size, name, variants[discr]);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
|
||||||
|
DataTypes result_variants;
|
||||||
|
result_variants.reserve(to_max_types + 1); /// +1 for shared variant.
|
||||||
|
/// Add new variants from sorted list until we reach to_max_types.
|
||||||
|
for (const auto & [size, name, type] : variants_with_sizes)
|
||||||
|
{
|
||||||
|
if (result_variants.size() < to_max_types)
|
||||||
|
result_variants.push_back(type);
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add shared variant.
|
||||||
|
result_variants.push_back(ColumnDynamic::getSharedVariantDataType());
|
||||||
|
/// Create resulting Variant type and Dynamic column.
|
||||||
|
auto result_variant_type = std::make_shared<DataTypeVariant>(result_variants);
|
||||||
|
auto result_dynamic_column = ColumnDynamic::create(result_variant_type->createColumn(), result_variant_type, to_max_types, to_max_types);
|
||||||
|
const auto & result_variant_info = result_dynamic_column->getVariantInfo();
|
||||||
|
auto & result_variant_column = result_dynamic_column->getVariantColumn();
|
||||||
|
auto result_shared_variant_discr = result_dynamic_column->getSharedVariantDiscriminator();
|
||||||
|
/// Create mapping from old discriminators to the new ones.
|
||||||
|
std::vector<ColumnVariant::Discriminator> old_to_new_discriminators;
|
||||||
|
old_to_new_discriminators.resize(variant_info.variant_name_to_discriminator.size(), result_shared_variant_discr);
|
||||||
|
for (const auto & [name, discr] : result_variant_info.variant_name_to_discriminator)
|
||||||
|
{
|
||||||
|
auto old_discr = variant_info.variant_name_to_discriminator.at(name);
|
||||||
|
old_to_new_discriminators[old_discr] = discr;
|
||||||
|
/// Reuse old variant column if it's not shared variant.
|
||||||
|
if (discr != result_shared_variant_discr)
|
||||||
|
result_variant_column.getVariantPtrByGlobalDiscriminator(discr) = variant_column.getVariantPtrByGlobalDiscriminator(old_discr);
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto & local_discriminators = variant_column.getLocalDiscriminators();
|
||||||
|
const auto & offsets = variant_column.getOffsets();
|
||||||
|
const auto & shared_variant = dynamic_column.getSharedVariant();
|
||||||
|
auto & result_local_discriminators = result_variant_column.getLocalDiscriminators();
|
||||||
|
result_local_discriminators.reserve(local_discriminators.size());
|
||||||
|
auto & result_offsets = result_variant_column.getOffsets();
|
||||||
|
result_offsets.reserve(offsets.size());
|
||||||
|
auto & result_shared_variant = result_dynamic_column->getSharedVariant();
|
||||||
|
for (size_t i = 0; i != local_discriminators.size(); ++i)
|
||||||
|
{
|
||||||
|
auto global_discr = variant_column.globalDiscriminatorByLocal(local_discriminators[i]);
|
||||||
|
if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
|
||||||
|
{
|
||||||
|
result_local_discriminators.push_back(ColumnVariant::NULL_DISCRIMINATOR);
|
||||||
|
result_offsets.emplace_back();
|
||||||
|
}
|
||||||
|
else if (global_discr == shared_variant_discr)
|
||||||
|
{
|
||||||
|
result_local_discriminators.push_back(result_variant_column.localDiscriminatorByGlobal(result_shared_variant_discr));
|
||||||
|
result_offsets.push_back(result_shared_variant.size());
|
||||||
|
result_shared_variant.insertFrom(shared_variant, offsets[i]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto result_global_discr = old_to_new_discriminators[global_discr];
|
||||||
|
if (result_global_discr == result_shared_variant_discr)
|
||||||
|
{
|
||||||
|
result_local_discriminators.push_back(result_variant_column.localDiscriminatorByGlobal(result_shared_variant_discr));
|
||||||
|
result_offsets.push_back(result_shared_variant.size());
|
||||||
|
ColumnDynamic::serializeValueIntoSharedVariant(
|
||||||
|
result_shared_variant,
|
||||||
|
variant_column.getVariantByGlobalDiscriminator(global_discr),
|
||||||
|
variants[global_discr],
|
||||||
|
variants[global_discr]->getDefaultSerialization(),
|
||||||
|
offsets[i]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result_local_discriminators.push_back(result_variant_column.localDiscriminatorByGlobal(result_global_discr));
|
||||||
|
result_offsets.push_back(offsets[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Otherwise some variants should go to the shared variant. In this case we can just insert all
|
|
||||||
/// the data into resulting column and it will do all the logic with shared variant.
|
|
||||||
auto result_dynamic_column = ColumnDynamic::create(to_max_types);
|
|
||||||
result_dynamic_column->insertRangeFrom(column_dynamic, 0, column_dynamic.size());
|
|
||||||
return result_dynamic_column;
|
return result_dynamic_column;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,2 @@
|
|||||||
|
600000 UInt64 false
|
||||||
|
400000 String true
|
@ -0,0 +1,9 @@
|
|||||||
|
set allow_experimental_dynamic_type=1;
|
||||||
|
|
||||||
|
drop table if exists test;
|
||||||
|
create table test (d Dynamic) engine=MergeTree order by tuple() settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1;
|
||||||
|
insert into test select number < 600000 ? number::Dynamic : ('str_' || number)::Dynamic from numbers(1000000);
|
||||||
|
alter table test modify column d Dynamic(max_types=1);
|
||||||
|
select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d);
|
||||||
|
drop table test;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user