diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index d94f0d90e1b..c39c57ede7f 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -4487,26 +4487,124 @@ private: return [to_max_types] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr { - const auto & column_dynamic = assert_cast(*arguments[0].column); + const auto & dynamic_column = assert_cast(*arguments[0].column); /// We should use the same limit as already used in column and change only global limit. /// It's needed because shared variant should contain values only when limit is exceeded, /// so if there are already some data, we cannot increase the limit. - return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), column_dynamic.getMaxDynamicTypes(), to_max_types); + return ColumnDynamic::create(dynamic_column.getVariantColumnPtr(), dynamic_column.getVariantInfo(), dynamic_column.getMaxDynamicTypes(), to_max_types); }; } return [to_max_types] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t) -> ColumnPtr { - const auto & column_dynamic = assert_cast(*arguments[0].column); + const auto & dynamic_column = assert_cast(*arguments[0].column); /// If real limit in the column is not greater than desired, just use the same variant column. - if (column_dynamic.getMaxDynamicTypes() <= to_max_types) - return ColumnDynamic::create(column_dynamic.getVariantColumnPtr(), column_dynamic.getVariantInfo(), column_dynamic.getMaxDynamicTypes(), to_max_types); + if (dynamic_column.getMaxDynamicTypes() <= to_max_types) + return ColumnDynamic::create(dynamic_column.getVariantColumnPtr(), dynamic_column.getVariantInfo(), dynamic_column.getMaxDynamicTypes(), to_max_types); + + /// Otherwise some variants should go to the shared variant. We try to keep the most frequent variants. + const auto & variant_info = dynamic_column.getVariantInfo(); + const auto & variants = assert_cast(*variant_info.variant_type).getVariants(); + const auto & statistics = dynamic_column.getStatistics(); + const auto & variant_column = dynamic_column.getVariantColumn(); + auto shared_variant_discr = dynamic_column.getSharedVariantDiscriminator(); + std::vector> variants_with_sizes; + variants_with_sizes.reserve(variant_info.variant_names.size()); + for (const auto & [name, discr] : variant_info.variant_name_to_discriminator) + { + /// Don't include shared variant. + if (discr == shared_variant_discr) + continue; + + size_t size = variant_column.getVariantByGlobalDiscriminator(discr).size(); + /// If column has statistics from the data part, use size from it for consistency. + /// It's important to keep the same dynamic structure of the result column during ALTER. + if (statistics) + { + auto statistics_it = statistics->variants_statistics.find(name); + if (statistics_it != statistics->variants_statistics.end()) + size = statistics_it->second; + } + variants_with_sizes.emplace_back(size, name, variants[discr]); + } + + std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); + DataTypes result_variants; + result_variants.reserve(to_max_types + 1); /// +1 for shared variant. + /// Add new variants from sorted list until we reach to_max_types. + for (const auto & [size, name, type] : variants_with_sizes) + { + if (result_variants.size() < to_max_types) + result_variants.push_back(type); + else + break; + } + + /// Add shared variant. + result_variants.push_back(ColumnDynamic::getSharedVariantDataType()); + /// Create resulting Variant type and Dynamic column. + auto result_variant_type = std::make_shared(result_variants); + auto result_dynamic_column = ColumnDynamic::create(result_variant_type->createColumn(), result_variant_type, to_max_types, to_max_types); + const auto & result_variant_info = result_dynamic_column->getVariantInfo(); + auto & result_variant_column = result_dynamic_column->getVariantColumn(); + auto result_shared_variant_discr = result_dynamic_column->getSharedVariantDiscriminator(); + /// Create mapping from old discriminators to the new ones. + std::vector old_to_new_discriminators; + old_to_new_discriminators.resize(variant_info.variant_name_to_discriminator.size(), result_shared_variant_discr); + for (const auto & [name, discr] : result_variant_info.variant_name_to_discriminator) + { + auto old_discr = variant_info.variant_name_to_discriminator.at(name); + old_to_new_discriminators[old_discr] = discr; + /// Reuse old variant column if it's not shared variant. + if (discr != result_shared_variant_discr) + result_variant_column.getVariantPtrByGlobalDiscriminator(discr) = variant_column.getVariantPtrByGlobalDiscriminator(old_discr); + } + + const auto & local_discriminators = variant_column.getLocalDiscriminators(); + const auto & offsets = variant_column.getOffsets(); + const auto & shared_variant = dynamic_column.getSharedVariant(); + auto & result_local_discriminators = result_variant_column.getLocalDiscriminators(); + result_local_discriminators.reserve(local_discriminators.size()); + auto & result_offsets = result_variant_column.getOffsets(); + result_offsets.reserve(offsets.size()); + auto & result_shared_variant = result_dynamic_column->getSharedVariant(); + for (size_t i = 0; i != local_discriminators.size(); ++i) + { + auto global_discr = variant_column.globalDiscriminatorByLocal(local_discriminators[i]); + if (global_discr == ColumnVariant::NULL_DISCRIMINATOR) + { + result_local_discriminators.push_back(ColumnVariant::NULL_DISCRIMINATOR); + result_offsets.emplace_back(); + } + else if (global_discr == shared_variant_discr) + { + result_local_discriminators.push_back(result_variant_column.localDiscriminatorByGlobal(result_shared_variant_discr)); + result_offsets.push_back(result_shared_variant.size()); + result_shared_variant.insertFrom(shared_variant, offsets[i]); + } + else + { + auto result_global_discr = old_to_new_discriminators[global_discr]; + if (result_global_discr == result_shared_variant_discr) + { + result_local_discriminators.push_back(result_variant_column.localDiscriminatorByGlobal(result_shared_variant_discr)); + result_offsets.push_back(result_shared_variant.size()); + ColumnDynamic::serializeValueIntoSharedVariant( + result_shared_variant, + variant_column.getVariantByGlobalDiscriminator(global_discr), + variants[global_discr], + variants[global_discr]->getDefaultSerialization(), + offsets[i]); + } + else + { + result_local_discriminators.push_back(result_variant_column.localDiscriminatorByGlobal(result_global_discr)); + result_offsets.push_back(offsets[i]); + } + } + } - /// Otherwise some variants should go to the shared variant. In this case we can just insert all - /// the data into resulting column and it will do all the logic with shared variant. - auto result_dynamic_column = ColumnDynamic::create(to_max_types); - result_dynamic_column->insertRangeFrom(column_dynamic, 0, column_dynamic.size()); return result_dynamic_column; }; } diff --git a/tests/queries/0_stateless/03249_dynamic_alter_consistency.reference b/tests/queries/0_stateless/03249_dynamic_alter_consistency.reference new file mode 100644 index 00000000000..a24c35449ed --- /dev/null +++ b/tests/queries/0_stateless/03249_dynamic_alter_consistency.reference @@ -0,0 +1,2 @@ +600000 UInt64 false +400000 String true diff --git a/tests/queries/0_stateless/03249_dynamic_alter_consistency.sql b/tests/queries/0_stateless/03249_dynamic_alter_consistency.sql new file mode 100644 index 00000000000..5840866628e --- /dev/null +++ b/tests/queries/0_stateless/03249_dynamic_alter_consistency.sql @@ -0,0 +1,9 @@ +set allow_experimental_dynamic_type=1; + +drop table if exists test; +create table test (d Dynamic) engine=MergeTree order by tuple() settings min_rows_for_wide_part=1, min_bytes_for_wide_part=1; +insert into test select number < 600000 ? number::Dynamic : ('str_' || number)::Dynamic from numbers(1000000); +alter table test modify column d Dynamic(max_types=1); +select count(), dynamicType(d), isDynamicElementInSharedData(d) from test group by dynamicType(d), isDynamicElementInSharedData(d); +drop table test; +