mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Update new prepareForSquashing method for ColumnDynamic
This commit is contained in:
parent
03182c7a8f
commit
8136e6a452
@ -987,7 +987,8 @@ void ColumnDynamic::prepareForSquashing(const Columns & source_columns)
|
|||||||
/// Internal variants of source dynamic columns may differ.
|
/// Internal variants of source dynamic columns may differ.
|
||||||
/// We want to preallocate memory for all variants we will have after squashing.
|
/// We want to preallocate memory for all variants we will have after squashing.
|
||||||
/// It may happen that the total number of variants in source columns will
|
/// It may happen that the total number of variants in source columns will
|
||||||
/// exceed the limit, in this case we will choose the most frequent variants.
|
/// exceed the limit, in this case we will choose the most frequent variants
|
||||||
|
/// and insert the rest of the types into the shared variant.
|
||||||
|
|
||||||
/// First, preallocate memory for variant discriminators and offsets.
|
/// First, preallocate memory for variant discriminators and offsets.
|
||||||
size_t new_size = size();
|
size_t new_size = size();
|
||||||
@ -1030,17 +1031,14 @@ void ColumnDynamic::prepareForSquashing(const Columns & source_columns)
|
|||||||
|
|
||||||
DataTypePtr result_variant_type;
|
DataTypePtr result_variant_type;
|
||||||
/// Check if the number of all variants exceeds the limit.
|
/// Check if the number of all variants exceeds the limit.
|
||||||
if (all_variants.size() > max_dynamic_types || (all_variants.size() == max_dynamic_types && !total_variant_sizes.contains("String")))
|
if (!canAddNewVariants(0, all_variants.size()))
|
||||||
{
|
{
|
||||||
/// We want to keep the most frequent variants in the resulting dynamic column.
|
/// We want to keep the most frequent variants in the resulting dynamic column.
|
||||||
DataTypes result_variants;
|
DataTypes result_variants;
|
||||||
result_variants.reserve(max_dynamic_types);
|
result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant.
|
||||||
/// Add variants from current variant column as we will not rewrite it.
|
/// Add variants from current variant column as we will not rewrite it.
|
||||||
for (const auto & variant : assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants())
|
for (const auto & variant : assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants())
|
||||||
result_variants.push_back(variant);
|
result_variants.push_back(variant);
|
||||||
/// Add String variant in advance (if we didn't add it yet) as we must have it across variants when we reach the limit.
|
|
||||||
if (!variant_info.variant_name_to_discriminator.contains("String"))
|
|
||||||
result_variants.push_back(std::make_shared<DataTypeString>());
|
|
||||||
|
|
||||||
/// Create list of remaining variants with their sizes and sort it.
|
/// Create list of remaining variants with their sizes and sort it.
|
||||||
std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes;
|
std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes;
|
||||||
@ -1049,15 +1047,18 @@ void ColumnDynamic::prepareForSquashing(const Columns & source_columns)
|
|||||||
{
|
{
|
||||||
/// Add variant to the list only if we didn't add it yet.
|
/// Add variant to the list only if we didn't add it yet.
|
||||||
auto variant_name = variant->getName();
|
auto variant_name = variant->getName();
|
||||||
if (variant_name != "String" && !variant_info.variant_name_to_discriminator.contains(variant_name))
|
if (!variant_info.variant_name_to_discriminator.contains(variant_name))
|
||||||
variants_with_sizes.emplace_back(total_variant_sizes[variant->getName()], variant);
|
variants_with_sizes.emplace_back(total_variant_sizes[variant_name], variant);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
|
std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
|
||||||
/// Add the most frequent variants until we reach max_dynamic_types.
|
/// Add the most frequent variants until we reach max_dynamic_types.
|
||||||
size_t num_new_variants = max_dynamic_types - result_variants.size();
|
for (const auto & [_, new_variant] : variants_with_sizes)
|
||||||
for (size_t i = 0; i != num_new_variants; ++i)
|
{
|
||||||
result_variants.push_back(variants_with_sizes[i].second);
|
if (!canAddNewVariant(result_variants.size()))
|
||||||
|
break;
|
||||||
|
result_variants.push_back(new_variant);
|
||||||
|
}
|
||||||
|
|
||||||
result_variant_type = std::make_shared<DataTypeVariant>(result_variants);
|
result_variant_type = std::make_shared<DataTypeVariant>(result_variants);
|
||||||
}
|
}
|
||||||
|
@ -117,7 +117,7 @@ bool DataTypeVariant::equals(const IDataType & rhs) const
|
|||||||
|
|
||||||
/// The same data types with different custom names considered different.
|
/// The same data types with different custom names considered different.
|
||||||
/// For example, UInt8 and Bool.
|
/// For example, UInt8 and Bool.
|
||||||
if ((variants[i]->hasCustomName() || rhs_variant.variants[i]) && variants[i]->getName() != rhs_variant.variants[i]->getName())
|
if ((variants[i]->hasCustomName() || rhs_variant.variants[i]->hasCustomName()) && variants[i]->getName() != rhs_variant.variants[i]->getName())
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,8 +1,12 @@
|
|||||||
Array(UInt8)
|
1
|
||||||
None
|
Array(UInt8) true
|
||||||
UInt64
|
None false
|
||||||
None
|
UInt64 false
|
||||||
String
|
2
|
||||||
UInt64
|
Array(UInt8) true
|
||||||
String
|
None false
|
||||||
UInt64
|
UInt64 false
|
||||||
|
3
|
||||||
|
Array(UInt8) true
|
||||||
|
String false
|
||||||
|
UInt64 true
|
||||||
|
@ -4,17 +4,20 @@ set max_block_size = 1000;
|
|||||||
drop table if exists test;
|
drop table if exists test;
|
||||||
|
|
||||||
create table test (d Dynamic) engine=MergeTree order by tuple();
|
create table test (d Dynamic) engine=MergeTree order by tuple();
|
||||||
insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000);
|
insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(1000000);
|
||||||
select distinct dynamicType(d) as type from test order by type;
|
select '1';
|
||||||
|
select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type;
|
||||||
|
|
||||||
drop table test;
|
drop table test;
|
||||||
create table test (d Dynamic(max_types=2)) engine=MergeTree order by tuple();
|
create table test (d Dynamic(max_types=1)) engine=MergeTree order by tuple();
|
||||||
insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000);
|
insert into test select multiIf(number < 1000, NULL::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(1000000);
|
||||||
select distinct dynamicType(d) as type from test order by type;
|
select '2';
|
||||||
|
select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type;
|
||||||
|
|
||||||
truncate table test;
|
truncate table test;
|
||||||
insert into test select multiIf(number < 1000, 'Str'::Dynamic(max_types=2), number < 3000, range(number % 5)::Dynamic(max_types=2), number::Dynamic(max_types=2)) from numbers(1000000);
|
insert into test select multiIf(number < 1000, 'Str'::Dynamic(max_types=1), number < 3000, range(number % 5)::Dynamic(max_types=1), number::Dynamic(max_types=1)) from numbers(1000000);
|
||||||
select distinct dynamicType(d) as type from test order by type;
|
select '3';
|
||||||
|
select distinct dynamicType(d) as type, isDynamicElementInSharedData(d) as flag from test order by type;
|
||||||
|
|
||||||
drop table test;
|
drop table test;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user