Review changes

This commit is contained in:
Blargian 2024-06-11 11:11:06 +02:00
parent 418fc7f443
commit 9cf11a210f
3 changed files with 35 additions and 65 deletions

View File

@ -92,6 +92,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
{"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
{"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
}},
{"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"},
{"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},
@ -103,8 +104,6 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
{"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."},
{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
{"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"},
{"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."},
{"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
{"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"},
{"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"},
{"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."},

View File

@ -239,16 +239,6 @@ namespace
return true;
}
bool checkIfTypesContainVariant(const DataTypes & types)
{
for (size_t i = 0; i < types.size(); ++i)
{
if (isVariant(types[i]))
return true;
}
return false;
}
void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
type_indexes.clear();
@ -321,49 +311,28 @@ namespace
/// if setting 'try_infer_variant' is true then we convert to type variant.
/// Does nothing when checkIfTypesAreEqual() reports that all types are already the same.
/// Otherwise every element of `data_types` is replaced by one shared Variant type and
/// `type_indexes` is reset to contain only TypeIndex::Variant.
void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
    if (checkIfTypesAreEqual(data_types))
        return;

    /// Collect the alternatives of the resulting Variant. If an input type is already a Variant,
    /// take its nested types instead of the Variant itself,
    /// e.g. Type 1: Variant(String, Array), Type 2: Date -> Variant(String, Array, Date).
    DataTypes variant_types;
    for (const auto & type : data_types)
    {
        if (const auto * variant_type = typeid_cast<const DataTypeVariant *>(type.get()))
        {
            const auto & current_variants = variant_type->getVariants();
            variant_types.insert(variant_types.end(), current_variants.begin(), current_variants.end());
        }
        else
        {
            variant_types.push_back(type);
        }
    }

    auto variant_type = std::make_shared<DataTypeVariant>(variant_types);

    /// Overwrite in place: the number of elements in data_types is preserved,
    /// each one now points to the same Variant type.
    for (auto & type : data_types)
        type = variant_type;

    type_indexes = {TypeIndex::Variant};
}
/// If we have only Date and DateTime types, convert Date to DateTime,
@ -703,11 +672,12 @@ namespace
if (settings.try_infer_dates || settings.try_infer_datetimes)
transformDatesAndDateTimes(data_types, type_indexes);
if (settings.try_infer_variant)
transformVariant(data_types, type_indexes);
if constexpr (!is_json)
{
if (settings.try_infer_variant)
transformVariant(data_types, type_indexes);
return;
}
/// Check settings specific for JSON formats.
@ -740,11 +710,12 @@ namespace
/// If there is at least one non Nothing type, change all Nothing types to it.
transformNothingComplexTypes(data_types, type_indexes);
if (settings.try_infer_variant)
transformVariant(data_types, type_indexes);
if constexpr (!is_json)
{
if (settings.try_infer_variant)
transformVariant(data_types, type_indexes);
return;
}
/// Convert JSON tuples with same nested types to arrays.
transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes);

View File

@ -1,16 +1,16 @@
┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ arr ┃ toTypeName(arr)
┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(
┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ arr ┃ toTypeName(arr) ┃
┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
1. │ ['1','Hello',(32)] │ Array(Variant(String, Tuple(
a Nullable(Int64)))) │
└──────────────────┴─────────────────────────────────────────────────────────────┘
┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━
┃ x ┃ toTypeName(x)
┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━
1. │ 42 │ Variant(Int64, String) │
├───────┼────────────────────────
2. │ Hello │ Variant(Int64, String) │
└───────┴────────────────────────
└────────────────────┴──────────────────────────────────────────────────────┘
┏━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
┃ x ┃ toTypeName(x) ┃
┡━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
1. │ 42 │ Nullable(String) │
├───────┼──────────────────┤
2. │ Hello │ Nullable(String) │
└───────┴──────────────────┘
┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ x ┃ toTypeName(x) ┃
┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩