Review changes

commit 9cf11a210f (parent 418fc7f443)
@@ -92,6 +92,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history
             {"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
             {"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},
             {"s3_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in S3 table engine"},
+            {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
             }},
             {"24.5", {{"allow_deprecated_functions", true, false, "Allow usage of deprecated functions"},
             {"allow_experimental_join_condition", false, false, "Support join with inequal conditions which involve columns from both left and right table. e.g. t1.y < t2.y."},
@@ -103,8 +104,6 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history
             {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."},
             {"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
             {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"},
             {"output_format_pretty_preserve_border_for_multiline_string", 1, 1, "Applies better rendering for multiline strings."},
-            {"input_format_try_infer_variants", 0, 0, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
             {"cast_string_to_dynamic_use_inference", false, false, "Add setting to allow converting String to Dynamic through parsing"},
             {"allow_experimental_dynamic_type", false, false, "Add new experimental Dynamic type"},
             {"azure_max_blocks_in_multipart_upload", 50000, 50000, "Maximum number of blocks in multipart upload for Azure."},
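Note on the two hunks above: the entry for the new input_format_try_infer_variants setting moves from the "24.5" block down to the "24.6" block, the release that actually introduces it; the compatibility machinery resolves old defaults by version, so an entry filed under the wrong release misreports when the behaviour changed. A minimal sketch of how a version-keyed map of this shape can be consumed (toy types and values, not ClickHouse's real ClickHouseVersion/SettingsChangesHistory API):

#include <iostream>
#include <map>
#include <string>
#include <vector>

struct SettingChange
{
    std::string name;
    std::string previous_value;
    std::string new_value;
    std::string reason;
};

int main()
{
    // Hypothetical subset of the map edited above.
    std::map<std::string, std::vector<SettingChange>> settings_changes_history =
    {
        {"24.6", {{"input_format_try_infer_variants", "0", "0",
                   "Try to infer Variant type in text formats when there is more "
                   "than one possible type for column/array elements"}}},
        {"24.5", {{"allow_deprecated_functions", "true", "false",
                   "Allow usage of deprecated functions"}}},
    };

    // The compatibility setting needs to know which defaults changed after a
    // given release; entries filed under the wrong version break that lookup.
    for (const auto & [version, changes] : settings_changes_history)
        if (version > "24.5")
            for (const auto & change : changes)
                std::cout << version << ": " << change.name << " ("
                          << change.previous_value << " -> " << change.new_value << ")\n";
}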
@@ -239,16 +239,6 @@ namespace
         return true;
     }
 
-    bool checkIfTypesContainVariant(const DataTypes & types)
-    {
-        for (size_t i = 0; i < types.size(); ++i)
-        {
-            if (isVariant(types[i]))
-                return true;
-        }
-        return false;
-    }
-
     void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes)
     {
         type_indexes.clear();
@@ -321,49 +311,28 @@ namespace
     /// if setting 'try_infer_variant' is true then we convert to type variant.
     void transformVariant(DataTypes & data_types, TypeIndexesSet & type_indexes)
    {
-        auto typesAreEqual = checkIfTypesAreEqual(data_types);
-        auto typesContainVariant = checkIfTypesContainVariant(data_types);
-        if (typesAreEqual)
+        if (checkIfTypesAreEqual(data_types))
             return;
 
-        DataTypes new_data_types;
-        TypeIndexesSet new_type_indexes;
-        std::shared_ptr<DataTypeVariant> variant_type;
-
-        /// extract the nested types of variant and make a new variant with the nested types and the other type.
-        /// eg. Type 1: variant<String, Array>, Type 2: Date -> variant<String, Array, Date>.
-        if (typesContainVariant)
+        DataTypes variant_types;
+        for (const auto & type : data_types)
         {
-            DataTypes extracted_types;
-            for (size_t i=0; i<data_types.size(); i++)
+            if (const auto * variant_type = typeid_cast<const DataTypeVariant *>(type.get()))
             {
-                if (isVariant(data_types[i]))
-                {
-                    if (const auto * variant = typeid_cast<const DataTypeVariant *>(data_types[i].get()))
-                        extracted_types = variant->getVariants();
-                }
-                else
-                    extracted_types.push_back(data_types[i]);
+                const auto & current_variants = variant_type->getVariants();
+                variant_types.insert(variant_types.end(), current_variants.begin(), current_variants.end());
             }
+            else
+            {
+                variant_types.push_back(type);
+            }
-            variant_type = std::make_shared<DataTypeVariant>(extracted_types);
         }
-        else
-        {
-            variant_type = std::make_shared<DataTypeVariant>(data_types);
-        }
 
-        size_t i = 0;
-        while (i != data_types.size())
-        {
-            new_data_types.push_back(variant_type);
-            new_type_indexes.insert(TypeIndex::Variant);
-            i++;
-        }
+        auto variant_type = std::make_shared<DataTypeVariant>(variant_types);
 
-        data_types.clear();
-        type_indexes.clear();
-        data_types = new_data_types;
-        type_indexes = new_type_indexes;
+        for (auto & type : data_types)
+            type = variant_type;
+        type_indexes = {TypeIndex::Variant};
     }
 
     /// If we have only Date and DateTime types, convert Date to DateTime,
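The rewrite above collapses the old two-pass approach (a pre-scan with the now-deleted checkIfTypesContainVariant, then a rebuild into new_data_types/new_type_indexes) into a single pass that flattens nested Variant alternatives and points every inferred type at one shared Variant. It also appears to fix a subtle bug: the old loop assigned extracted_types = variant->getVariants() instead of appending, so alternatives collected earlier in the loop were discarded. A self-contained toy model of the new flattening logic (stand-in Type struct, not ClickHouse's real DataTypePtr/DataTypeVariant):

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Stand-ins for DataTypePtr / DataTypeVariant, only to demonstrate the
// flattening performed by the rewritten transformVariant.
struct Type
{
    std::string name;                  // e.g. "String", "Date"
    std::vector<std::string> variants; // non-empty means this is a Variant
};
using TypePtr = std::shared_ptr<Type>;

// Mirrors the new single-pass logic: collect every nested alternative,
// then point all inferred types at one shared Variant.
TypePtr unifyIntoVariant(std::vector<TypePtr> & types)
{
    std::vector<std::string> variant_types;
    for (const auto & type : types)
    {
        if (!type->variants.empty()) // plays the role of the typeid_cast check
            variant_types.insert(variant_types.end(), type->variants.begin(), type->variants.end());
        else
            variant_types.push_back(type->name);
    }

    auto variant = std::make_shared<Type>(Type{"Variant", variant_types});
    for (auto & type : types)
        type = variant; // every element now shares the merged Variant
    return variant;
}

int main()
{
    // The example from the deleted comment: Variant(String, Array) seen next
    // to Date should merge into Variant(String, Array, Date).
    std::vector<TypePtr> types =
    {
        std::make_shared<Type>(Type{"Variant", {"String", "Array"}}),
        std::make_shared<Type>(Type{"Date", {}}),
    };

    auto merged = unifyIntoVariant(types);
    std::cout << merged->name << "(";
    for (size_t i = 0; i < merged->variants.size(); ++i)
        std::cout << (i ? ", " : "") << merged->variants[i];
    std::cout << ")\n"; // prints: Variant(String, Array, Date)
}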
@@ -703,11 +672,12 @@ namespace
         if (settings.try_infer_dates || settings.try_infer_datetimes)
             transformDatesAndDateTimes(data_types, type_indexes);
 
+        if (settings.try_infer_variant)
+            transformVariant(data_types, type_indexes);
+
         if constexpr (!is_json)
         {
-            if (settings.try_infer_variant)
-                transformVariant(data_types, type_indexes);
             return;
         }
 
         /// Check settings specific for JSON formats.
@@ -740,11 +710,12 @@ namespace
         /// If there is at least one non Nothing type, change all Nothing types to it.
         transformNothingComplexTypes(data_types, type_indexes);
 
+        if (settings.try_infer_variant)
+            transformVariant(data_types, type_indexes);
+
         if constexpr (!is_json)
         {
-            if (settings.try_infer_variant)
-                transformVariant(data_types, type_indexes);
             return;
         }
 
         /// Convert JSON tuples with same nested types to arrays.
         transformTuplesWithEqualNestedTypesToArrays(data_types, type_indexes);
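Both hunks above hoist the transformVariant call out of the if constexpr (!is_json) block, so with try_infer_variant enabled the Variant fallback now runs for JSON inputs as well, instead of only for the text formats that return early. A simplified sketch of the resulting control flow (illustrative names and structure, not the real signatures):

#include <iostream>

struct Settings { bool try_infer_variant = true; };

// Simplified shape of the two call sites after this change; names are
// illustrative, not ClickHouse's real API.
template <bool is_json>
void transformTypes(const Settings & settings)
{
    std::cout << "generic transforms (nothing/dates/numbers...)\n";

    if (settings.try_infer_variant)
        std::cout << "transformVariant\n"; // now reached for JSON input too

    if constexpr (!is_json)
        return; // text formats stop here, as before

    std::cout << "JSON-specific transforms (tuples -> arrays, ...)\n";
}

int main()
{
    std::cout << "-- text format --\n";
    transformTypes<false>(Settings{});
    std::cout << "-- JSON --\n";
    transformTypes<true>(Settings{});
}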
@@ -1,16 +1,16 @@
-┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃ arr              ┃ toTypeName(arr)                                             ┃
-┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
-1. │ [1,'Hello',(32)] │ Array(Variant(Int64, String, Tuple(
+┏━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
+┃ arr                ┃ toTypeName(arr)                                      ┃
+┡━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
+1. │ ['1','Hello',(32)] │ Array(Variant(String, Tuple(
 a Nullable(Int64)))) │
-└──────────────────┴─────────────────────────────────────────────────────────────┘
-┏━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
-┃ x     ┃ toTypeName(x)          ┃
-┡━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩
-1. │ 42    │ Variant(Int64, String) │
-├───────┼────────────────────────┤
-2. │ Hello │ Variant(Int64, String) │
-└───────┴────────────────────────┘
+└────────────────────┴──────────────────────────────────────────────────────┘
+┏━━━━━━━┳━━━━━━━━━━━━━━━━━━┓
+┃ x     ┃ toTypeName(x)    ┃
+┡━━━━━━━╇━━━━━━━━━━━━━━━━━━┩
+1. │ 42    │ Nullable(String) │
+├───────┼──────────────────┤
+2. │ Hello │ Nullable(String) │
+└───────┴──────────────────┘
 ┏━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
 ┃ x       ┃ toTypeName(x)                                                 ┃
 ┡━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
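The updated expectations suggest that, after review, Variant became a last resort: where a standard conversion exists (an Int64 seen next to a String re-parses as String), it still wins, so x is now inferred as Nullable(String) and the array value prints as ['1','Hello',(32)]; Variant survives only for genuinely incompatible alternatives such as String versus Tuple. A toy model of that "standard conversions first, Variant as fallback" rule (illustrative only, not ClickHouse code):

#include <iostream>
#include <set>
#include <string>

// Toy precedence check matching the updated expectations above: standard
// conversions run first, and a Variant is inferred only when they leave
// more than one distinct type.
std::string inferCommonType(std::set<std::string> types)
{
    // Standard transform: integers seen next to strings are re-parsed as
    // strings (this is why [1,'Hello',...] becomes ['1','Hello',...]).
    if (types.count("Int64") && types.count("String"))
    {
        types.erase("Int64");
        types.insert("String");
    }

    if (types.size() == 1)
        return "Nullable(" + *types.begin() + ")";

    // No standard conversion applies (e.g. String vs Tuple): fall back to Variant.
    std::string result = "Variant(";
    for (const auto & t : types)
        result += (result.size() > 8 ? ", " : "") + t;
    return result + ")";
}

int main()
{
    std::cout << inferCommonType({"Int64", "String"}) << '\n';          // Nullable(String)
    std::cout << inferCommonType({"String", "Tuple(a Int64)"}) << '\n'; // Variant(String, Tuple(a Int64))
}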