#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int NO_COMMON_TYPE; } namespace { String typeToString(const DataTypePtr & type) { return type->getName(); } String typeToString(const TypeIndex & type) { return String(magic_enum::enum_name(type)); } template String getExceptionMessagePrefix(const DataTypes & types) { WriteBufferFromOwnString res; bool first = true; for (const auto & type : types) { if (!first) res << ", "; first = false; res << typeToString(type); } return res.str(); } template DataTypePtr throwOrReturn(const DataTypes & types, std::string_view message_suffix, int error_code) { if constexpr (on_error == LeastSupertypeOnError::String) return std::make_shared(); if constexpr (on_error == LeastSupertypeOnError::Null) return nullptr; if (message_suffix.empty()) throw Exception(error_code, "There is no supertype for types {}", getExceptionMessagePrefix(types)); throw Exception(error_code, "There is no supertype for types {} {}", getExceptionMessagePrefix(types), message_suffix); } template DataTypePtr getNumericType(const TypeIndexSet & types) { bool all_numbers = true; size_t max_bits_of_signed_integer = 0; size_t max_bits_of_unsigned_integer = 0; size_t max_mantissa_bits_of_floating = 0; auto maximize = [](size_t & what, size_t value) { if (value > what) what = value; }; for (const auto & type : types) { if (type == TypeIndex::UInt8) maximize(max_bits_of_unsigned_integer, 8); else if (type == TypeIndex::UInt16) maximize(max_bits_of_unsigned_integer, 16); else if (type == TypeIndex::UInt32 || type == TypeIndex::IPv4) maximize(max_bits_of_unsigned_integer, 32); else if (type == TypeIndex::UInt64) maximize(max_bits_of_unsigned_integer, 64); else if (type == TypeIndex::UInt128) maximize(max_bits_of_unsigned_integer, 128); else if (type == TypeIndex::UInt256) maximize(max_bits_of_unsigned_integer, 256); else if (type == TypeIndex::Int8 || type == TypeIndex::Enum8) maximize(max_bits_of_signed_integer, 8); else if (type == TypeIndex::Int16 || type == TypeIndex::Enum16) maximize(max_bits_of_signed_integer, 16); else if (type == TypeIndex::Int32) maximize(max_bits_of_signed_integer, 32); else if (type == TypeIndex::Int64) maximize(max_bits_of_signed_integer, 64); else if (type == TypeIndex::Int128) maximize(max_bits_of_signed_integer, 128); else if (type == TypeIndex::Int256) maximize(max_bits_of_signed_integer, 256); else if (type == TypeIndex::Float32) maximize(max_mantissa_bits_of_floating, 24); else if (type == TypeIndex::Float64) maximize(max_mantissa_bits_of_floating, 53); else if (type != TypeIndex::Nothing) all_numbers = false; } if (max_bits_of_signed_integer || max_bits_of_unsigned_integer || max_mantissa_bits_of_floating) { if (!all_numbers) return throwOrReturn(types, "because some of them are numbers and some of them are not", ErrorCodes::NO_COMMON_TYPE); /// If there are signed and unsigned types of same bit-width, the result must be signed number with at least one more bit. /// Example, common of Int32, UInt32 = Int64. size_t min_bit_width_of_integer = std::max(max_bits_of_signed_integer, max_bits_of_unsigned_integer); /// If unsigned is not covered by signed. if (max_bits_of_signed_integer && max_bits_of_unsigned_integer >= max_bits_of_signed_integer) { // Because 128 and 256 bit integers are significantly slower, we should not promote to them. // But if we already have wide numbers, promotion is necessary. if (min_bit_width_of_integer != 64) ++min_bit_width_of_integer; else return throwOrReturn(types, "because some of them are signed integers and some are unsigned integers," " but there is no signed integer type, that can exactly represent all required unsigned integer values", ErrorCodes::NO_COMMON_TYPE); } /// If the result must be floating. if (max_mantissa_bits_of_floating) { size_t min_mantissa_bits = std::max(min_bit_width_of_integer, max_mantissa_bits_of_floating); if (min_mantissa_bits <= 24) return std::make_shared(); else if (min_mantissa_bits <= 53) return std::make_shared(); else return throwOrReturn(types, " because some of them are integers and some are floating point," " but there is no floating point type, that can exactly represent all required integers", ErrorCodes::NO_COMMON_TYPE); } /// If the result must be signed integer. if (max_bits_of_signed_integer) { if (min_bit_width_of_integer <= 8) return std::make_shared(); else if (min_bit_width_of_integer <= 16) return std::make_shared(); else if (min_bit_width_of_integer <= 32) return std::make_shared(); else if (min_bit_width_of_integer <= 64) return std::make_shared(); else if (min_bit_width_of_integer <= 128) return std::make_shared(); else if (min_bit_width_of_integer <= 256) return std::make_shared(); else return throwOrReturn(types, " because some of them are signed integers and some are unsigned integers," " but there is no signed integer type, that can exactly represent all required unsigned integer values", ErrorCodes::NO_COMMON_TYPE); } /// All unsigned. { if (min_bit_width_of_integer <= 8) return std::make_shared(); else if (min_bit_width_of_integer <= 16) return std::make_shared(); else if (min_bit_width_of_integer <= 32) return std::make_shared(); else if (min_bit_width_of_integer <= 64) return std::make_shared(); else if (min_bit_width_of_integer <= 128) return std::make_shared(); else if (min_bit_width_of_integer <= 256) return std::make_shared(); else return throwOrReturn(types, " but as all data types are unsigned integers, we must have found maximum unsigned integer type", ErrorCodes::NO_COMMON_TYPE); } } return {}; } /// Check if we can convert UInt64 to Int64 to avoid error "There is no supertype for types UInt64, Int64" /// during inferring field types. /// Example: /// [-3236599669630092879, 5607475129431807682] /// First field is inferred as Int64, but second one as UInt64, although it also can be Int64. /// We don't support Int128 as supertype for Int64 and UInt64, because Int128 is inefficient. /// But in this case the result type can be inferred as Array(Int64). void convertUInt64toInt64IfPossible(const DataTypes & types, TypeIndexSet & types_set) { /// Check if we have UInt64 and at least one Integer type. if (!types_set.contains(TypeIndex::UInt64) || (!types_set.contains(TypeIndex::Int8) && !types_set.contains(TypeIndex::Int16) && !types_set.contains(TypeIndex::Int32) && !types_set.contains(TypeIndex::Int64))) return; bool all_uint64_can_be_int64 = true; for (const auto & type : types) { if (const auto * uint64_type = typeid_cast(type.get())) all_uint64_can_be_int64 &= uint64_type->canUnsignedBeSigned(); } if (all_uint64_can_be_int64) { types_set.erase(TypeIndex::UInt64); types_set.insert(TypeIndex::Int64); } } } template DataTypePtr getLeastSupertype(const DataTypes & types) { /// Trivial cases if (types.empty()) return std::make_shared(); if (types.size() == 1) return types[0]; /// All types are equal { bool all_equal = true; for (size_t i = 1, size = types.size(); i < size; ++i) { if (!types[i]->equals(*types[0])) { all_equal = false; break; } } if (all_equal) return types[0]; } /// Recursive rules /// If there are Nothing types, skip them { DataTypes non_nothing_types; non_nothing_types.reserve(types.size()); for (const auto & type : types) if (!typeid_cast(type.get())) non_nothing_types.emplace_back(type); if (non_nothing_types.size() < types.size()) return getLeastSupertype(non_nothing_types); } /// For Arrays { bool have_array = false; bool all_arrays = true; DataTypes nested_types; nested_types.reserve(types.size()); for (const auto & type : types) { if (const DataTypeArray * type_array = typeid_cast(type.get())) { have_array = true; nested_types.emplace_back(type_array->getNestedType()); } else all_arrays = false; } if (have_array) { if (!all_arrays) return throwOrReturn(types, "because some of them are Array and some of them are not", ErrorCodes::NO_COMMON_TYPE); auto nested_type = getLeastSupertype(nested_types); /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype, /// nested_type will be nullptr, we should return nullptr in this case. if (!nested_type) return nullptr; return std::make_shared(nested_type); } } /// For tuples { bool have_tuple = false; bool all_tuples = true; size_t tuple_size = 0; std::vector nested_types; for (const auto & type : types) { if (const DataTypeTuple * type_tuple = typeid_cast(type.get())) { if (!have_tuple) { tuple_size = type_tuple->getElements().size(); nested_types.resize(tuple_size); for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx) nested_types[elem_idx].reserve(types.size()); } else if (tuple_size != type_tuple->getElements().size()) return throwOrReturn(types, "because Tuples have different sizes", ErrorCodes::NO_COMMON_TYPE); have_tuple = true; for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx) nested_types[elem_idx].emplace_back(type_tuple->getElements()[elem_idx]); } else all_tuples = false; } if (have_tuple) { if (!all_tuples) return throwOrReturn(types, "because some of them are Tuple and some of them are not", ErrorCodes::NO_COMMON_TYPE); DataTypes common_tuple_types(tuple_size); for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx) { auto common_type = getLeastSupertype(nested_types[elem_idx]); /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype, /// common_type will be nullptr, we should return nullptr in this case. if (!common_type) return nullptr; common_tuple_types[elem_idx] = common_type; } return std::make_shared(common_tuple_types); } } /// For maps { bool have_maps = false; bool all_maps = true; DataTypes key_types; DataTypes value_types; key_types.reserve(types.size()); value_types.reserve(types.size()); for (const auto & type : types) { if (const DataTypeMap * type_map = typeid_cast(type.get())) { have_maps = true; key_types.emplace_back(type_map->getKeyType()); value_types.emplace_back(type_map->getValueType()); } else all_maps = false; } if (have_maps) { if (!all_maps) return throwOrReturn(types, "because some of them are Maps and some of them are not", ErrorCodes::NO_COMMON_TYPE); auto keys_common_type = getLeastSupertype(key_types); auto values_common_type = getLeastSupertype(value_types); /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype for keys or values, /// keys_common_type or values_common_type will be nullptr, we should return nullptr in this case. if (!keys_common_type || !values_common_type) return nullptr; return std::make_shared(keys_common_type, values_common_type); } } /// For LowCardinality. This is above Nullable, because LowCardinality can contain Nullable but cannot be inside Nullable. { bool have_low_cardinality = false; bool have_not_low_cardinality = false; DataTypes nested_types; nested_types.reserve(types.size()); for (const auto & type : types) { if (const DataTypeLowCardinality * type_low_cardinality = typeid_cast(type.get())) { have_low_cardinality = true; nested_types.emplace_back(type_low_cardinality->getDictionaryType()); } else { have_not_low_cardinality = true; nested_types.emplace_back(type); } } /// All LowCardinality gives LowCardinality. /// LowCardinality with high cardinality gives high cardinality. if (have_low_cardinality) { if (have_not_low_cardinality) return getLeastSupertype(nested_types); else { auto nested_type = getLeastSupertype(nested_types); /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype, /// nested_type will be nullptr, we should return nullptr in this case. if (!nested_type) return nullptr; return std::make_shared(nested_type); } } } /// For Nullable { bool have_nullable = false; DataTypes nested_types; nested_types.reserve(types.size()); for (const auto & type : types) { if (const DataTypeNullable * type_nullable = typeid_cast(type.get())) { have_nullable = true; if (!type_nullable->onlyNull()) nested_types.emplace_back(type_nullable->getNestedType()); } else nested_types.emplace_back(type); } if (have_nullable) { auto nested_type = getLeastSupertype(nested_types); /// When on_error == LeastSupertypeOnError::Null and we cannot get least supertype, /// nested_type will be nullptr, we should return nullptr in this case. if (!nested_type) return nullptr; return std::make_shared(nested_type); } } /// Non-recursive rules TypeIndexSet type_ids; for (const auto & type : types) type_ids.insert(type->getTypeId()); /// For String and FixedString, or for different FixedStrings, the common type is String. /// No other types are compatible with Strings. TODO Enums? { size_t have_string = type_ids.count(TypeIndex::String); size_t have_fixed_string = type_ids.count(TypeIndex::FixedString); if (have_string || have_fixed_string) { bool all_strings = type_ids.size() == (have_string + have_fixed_string); if (!all_strings) return throwOrReturn(types, "because some of them are String/FixedString and some of them are not", ErrorCodes::NO_COMMON_TYPE); return std::make_shared(); } } /// For Date and DateTime/DateTime64, the common type is DateTime/DateTime64. No other types are compatible. { size_t have_date = type_ids.count(TypeIndex::Date); size_t have_date32 = type_ids.count(TypeIndex::Date32); size_t have_datetime = type_ids.count(TypeIndex::DateTime); size_t have_datetime64 = type_ids.count(TypeIndex::DateTime64); if (have_date || have_date32 || have_datetime || have_datetime64) { bool all_date_or_datetime = type_ids.size() == (have_date + have_date32 + have_datetime + have_datetime64); if (!all_date_or_datetime) return throwOrReturn(types, "because some of them are Date/Date32/DateTime/DateTime64 and some of them are not", ErrorCodes::NO_COMMON_TYPE); if (have_datetime64 == 0 && have_date32 == 0) { for (const auto & type : types) { if (isDateTime(type)) return type; } return std::make_shared(); } /// For Date and Date32, the common type is Date32 if (have_datetime == 0 && have_datetime64 == 0) { for (const auto & type : types) { if (isDate32(type)) return type; } } /// For Datetime and Date32, the common type is Datetime64 if (have_datetime == 1 && have_date32 == 1 && have_datetime64 == 0) { return std::make_shared(0); } UInt8 max_scale = 0; size_t max_scale_date_time_index = 0; for (size_t i = 0; i < types.size(); ++i) { const auto & type = types[i]; if (const auto * date_time64_type = typeid_cast(type.get())) { const auto scale = date_time64_type->getScale(); if (scale >= max_scale) { max_scale_date_time_index = i; max_scale = scale; } } } return types[max_scale_date_time_index]; } } /// Decimals { size_t have_decimal32 = type_ids.count(TypeIndex::Decimal32); size_t have_decimal64 = type_ids.count(TypeIndex::Decimal64); size_t have_decimal128 = type_ids.count(TypeIndex::Decimal128); size_t have_decimal256 = type_ids.count(TypeIndex::Decimal256); if (have_decimal32 || have_decimal64 || have_decimal128 || have_decimal256) { size_t num_supported = have_decimal32 + have_decimal64 + have_decimal128 + have_decimal256; std::array int_ids = {TypeIndex::Int8, TypeIndex::UInt8, TypeIndex::Int16, TypeIndex::UInt16, TypeIndex::Int32, TypeIndex::UInt32, TypeIndex::Int64, TypeIndex::UInt64}; TypeIndex max_int = TypeIndex::Nothing; for (auto int_id : int_ids) { size_t num = type_ids.count(int_id); num_supported += num; if (num) max_int = int_id; } if (num_supported != type_ids.size()) return throwOrReturn(types, "because some of them have no lossless conversion to Decimal", ErrorCodes::NO_COMMON_TYPE); UInt32 max_scale = 0; for (const auto & type : types) { auto type_id = type->getTypeId(); if (type_id != TypeIndex::Decimal32 && type_id != TypeIndex::Decimal64 && type_id != TypeIndex::Decimal128 && type_id != TypeIndex::Decimal256) { continue; } UInt32 scale = getDecimalScale(*type); if (scale > max_scale) max_scale = scale; } UInt32 min_precision = max_scale + leastDecimalPrecisionFor(max_int); /// special cases Int32 -> Dec32, Int64 -> Dec64 if (max_scale == 0) { if (max_int == TypeIndex::Int32) min_precision = DataTypeDecimal::maxPrecision(); else if (max_int == TypeIndex::Int64) min_precision = DataTypeDecimal::maxPrecision(); } if (min_precision > DataTypeDecimal::maxPrecision()) return throwOrReturn(types, "because the least supertype is Decimal(" + toString(min_precision) + ',' + toString(max_scale) + ')', ErrorCodes::NO_COMMON_TYPE); if (have_decimal256 || min_precision > DataTypeDecimal::maxPrecision()) return std::make_shared>(DataTypeDecimal::maxPrecision(), max_scale); if (have_decimal128 || min_precision > DataTypeDecimal::maxPrecision()) return std::make_shared>(DataTypeDecimal::maxPrecision(), max_scale); if (have_decimal64 || min_precision > DataTypeDecimal::maxPrecision()) return std::make_shared>(DataTypeDecimal::maxPrecision(), max_scale); return std::make_shared>(DataTypeDecimal::maxPrecision(), max_scale); } } /// For numeric types, the most complicated part. { /// First, if we have signed integers, try to convert all UInt64 to Int64 if possible. convertUInt64toInt64IfPossible(types, type_ids); auto numeric_type = getNumericType(type_ids); if (numeric_type) return numeric_type; } /// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases). return throwOrReturn(types, "", ErrorCodes::NO_COMMON_TYPE); } DataTypePtr getLeastSupertypeOrString(const DataTypes & types) { return getLeastSupertype(types); } DataTypePtr tryGetLeastSupertype(const DataTypes & types) { return getLeastSupertype(types); } template DataTypePtr getLeastSupertype(const TypeIndexSet & types) { if (types.empty()) return std::make_shared(); if (types.size() == 1) { WhichDataType which(*types.begin()); if (which.isNothing()) return std::make_shared(); #define DISPATCH(TYPE) \ if (which.idx == TypeIndex::TYPE) \ return std::make_shared>(); /// NOLINT FOR_NUMERIC_TYPES(DISPATCH) #undef DISPATCH if (which.isString()) return std::make_shared(); return throwOrReturn(types, "because cannot get common type by type indexes with non-simple types", ErrorCodes::NO_COMMON_TYPE); } if (types.contains(TypeIndex::String)) { bool only_string = types.size() == 2 && types.contains(TypeIndex::Nothing); if (!only_string) return throwOrReturn(types, "because some of them are String and some of them are not", ErrorCodes::NO_COMMON_TYPE); return std::make_shared(); } auto numeric_type = getNumericType(types); if (numeric_type) return numeric_type; return throwOrReturn(types, "", ErrorCodes::NO_COMMON_TYPE); } DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types) { return getLeastSupertype(types); } DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types) { return getLeastSupertype(types); } template DataTypePtr getLeastSupertype(const DataTypes & types); template DataTypePtr getLeastSupertype(const TypeIndexSet & types); }