ClickHouse/src/Interpreters/convertFieldToType.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

553 lines
22 KiB
C++
Raw Normal View History

#include <Interpreters/convertFieldToType.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
2020-10-10 06:49:03 +00:00
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeEnum.h>
2021-05-03 23:27:05 +00:00
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
2022-09-18 03:16:08 +00:00
#include <DataTypes/DataTypeAggregateFunction.h>
#include <Core/AccurateComparison.h>
2022-09-18 03:16:08 +00:00
2017-07-13 20:58:19 +00:00
#include <Common/typeid_cast.h>
2018-02-28 04:30:27 +00:00
#include <Common/NaNUtils.h>
2021-07-07 15:33:44 +00:00
#include <Common/FieldVisitorToString.h>
2022-09-18 03:16:08 +00:00
#include <Common/FieldVisitorConvertToNumber.h>
#include <Common/DateLUT.h>
2022-09-18 03:16:08 +00:00
namespace DB
{
/// Error codes thrown by the conversion routines in this file.
/// They are only declared here (`extern`); the definitions are provided elsewhere.
namespace ErrorCodes
{
    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int TYPE_MISMATCH;
    extern const int UNEXPECTED_DATA_AFTER_PARSED_VALUE;
}
2017-04-02 17:37:49 +00:00
/** Checks whether the value held by `Field from` (stored as type `From`) falls within the range of type `To`.
  * `From` and `To` are numeric types; they may be floating-point.
  * `From` is one of the types a Field stores numbers as (UInt64, Int64, Float64 or a 128/256-bit integer),
  * whereas `To` can also be an 8, 16 or 32 bit type.
  *
  * If the value fits, `from` is converted to a `Field` holding the value converted to `To`.
  * If not, Field(Null) is returned.
  */
namespace
{
template <typename From, typename To>
2022-04-15 23:15:40 +00:00
Field convertNumericTypeImpl(const Field & from)
{
To result;
if (!accurate::convertNumeric(from.get<From>(), result))
return {};
return result;
}
template <typename To>
2022-04-15 23:15:40 +00:00
Field convertNumericType(const Field & from, const IDataType & type)
{
if (from.getType() == Field::Types::UInt64 || from.getType() == Field::Types::Bool)
return convertNumericTypeImpl<UInt64, To>(from);
if (from.getType() == Field::Types::Int64)
return convertNumericTypeImpl<Int64, To>(from);
if (from.getType() == Field::Types::Float64)
return convertNumericTypeImpl<Float64, To>(from);
2021-05-03 15:41:37 +00:00
if (from.getType() == Field::Types::UInt128)
return convertNumericTypeImpl<UInt128, To>(from);
if (from.getType() == Field::Types::Int128)
return convertNumericTypeImpl<Int128, To>(from);
if (from.getType() == Field::Types::UInt256)
return convertNumericTypeImpl<UInt256, To>(from);
if (from.getType() == Field::Types::Int256)
return convertNumericTypeImpl<Int256, To>(from);
throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in IN or VALUES section. Expected: {}. Got: {}",
type.getName(), from.getType());
}
template <typename From, typename T>
2022-04-15 23:15:40 +00:00
Field convertIntToDecimalType(const Field & from, const DataTypeDecimal<T> & type)
{
From value = from.get<From>();
if (!type.canStoreWhole(value))
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName());
T scaled_value = type.getScaleMultiplier() * T(static_cast<typename T::NativeType>(value));
return DecimalField<T>(scaled_value, type.getScale());
}
template <typename T>
2022-04-15 23:15:40 +00:00
Field convertStringToDecimalType(const Field & from, const DataTypeDecimal<T> & type)
{
const String & str_value = from.get<String>();
T value = type.parseFromString(str_value);
return DecimalField<T>(value, type.getScale());
}
template <typename From, typename T>
2022-04-15 23:15:40 +00:00
Field convertDecimalToDecimalType(const Field & from, const DataTypeDecimal<T> & type)
{
auto field = from.get<DecimalField<From>>();
T value = convertDecimals<DataTypeDecimal<From>, DataTypeDecimal<T>>(field.getValue(), field.getScale(), type.getScale());
return DecimalField<T>(value, type.getScale());
}
/// Converts a floating-point number stored in `from` (read as `From`) to a decimal of the given `type`
/// through convertToDecimal, using the scale of `type`.
/// Throws ARGUMENT_OUT_OF_BOUND when `type.canStoreWhole` rejects the value.
template <typename From, typename T>
Field convertFloatToDecimalType(const Field & from, const DataTypeDecimal<T> & type)
{
    const From source = from.get<From>();
    if (!type.canStoreWhole(source))
        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName());

    const UInt32 target_scale = type.getScale();
    auto as_decimal = convertToDecimal<DataTypeNumber<From>, DataTypeDecimal<T>>(source, target_scale);
    return DecimalField<T>(as_decimal, target_scale);
}
template <typename To>
2022-04-15 23:15:40 +00:00
Field convertDecimalType(const Field & from, const To & type)
{
if (from.getType() == Field::Types::UInt64)
return convertIntToDecimalType<UInt64>(from, type);
if (from.getType() == Field::Types::Int64)
return convertIntToDecimalType<Int64>(from, type);
2021-05-03 15:41:37 +00:00
if (from.getType() == Field::Types::UInt128)
return convertIntToDecimalType<UInt128>(from, type);
if (from.getType() == Field::Types::Int128)
return convertIntToDecimalType<Int128>(from, type);
if (from.getType() == Field::Types::UInt256)
return convertIntToDecimalType<UInt256>(from, type);
if (from.getType() == Field::Types::Int256)
return convertIntToDecimalType<Int256>(from, type);
if (from.getType() == Field::Types::String)
return convertStringToDecimalType(from, type);
if (from.getType() == Field::Types::Decimal32)
return convertDecimalToDecimalType<Decimal32>(from, type);
if (from.getType() == Field::Types::Decimal64)
return convertDecimalToDecimalType<Decimal64>(from, type);
if (from.getType() == Field::Types::Decimal128)
return convertDecimalToDecimalType<Decimal128>(from, type);
2023-03-14 21:10:52 +00:00
if (from.getType() == Field::Types::Decimal256)
return convertDecimalToDecimalType<Decimal256>(from, type);
if (from.getType() == Field::Types::Float64)
return convertFloatToDecimalType<Float64>(from, type);
throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in IN or VALUES section. Expected: {}. Got: {}",
type.getName(), from.getType());
}
/** Core conversion routine: turns `src` into a Field whose representation matches `type`.
  *
  * `from_type_hint` may be null. When given, it describes the type `src` came from and enables
  * the conversions that depend on the source type (Date/Date32 <-> DateTime, FixedString -> IPv6,
  * named Tuple -> Object).
  *
  * Outcome:
  *  - the converted Field on success;
  *  - a Null Field when a numeric helper reports that the value does not fit, or when an
  *    Array/Tuple/Map contains an element that converted to Null for a non-Nullable element type;
  *  - an exception (TYPE_MISMATCH, or whatever the helpers throw) when no conversion applies.
  *
  * The order of the branches matters: the first matching branch of the if/else-if chain wins,
  * and a branch that matches without returning falls through to the "parse from string" step
  * and then to the final throw.
  */
Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const IDataType * from_type_hint)
{
    if (from_type_hint && from_type_hint->equals(type))
    {
        return src;
    }

    WhichDataType which_type(type);
    WhichDataType which_from_type;
    if (from_type_hint)
    {
        which_from_type = WhichDataType(*from_type_hint);
    }

    /// Conversion between Date and DateTime and vice versa.
    /// The time zone is always taken from the DateTime side of the conversion.
    if (which_type.isDate() && which_from_type.isDateTime())
    {
        return static_cast<UInt16>(static_cast<const DataTypeDateTime &>(*from_type_hint).getTimeZone().toDayNum(src.get<UInt64>()).toUnderType());
    }
    else if (which_type.isDate32() && which_from_type.isDateTime())
    {
        return static_cast<Int32>(static_cast<const DataTypeDateTime &>(*from_type_hint).getTimeZone().toDayNum(src.get<UInt64>()).toUnderType());
    }
    else if (which_type.isDateTime() && which_from_type.isDate())
    {
        return static_cast<const DataTypeDateTime &>(type).getTimeZone().fromDayNum(DayNum(src.get<UInt64>()));
    }
    else if (which_type.isDateTime() && which_from_type.isDate32())
    {
        /// NOTE(review): a Date32 source is wrapped into DayNum here - confirm this is intended for the whole Date32 range.
        return static_cast<const DataTypeDateTime &>(type).getTimeZone().fromDayNum(DayNum(src.get<Int32>()));
    }
    else if (type.isValueRepresentedByNumber() && src.getType() != Field::Types::String)
    {
        /// Plain numeric targets.
        if (which_type.isUInt8()) return convertNumericType<UInt8>(src, type);
        if (which_type.isUInt16()) return convertNumericType<UInt16>(src, type);
        if (which_type.isUInt32()) return convertNumericType<UInt32>(src, type);
        if (which_type.isUInt64()) return convertNumericType<UInt64>(src, type);
        if (which_type.isUInt128()) return convertNumericType<UInt128>(src, type);
        if (which_type.isUInt256()) return convertNumericType<UInt256>(src, type);
        if (which_type.isInt8()) return convertNumericType<Int8>(src, type);
        if (which_type.isInt16()) return convertNumericType<Int16>(src, type);
        if (which_type.isInt32()) return convertNumericType<Int32>(src, type);
        if (which_type.isInt64()) return convertNumericType<Int64>(src, type);
        if (which_type.isInt128()) return convertNumericType<Int128>(src, type);
        if (which_type.isInt256()) return convertNumericType<Int256>(src, type);
        if (which_type.isFloat32()) return convertNumericType<Float32>(src, type);
        if (which_type.isFloat64()) return convertNumericType<Float64>(src, type);

        /// Decimal targets.
        if (const auto * ptype = typeid_cast<const DataTypeDecimal<Decimal32> *>(&type)) return convertDecimalType(src, *ptype);
        if (const auto * ptype = typeid_cast<const DataTypeDecimal<Decimal64> *>(&type)) return convertDecimalType(src, *ptype);
        if (const auto * ptype = typeid_cast<const DataTypeDecimal<Decimal128> *>(&type)) return convertDecimalType(src, *ptype);
        if (const auto * ptype = typeid_cast<const DataTypeDecimal<Decimal256> *>(&type)) return convertDecimalType(src, *ptype);

        if (which_type.isEnum() && (src.getType() == Field::Types::UInt64 || src.getType() == Field::Types::Int64))
        {
            /// Convert UInt64 or Int64 to Enum's value
            return dynamic_cast<const IDataTypeEnum &>(type).castToValue(src);
        }

        if ((which_type.isDate() || which_type.isDateTime()) && src.getType() == Field::Types::UInt64)
        {
            /// We don't need any conversion UInt64 is under type of Date and DateTime
            return src;
        }

        if (which_type.isDate32() && src.getType() == Field::Types::Int64)
        {
            /// We don't need any conversion Int64 is under type of Date32
            return src;
        }

        if (which_type.isDateTime64() && src.getType() == Field::Types::Decimal64)
        {
            /// Already in needed type.
            return src;
        }

        /// For toDate('xxx') in 1::Int64, we CAST `src` to UInt64, which may
        /// produce wrong result in some special cases.
        if (which_type.isDate() && src.getType() == Field::Types::Int64)
        {
            return convertNumericType<UInt64>(src, type);
        }

        /// For toDate32('xxx') in 1, we CAST `src` to Int64. Also, it may
        /// produce wrong result in some special cases.
        if (which_type.isDate32() && src.getType() == Field::Types::UInt64)
        {
            return convertNumericType<Int64>(src, type);
        }

        /// The number is used as the whole part of a DateTime64 with a zero fractional part.
        /// (The Decimal64 source is already returned as-is above, so only integers reach the body.)
        if (which_type.isDateTime64()
            && (src.getType() == Field::Types::UInt64 || src.getType() == Field::Types::Int64 || src.getType() == Field::Types::Decimal64))
        {
            const auto scale = static_cast<const DataTypeDateTime64 &>(type).getScale();
            const auto decimal_value
                = DecimalUtils::decimalFromComponents<DateTime64>(applyVisitor(FieldVisitorConvertToNumber<Int64>(), src), 0, scale);
            return Field(DecimalField<DateTime64>(decimal_value, scale));
        }

        if (which_type.isIPv4() && src.getType() == Field::Types::IPv4)
        {
            /// Already in needed type.
            return src;
        }

        /// Nothing matched: control leaves the chain and ends up at the final throw (src is not a String here).
    }
    else if (which_type.isUUID() && src.getType() == Field::Types::UUID)
    {
        /// Already in needed type.
        return src;
    }
    else if (which_type.isIPv6())
    {
        /// Already in needed type.
        if (src.getType() == Field::Types::IPv6)
            return src;

        /// Treat FixedString(16) as a binary representation of IPv6
        if (which_from_type.isFixedString() && assert_cast<const DataTypeFixedString *>(from_type_hint)->getN() == IPV6_BINARY_LENGTH)
        {
            const auto col = type.createColumn();
            ReadBufferFromString in_buffer(src.get<String>());
            type.getDefaultSerialization()->deserializeBinary(*col, in_buffer, {});
            return (*col)[0];
        }
    }
    else if (which_type.isStringOrFixedString())
    {
        if (src.getType() == Field::Types::String)
        {
            if (which_type.isFixedString())
            {
                /// A string shorter than N is extended to exactly N bytes; std::string::resize pads with '\0'.
                size_t n = assert_cast<const DataTypeFixedString &>(type).getN();
                const auto & src_str = src.get<String>();
                if (src_str.size() < n)
                {
                    String src_str_extended = src_str;
                    src_str_extended.resize(n);
                    return src_str_extended;
                }
            }
            return src;
        }

        /// Any non-string source is rendered to its textual form.
        return applyVisitor(FieldVisitorToString(), src);
    }
    else if (const DataTypeArray * type_array = typeid_cast<const DataTypeArray *>(&type))
    {
        if (src.getType() == Field::Types::Array)
        {
            const Array & src_arr = src.get<Array>();
            size_t src_arr_size = src_arr.size();

            const auto & element_type = *(type_array->getNestedType());
            bool have_unconvertible_element = false;
            Array res(src_arr_size);
            for (size_t i = 0; i < src_arr_size; ++i)
            {
                res[i] = convertFieldToType(src_arr[i], element_type);
                if (res[i].isNull() && !element_type.isNullable())
                {
                    // See the comment for Tuples below.
                    have_unconvertible_element = true;
                }
            }

            return have_unconvertible_element ? Field(Null()) : Field(res);
        }
    }
    else if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(&type))
    {
        if (src.getType() == Field::Types::Tuple)
        {
            const auto & src_tuple = src.get<Tuple>();
            size_t src_tuple_size = src_tuple.size();
            size_t dst_tuple_size = type_tuple->getElements().size();

            if (dst_tuple_size != src_tuple_size)
                throw Exception(ErrorCodes::TYPE_MISMATCH, "Bad size of tuple in IN or VALUES section. "
                    "Expected size: {}, actual size: {}", dst_tuple_size, src_tuple_size);

            Tuple res(dst_tuple_size);
            bool have_unconvertible_element = false;
            for (size_t i = 0; i < dst_tuple_size; ++i)
            {
                const auto & element_type = *(type_tuple->getElements()[i]);
                res[i] = convertFieldToType(src_tuple[i], element_type);
                if (!res[i].isNull() || element_type.isNullable())
                    continue;

                /*
                 * Either the source element was Null, or the conversion did not
                 * succeed, because the source and the requested types of the
                 * element are compatible, but the value is not convertible
                 * (e.g. trying to convert -1 from Int8 to UInt8). In these
                 * cases, consider the whole tuple also compatible but not
                 * convertible. According to the specification of this function,
                 * we must return Null in this case.
                 *
                 * The following elements might be not even compatible, so it
                 * makes sense to check them to detect user errors. Remember
                 * that there is an unconvertible element, and try to process
                 * the remaining ones. The convertFieldToType for each element
                 * will throw if it detects incompatibility.
                 */
                have_unconvertible_element = true;
            }

            return have_unconvertible_element ? Field(Null()) : Field(res);
        }
    }
    else if (const DataTypeMap * type_map = typeid_cast<const DataTypeMap *>(&type))
    {
        if (src.getType() == Field::Types::Map)
        {
            const auto & key_type = *type_map->getKeyType();
            const auto & value_type = *type_map->getValueType();

            const auto & map = src.get<Map>();
            size_t map_size = map.size();

            Map res(map_size);

            bool have_unconvertible_element = false;

            /// Every map entry is a two-element Tuple: (key, value). Both parts are converted independently.
            for (size_t i = 0; i < map_size; ++i)
            {
                const auto & map_entry = map[i].get<Tuple>();

                const auto & key = map_entry[0];
                const auto & value = map_entry[1];

                Tuple updated_entry(2);

                updated_entry[0] = convertFieldToType(key, key_type);

                if (updated_entry[0].isNull() && !key_type.isNullable())
                    have_unconvertible_element = true;

                updated_entry[1] = convertFieldToType(value, value_type);
                if (updated_entry[1].isNull() && !value_type.isNullable())
                    have_unconvertible_element = true;

                res[i] = updated_entry;
            }

            return have_unconvertible_element ? Field(Null()) : Field(res);
        }
    }
    else if (const DataTypeAggregateFunction * agg_func_type = typeid_cast<const DataTypeAggregateFunction *>(&type))
    {
        if (src.getType() != Field::Types::AggregateFunctionState)
            throw Exception(ErrorCodes::TYPE_MISMATCH,
                "Cannot convert {} to {}",
                src.getTypeName(), agg_func_type->getName());

        /// The state is accepted only if its recorded name equals the name of the target type.
        const auto & name = src.get<AggregateFunctionStateData>().name;
        if (agg_func_type->getName() != name)
            throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert {} to {}", name, agg_func_type->getName());

        return src;
    }
    else if (isObject(type))
    {
        if (src.getType() == Field::Types::Object)
            return src; /// Already in needed type.

        /// A Tuple with explicit element names becomes an Object keyed by those names.
        const auto * from_type_tuple = typeid_cast<const DataTypeTuple *>(from_type_hint);
        if (src.getType() == Field::Types::Tuple && from_type_tuple && from_type_tuple->haveExplicitNames())
        {
            const auto & names = from_type_tuple->getElementNames();
            const auto & tuple = src.get<const Tuple &>();

            if (names.size() != tuple.size())
                throw Exception(ErrorCodes::TYPE_MISMATCH,
                    "Bad size of tuple in IN or VALUES section (while converting to Object). Expected size: {}, actual size: {}",
                    names.size(), tuple.size());

            Object object;
            for (size_t i = 0; i < names.size(); ++i)
                object[names[i]] = tuple[i];

            return object;
        }

        /// A Map becomes an Object as well, provided that every key is a String.
        if (src.getType() == Field::Types::Map)
        {
            Object object;
            const auto & map = src.get<const Map &>();
            for (const auto & element : map)
            {
                const auto & map_entry = element.get<Tuple>();
                const auto & key = map_entry[0];
                const auto & value = map_entry[1];

                if (key.getType() != Field::Types::String)
                    throw Exception(ErrorCodes::TYPE_MISMATCH,
                        "Cannot convert from Map with key of type {} to Object", key.getTypeName());

                object[key.get<const String &>()] = value;
            }

            return object;
        }
    }

    /// Conversion from string by parsing.
    if (src.getType() == Field::Types::String)
    {
        /// Promote data type to avoid overflows. Note that overflows in the largest data type are still possible.
        const IDataType * type_to_parse = &type;
        DataTypePtr holder;
        if (type.canBePromoted())
        {
            holder = type.promoteNumericType();
            type_to_parse = holder.get();
        }

        const auto col = type_to_parse->createColumn();
        ReadBufferFromString in_buffer(src.get<String>());
        try
        {
            type_to_parse->getDefaultSerialization()->deserializeWholeText(*col, in_buffer, FormatSettings{});
        }
        catch (Exception & e)
        {
            /// Trailing garbage after the parsed value is reported as a type mismatch;
            /// any other parse error is rethrown with extra context appended to its message.
            if (e.code() == ErrorCodes::UNEXPECTED_DATA_AFTER_PARSED_VALUE)
                throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert string {} to type {}", src.get<String>(), type.getName());

            e.addMessage(fmt::format("while converting '{}' to {}", src.get<String>(), type.getName()));
            throw;
        }

        /// The parsed value is converted once more against the original (non-promoted) `type`.
        Field parsed = (*col)[0];
        return convertFieldToType(parsed, type, from_type_hint);
    }

    throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in IN or VALUES section. Expected: {}. Got: {}",
        type.getName(), src.getType());
}
}
2021-05-03 23:27:05 +00:00
/** Converts `from_value` to a Field suitable for `to_type`.
  *
  * `from_type_hint` may be null; when it equals the target type (or the type nested inside a Nullable
  * target), the value is returned unchanged.
  *
  * LowCardinality targets are converted to their dictionary type, Nullable targets to their nested type.
  * A Null input is returned as is. A Null result for a non-Null input is possible: it is produced by
  * the implementation routine, and is also returned directly for a Nullable(Nothing) target.
  * Exceptions thrown by the implementation routine propagate to the caller.
  */
Field convertFieldToType(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint)
{
    if (from_value.isNull())
        return from_value;

    if (from_type_hint && from_type_hint->equals(to_type))
        return from_value;

    /// LowCardinality(T): convert as if the target were T.
    if (const auto * low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(&to_type))
        return convertFieldToType(from_value, *low_cardinality_type->getDictionaryType(), from_type_hint);

    /// Nullable(T): convert to T.
    if (const auto * nullable_type = typeid_cast<const DataTypeNullable *>(&to_type))
    {
        const IDataType & nested_type = *nullable_type->getNestedType();

        /// NULL remains NULL after any conversion.
        if (WhichDataType(nested_type).isNothing())
            return {};

        if (from_type_hint && from_type_hint->equals(nested_type))
            return from_value;

        return convertFieldToTypeImpl(from_value, nested_type, from_type_hint);
    }

    return convertFieldToTypeImpl(from_value, to_type, from_type_hint);
}
2021-05-03 23:27:05 +00:00
2020-06-24 16:37:04 +00:00
/** Same as convertFieldToType, but never reports failure through a Null result.
  *
  * Throws TYPE_MISMATCH when `from_value` is Null and `to_type` is not Nullable.
  * Throws ARGUMENT_OUT_OF_BOUND when a non-Null `from_value` converts to Null.
  * `from_type_hint` may be null; when present, its name is included in the error message.
  */
Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_type, const IDataType * from_type_hint)
{
    const bool is_null = from_value.isNull();
    if (is_null && !to_type.isNullable())
        throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert NULL to {}", to_type.getName());

    Field converted = convertFieldToType(from_value, to_type, from_type_hint);

    /// A Null result is legitimate only when the input itself was Null.
    if (!is_null && converted.isNull())
        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
            "Cannot convert value '{}'{}: it cannot be represented as {}",
            toString(from_value),
            from_type_hint ? " from " + from_type_hint->getName() : "",
            to_type.getName());

    return converted;
}
}