decimal (de)serialization (in progress)

This commit is contained in:
chertus 2018-07-23 23:19:26 +03:00
parent 5ec47b4a1c
commit 02f016579c
4 changed files with 226 additions and 33 deletions

View File

@ -33,9 +33,9 @@ std::string DataTypeDecimal<T>::getName() const
}
template <typename T>
bool DataTypeDecimal<T>::equals(const IDataType &) const
bool DataTypeDecimal<T>::equals(const IDataType & rhs) const
{
return false; // TODO
return typeid(rhs) == typeid(*this) && getName() == rhs.getName();
}
template <typename T>
@ -43,10 +43,9 @@ void DataTypeDecimal<T>::serializeText(const IColumn & column, size_t row_num, W
{
const T & value = static_cast<const ColumnVector<T> &>(column).getData()[row_num];
// FIXME: scale
writeIntText(value, ostr);
writeIntText(wholePart(value), ostr);
writeChar('.', ostr);
writeIntText(0, ostr);
writeIntText(fractionalPart(value), ostr);
}
@ -54,7 +53,9 @@ template <typename T>
void DataTypeDecimal<T>::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
T x;
readDecimalText(istr, x, precision, scale);
UInt32 unread_scale = scale;
readDecimalText(istr, x, precision, unread_scale);
x *= getScaleMultiplier(unread_scale);
static_cast<ColumnVector<T> &>(column).getData().push_back(x);
}
@ -153,10 +154,10 @@ static DataTypePtr create(const ASTPtr & arguments)
throw Exception("Negative scales and scales larger than presicion are not supported", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (precision_value <= maxDecimalPrecision<Int32>())
return std::make_shared<DataTypeDecimal9>(precision_value, scale_value);
return std::make_shared<DataTypeDecimal<Int32>>(precision_value, scale_value);
else if (precision_value <= maxDecimalPrecision<Int64>())
return std::make_shared<DataTypeDecimal18>(precision_value, scale_value);
return std::make_shared<DataTypeDecimal38>(precision_value, scale_value);
return std::make_shared<DataTypeDecimal<Int64>>(precision_value, scale_value);
return std::make_shared<DataTypeDecimal<Int128>>(precision_value, scale_value);
}
@ -165,6 +166,100 @@ void registerDataTypeDecimal(DataTypeFactory & factory)
factory.registerDataType("Decimal", create, DataTypeFactory::CaseInsensitive);
}
/// Returns 10^scale_ for the Int32-backed decimal.
/// The lookup table covers scale_ in [0, 9].
/// Throws ARGUMENT_OUT_OF_BOUND for any larger scale_ instead of reading past the end of the table.
template <>
Int32 DataTypeDecimal<Int32>::getScaleMultiplier(UInt32 scale_)
{
    static const Int32 values[] = {
        1,
        10,
        100,
        1000,
        10000,
        100000,
        1000000,
        10000000,
        100000000,
        1000000000
    };

    /// scale_ is supplied by the caller; an unchecked index would be an out-of-bounds read.
    if (scale_ >= sizeof(values) / sizeof(values[0]))
        throw Exception("Scale is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

    return values[scale_];
}
/// Returns 10^scale_ for the Int64-backed decimal.
/// The lookup table covers scale_ in [0, 18].
/// Throws ARGUMENT_OUT_OF_BOUND for any larger scale_ instead of reading past the end of the table.
template <>
Int64 DataTypeDecimal<Int64>::getScaleMultiplier(UInt32 scale_)
{
    static const Int64 values[] = {
        1ll,
        10ll,
        100ll,
        1000ll,
        10000ll,
        100000ll,
        1000000ll,
        10000000ll,
        100000000ll,
        1000000000ll,
        10000000000ll,
        100000000000ll,
        1000000000000ll,
        10000000000000ll,
        100000000000000ll,
        1000000000000000ll,
        10000000000000000ll,
        100000000000000000ll,
        1000000000000000000ll
    };

    /// scale_ is supplied by the caller; an unchecked index would be an out-of-bounds read.
    if (scale_ >= sizeof(values) / sizeof(values[0]))
        throw Exception("Scale is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

    return values[scale_];
}
/// Returns 10^scale_ for the Int128-backed decimal.
/// The lookup table covers scale_ in [0, 38]. Entries above 10^18 are written as products
/// of factors that each fit into a 64-bit literal.
/// Throws ARGUMENT_OUT_OF_BOUND for any larger scale_ instead of reading past the end of the table.
template <>
Int128 DataTypeDecimal<Int128>::getScaleMultiplier(UInt32 scale_)
{
    static const Int128 values[] = {
        static_cast<Int128>(1ll),
        static_cast<Int128>(10ll),
        static_cast<Int128>(100ll),
        static_cast<Int128>(1000ll),
        static_cast<Int128>(10000ll),
        static_cast<Int128>(100000ll),
        static_cast<Int128>(1000000ll),
        static_cast<Int128>(10000000ll),
        static_cast<Int128>(100000000ll),
        static_cast<Int128>(1000000000ll),
        static_cast<Int128>(10000000000ll),
        static_cast<Int128>(100000000000ll),
        static_cast<Int128>(1000000000000ll),
        static_cast<Int128>(10000000000000ll),
        static_cast<Int128>(100000000000000ll),
        static_cast<Int128>(1000000000000000ll),
        static_cast<Int128>(10000000000000000ll),
        static_cast<Int128>(100000000000000000ll),
        static_cast<Int128>(1000000000000000000ll),
        static_cast<Int128>(1000000000000000000ll) * 10ll,
        static_cast<Int128>(1000000000000000000ll) * 100ll,
        static_cast<Int128>(1000000000000000000ll) * 1000ll,
        static_cast<Int128>(1000000000000000000ll) * 10000ll,
        static_cast<Int128>(1000000000000000000ll) * 100000ll,
        static_cast<Int128>(1000000000000000000ll) * 1000000ll,
        static_cast<Int128>(1000000000000000000ll) * 10000000ll,
        static_cast<Int128>(1000000000000000000ll) * 100000000ll,
        static_cast<Int128>(1000000000000000000ll) * 1000000000ll,
        static_cast<Int128>(1000000000000000000ll) * 10000000000ll,
        static_cast<Int128>(1000000000000000000ll) * 100000000000ll,
        static_cast<Int128>(1000000000000000000ll) * 1000000000000ll,
        static_cast<Int128>(1000000000000000000ll) * 10000000000000ll,
        static_cast<Int128>(1000000000000000000ll) * 100000000000000ll,
        static_cast<Int128>(1000000000000000000ll) * 1000000000000000ll,
        static_cast<Int128>(1000000000000000000ll) * 10000000000000000ll,
        static_cast<Int128>(1000000000000000000ll) * 100000000000000000ll,
        static_cast<Int128>(1000000000000000000ll) * 100000000000000000ll * 10ll,
        static_cast<Int128>(1000000000000000000ll) * 100000000000000000ll * 100ll,
        static_cast<Int128>(1000000000000000000ll) * 100000000000000000ll * 1000ll
    };

    /// scale_ is supplied by the caller; an unchecked index would be an out-of-bounds read.
    if (scale_ >= sizeof(values) / sizeof(values[0]))
        throw Exception("Scale is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);

    return values[scale_];
}
/// Explicit template instantiations.
template class DataTypeDecimal<Int32>;
template class DataTypeDecimal<Int64>;

View File

@ -1,10 +1,16 @@
#pragma once
#include <common/likely.h>
#include <DataTypes/IDataType.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
}
///
class DataTypeSimpleSerialization : public IDataType
{
@ -52,6 +58,13 @@ class DataTypeSimpleSerialization : public IDataType
};
/// Smallest precision a Decimal may be declared with.
static constexpr size_t minDecimalPrecision() { return 1; }
/// Largest precision for a Decimal stored in the integer type T.
/// Only declared here; each supported T gets an explicit specialization below.
template <typename T> static constexpr size_t maxDecimalPrecision();
/// Int32-backed decimals: up to 9 digits.
template <> constexpr size_t maxDecimalPrecision<Int32>() { return 9; }
/// Int64-backed decimals: up to 18 digits.
template <> constexpr size_t maxDecimalPrecision<Int64>() { return 18; }
/// Int128-backed decimals: up to 38 digits.
template <> constexpr size_t maxDecimalPrecision<Int128>() { return 38; }
/// Implements Decimal(P, S), where P is precision, S is scale.
/// Maximum precisions for underlying types are:
/// Int32 9
@ -60,6 +73,8 @@ class DataTypeSimpleSerialization : public IDataType
/// Operation between two decimals leads to Decimal(P, S), where
/// P is one of (9, 18, 38); it is equal to the maximum precision for the biggest underlying type of the operands.
/// S is maximum scale of operands.
///
/// NOTE: It's possible to make scale a template parameter; then most of the functions would become static.
template <typename T>
class DataTypeDecimal final : public DataTypeSimpleSerialization
{
@ -69,10 +84,15 @@ public:
static constexpr bool is_parametric = true;
DataTypeDecimal(UInt32 precision_, Int32 scale_)
DataTypeDecimal(UInt32 precision_, UInt32 scale_)
: precision(precision_),
scale(scale_)
{}
{
if (unlikely(precision < 1 || precision > maxDecimalPrecision<T>()))
throw Exception("Precision is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (unlikely(scale < 0 || static_cast<UInt32>(scale) > maxDecimalPrecision<T>()))
throw Exception("Scale is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
const char * getFamilyName() const override { return "Decimal"; }
std::string getName() const override;
@ -114,20 +134,59 @@ public:
bool isInteger() const override { return false; }
bool canBeInsideNullable() const override { return true; }
/// Decimal specific
/// Precision P of Decimal(P, S), as passed to the constructor.
UInt32 getPrecision() const { return precision; }
/// Scale S of Decimal(P, S), as passed to the constructor.
UInt32 getScale() const { return scale; }
/// Multiplier for this type's own scale, obtained from the static getScaleMultiplier(UInt32) overload.
T getScaleMultiplier() const { return getScaleMultiplier(scale); }
/// Returns the part of the stored value `x` before the decimal point (keeps the sign of `x`).
/// The stored value is the decimal multiplied by the scale multiplier, so the whole part
/// is obtained by dividing by that multiplier, not by the number of fractional digits.
T wholePart(T x) const
{
    if (scale == 0)
        return x;
    return x / getScaleMultiplier();
}
/// Returns the digits of the stored value `x` after the decimal point as a non-negative integer.
/// The remainder is taken modulo the scale multiplier, not the number of fractional digits.
T fractionalPart(T x) const
{
    if (scale == 0)
        return 0;

    /// Take the remainder first and drop its sign afterwards: its magnitude is below the
    /// multiplier, so negating it cannot overflow even when `x` is the minimum value of T.
    const T remainder = x % getScaleMultiplier();
    return remainder < 0 ? -remainder : remainder;
}
/// Largest whole (integer) part this type can hold: 10^(max precision of T - scale) - 1.
/// Note that the bound uses the maximum precision of the underlying type, not `precision`.
T maxWholeValue() const
{
    const UInt32 whole_digits = static_cast<UInt32>(maxDecimalPrecision<T>() - scale);
    return getScaleMultiplier(whole_digits) - 1;
}
/// Checks whether `x` fits as the whole part of this decimal, i.e. lies in [-maxWholeValue(), maxWholeValue()].
bool canStoreWhole(T x) const
{
    const T limit = maxWholeValue();
    return x <= limit && x >= -limit;
}
private:
UInt32 precision;
Int32 scale; /// TODO: should we support scales out of [0, precision]?
const UInt32 precision;
const UInt32 scale; /// TODO: should we support scales out of [0, precision]?
static T getScaleMultiplier(UInt32 scale);
};
using DataTypeDecimal9 = DataTypeDecimal<Int32>;
using DataTypeDecimal18 = DataTypeDecimal<Int64>;
using DataTypeDecimal38 = DataTypeDecimal<Int128>;
/// Result type of an operation between two decimals when the left underlying type is at least
/// as wide as the right one: precision is the maximum for T, scale is the larger operand scale.
template <typename T, typename U>
typename std::enable_if_t<(sizeof(T) >= sizeof(U)), const DataTypeDecimal<T>>
decimalResultType(const DataTypeDecimal<T> & tx, const DataTypeDecimal<U> & ty)
{
    /// Plain comparison instead of an unqualified max(): the arguments are built-in integers,
    /// so argument-dependent lookup cannot find std::max at instantiation time.
    const UInt32 result_scale = tx.getScale() > ty.getScale() ? tx.getScale() : ty.getScale();
    return DataTypeDecimal<T>(maxDecimalPrecision<T>(), result_scale);
}
/// Result type of an operation between two decimals when the right underlying type is wider
/// than the left one: precision is the maximum for U, scale is the larger operand scale.
template <typename T, typename U>
typename std::enable_if_t<(sizeof(T) < sizeof(U)), const DataTypeDecimal<U>>
decimalResultType(const DataTypeDecimal<T> & tx, const DataTypeDecimal<U> & ty)
{
    /// Plain comparison instead of an unqualified max(): the arguments are built-in integers,
    /// so argument-dependent lookup cannot find std::max at instantiation time.
    const UInt32 result_scale = tx.getScale() > ty.getScale() ? tx.getScale() : ty.getScale();
    return DataTypeDecimal<U>(maxDecimalPrecision<U>(), result_scale);
}
static constexpr size_t minDecimalPrecision() { return 1; }
template <typename T> static constexpr size_t maxDecimalPrecision();
template <> constexpr size_t maxDecimalPrecision<Int32>() { return 9; }
template <> constexpr size_t maxDecimalPrecision<Int64>() { return 18; }
template <> constexpr size_t maxDecimalPrecision<Int128>() { return 38; }
}

View File

@ -552,12 +552,13 @@ ReturnType readFloatTextSimpleImpl(T & x, ReadBuffer & buf)
}
/// TODO: negative scales, trailing zeroes
template <typename T>
void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, int scale)
inline void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, unsigned int & scale)
{
x = 0;
int sign = 1;
bool leading_zeores = true;
bool trailing_zeores = false;
bool after_point = false;
if (buf.eof())
@ -578,14 +579,13 @@ void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, int scale)
while (!buf.eof())
{
if (!precision || scale < 0)
throw Exception("Cannot read decimal value", ErrorCodes::CANNOT_PARSE_NUMBER);
const char & byte = *buf.position();
switch (byte)
{
case '.':
after_point = true;
if (scale == 0)
trailing_zeores = true;
break;
case '1': [[fallthrough]];
case '2': [[fallthrough]];
@ -596,30 +596,36 @@ void readDecimalText(ReadBuffer & buf, T & x, unsigned int precision, int scale)
case '7': [[fallthrough]];
case '8': [[fallthrough]];
case '9':
if (unlikely(sign))
{
x = sign * (byte - '0');
sign = 0; /// no more leading zeroes
break;
}
leading_zeores = false;
if (trailing_zeores || precision == 0)
throw Exception("Cannot read decimal value", ErrorCodes::CANNOT_PARSE_NUMBER);
[[fallthrough]];
case '0':
{
if (likely(sign == 0))
/// ignore leading and trailing zeroes
if (likely(!leading_zeores && !trailing_zeores))
{
if (precision == 0 || precision < scale)
throw Exception("Cannot read decimal value", ErrorCodes::CANNOT_PARSE_NUMBER);
--precision;
x = x * 10 + (byte - '0');
}
if (after_point)
{
--scale;
if (scale == 0)
trailing_zeores = true;
}
break;
}
default:
x *= sign;
return;
}
++buf.position();
}
x *= sign;
}

View File

@ -6,6 +6,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeDate.h>
@ -72,6 +73,32 @@ static Field convertNumericType(const Field & from, const IDataType & type)
}
/// Converts an integer Field holding a `From` value into a Field holding the scaled integer
/// representation for the decimal type `type` (value multiplied by the type's scale multiplier).
/// Throws ARGUMENT_OUT_OF_BOUND when the number does not fit into the whole part of the decimal.
template <typename From, typename To>
static Field convertIntToDecimalTypeImpl(const Field & from, const To & type)
{
    using FieldType = typename NearestFieldType<typename To::UnderlyingType>::Type;
    /// Integer type the decimal computes with (the return type of getScaleMultiplier()).
    using NativeType = decltype(type.getScaleMultiplier());

    const From value = from.get<From>();

    /// canStoreWhole() takes the decimal's own integer type, which may be narrower than From
    /// or differ in signedness. Convert explicitly and reject values that do not survive the
    /// round trip; otherwise a wrapped value could pass the range check.
    const NativeType narrowed = static_cast<NativeType>(value);
    const bool lost_bits = static_cast<From>(narrowed) != value;
    const bool sign_flipped = narrowed < 0 && value > 0;

    if (lost_bits || sign_flipped || !type.canStoreWhole(narrowed))
        throw Exception("Number is too much to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND);

    /// Both operands now have the decimal's integer type, so no mixed signed/unsigned arithmetic.
    FieldType scaled_value = type.getScaleMultiplier() * narrowed;
    return Field(FieldType(scaled_value));
}
/// Dispatches on the integer kind stored in `from` (UInt64 or Int64) and converts it to the
/// decimal `type`. Any other Field kind is reported as TYPE_MISMATCH.
template <typename To>
static Field convertIntToDecimalType(const Field & from, const To & type)
{
    const auto source_kind = from.getType();

    if (source_kind == Field::Types::Int64)
        return convertIntToDecimalTypeImpl<Int64>(from, type);
    if (source_kind == Field::Types::UInt64)
        return convertIntToDecimalTypeImpl<UInt64>(from, type);

    throw Exception("Type mismatch in IN or VALUES section. Expected: " + type.getName() + ". Got: "
        + Field::Types::toString(source_kind), ErrorCodes::TYPE_MISMATCH);
}
DayNum stringToDate(const String & s)
{
ReadBufferFromString in(s);
@ -123,6 +150,12 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type)
if (typeid_cast<const DataTypeInt64 *>(&type)) return convertNumericType<Int64>(src, type);
if (typeid_cast<const DataTypeFloat32 *>(&type)) return convertNumericType<Float32>(src, type);
if (typeid_cast<const DataTypeFloat64 *>(&type)) return convertNumericType<Float64>(src, type);
if (typeid_cast<const DataTypeDecimal<Int32> *>(&type))
return convertIntToDecimalType(src, typeid_cast<const DataTypeDecimal<Int32> &>(type));
if (typeid_cast<const DataTypeDecimal<Int64> *>(&type))
return convertIntToDecimalType(src, typeid_cast<const DataTypeDecimal<Int64> &>(type));
if (typeid_cast<const DataTypeDecimal<Int128> *>(&type))
return convertIntToDecimalType(src, typeid_cast<const DataTypeDecimal<Int128> &>(type));
const bool is_date = typeid_cast<const DataTypeDate *>(&type);
bool is_datetime = false;