From e40a384372cd5bfe8dfa85d22618837842c557ac Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Thu, 28 Mar 2019 22:33:01 +0000 Subject: [PATCH 001/147] add datetime64 definition --- dbms/src/Core/Types.h | 1 + dbms/src/DataTypes/DataTypeDateTime.cpp | 137 ++++++++++++++++++------ dbms/src/DataTypes/DataTypeDateTime.h | 18 +++- dbms/src/Formats/ProtobufReader.h | 1 + dbms/src/Formats/ProtobufWriter.h | 1 + 5 files changed, 124 insertions(+), 34 deletions(-) diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h index e4882cd64f7..5fa70e668bd 100644 --- a/dbms/src/Core/Types.h +++ b/dbms/src/Core/Types.h @@ -74,6 +74,7 @@ enum class TypeIndex Float64, Date, DateTime, + DateTime64, String, FixedString, Enum8, diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index f3d6efa1488..9f229ffdd95 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -20,29 +20,64 @@ namespace DB { -DataTypeDateTime::DataTypeDateTime(const std::string & time_zone_name) +template +struct TypeGetter; + +template<> +struct TypeGetter { + using Type = time_t; + using Column = ColumnUInt32; + static constexpr TypeIndex Index = TypeIndex::DateTime; + static constexpr const char * Name = "DateTime"; +}; + +template<> +struct TypeGetter { + using Type = UInt64; + using Column = ColumnUInt64; + static constexpr TypeIndex Index = TypeIndex::DateTime64; + static constexpr const char * Name = "DateTime64"; +}; + +template +DataTypeDateTimeBase::DataTypeDateTimeBase(const std::string & time_zone_name) : has_explicit_time_zone(!time_zone_name.empty()), time_zone(DateLUT::instance(time_zone_name)), utc_time_zone(DateLUT::instance("UTC")) { } -std::string DataTypeDateTime::doGetName() const +template +const char * DataTypeDateTimeBase::getFamilyName() const +{ + return TypeGetter::Name; +} + +template +std::string DataTypeDateTimeBase::doGetName() const { if (!has_explicit_time_zone) - return "DateTime"; + return TypeGetter::Name; WriteBufferFromOwnString out; - out << "DateTime(" << quote << time_zone.getTimeZone() << ")"; + out << TypeGetter::Name << "(" << quote << time_zone.getTimeZone() << ")"; return out.str(); } -void DataTypeDateTime::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +template +TypeIndex DataTypeDateTimeBase::getTypeId() const { - writeDateTimeText(static_cast(column).getData()[row_num], ostr, time_zone); + return TypeGetter::Index; } -void DataTypeDateTime::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +{ + writeDateTimeText(static_cast::Column &>(column).getData()[row_num], ostr, time_zone); +} + +template +void DataTypeDateTimeBase::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { serializeText(column, row_num, ostr, settings); } @@ -61,24 +96,41 @@ static inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings } } - -void DataTypeDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +static inline void readText(UInt64 & /*x*/, ReadBuffer & /*istr*/, const FormatSettings & /*settings*/, const DateLUTImpl & /*time_zone*/, const DateLUTImpl & /*utc_time_zone*/) { - time_t x; - readText(x, istr, settings, time_zone, utc_time_zone); - static_cast(column).getData().push_back(x); + // TODO implement this +// return; +// switch (settings.date_time_input_format) +// { +// case FormatSettings::DateTimeInputFormat::Basic: +// readDateTimeText(x, istr, time_zone); +// return; +// case FormatSettings::DateTimeInputFormat::BestEffort: +// parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone); +// return; +// } } -void DataTypeDateTime::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + typename TypeGetter::Type x; + readText(x, istr, settings, time_zone, utc_time_zone); + static_cast::Column &>(column).getData().push_back(x); +} + +template +void DataTypeDateTimeBase::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('\'', ostr); serializeText(column, row_num, ostr, settings); writeChar('\'', ostr); } -void DataTypeDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - time_t x; + typename TypeGetter::Type x; if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808' { readText(x, istr, settings, time_zone, utc_time_zone); @@ -88,19 +140,21 @@ void DataTypeDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer & istr { readIntText(x, istr); } - static_cast(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. + static_cast::Column &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. } -void DataTypeDateTime::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('"', ostr); serializeText(column, row_num, ostr, settings); writeChar('"', ostr); } -void DataTypeDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - time_t x; + typename TypeGetter::Type x; if (checkChar('"', istr)) { readText(x, istr, settings, time_zone, utc_time_zone); @@ -110,19 +164,21 @@ void DataTypeDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, { readIntText(x, istr); } - static_cast(column).getData().push_back(x); + static_cast::Column &>(column).getData().push_back(x); } -void DataTypeDateTime::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('"', ostr); serializeText(column, row_num, ostr, settings); writeChar('"', ostr); } -void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - time_t x; + typename TypeGetter::Type x; if (istr.eof()) throwReadAfterEOF(); @@ -137,24 +193,27 @@ void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c if (maybe_quote == '\'' || maybe_quote == '\"') assertChar(maybe_quote, istr); - static_cast(column).getData().push_back(x); + static_cast::Column &>(column).getData().push_back(x); } -void DataTypeDateTime::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const +template +void DataTypeDateTimeBase::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const { if (value_index) return; - value_index = static_cast(protobuf.writeDateTime(static_cast(column).getData()[row_num])); + typename TypeGetter::Type t = static_cast::Column &>(column).getData()[row_num]; + value_index = static_cast(protobuf.writeDateTime(t)); } -void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const +template +void DataTypeDateTimeBase::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const { row_added = false; - time_t t; + typename TypeGetter::Type t; if (!protobuf.readDateTime(t)) return; - auto & container = static_cast(column).getData(); + auto & container = static_cast::Column &>(column).getData(); if (allow_add_row) { container.emplace_back(t); @@ -164,7 +223,8 @@ void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & pr container.back() = t; } -bool DataTypeDateTime::equals(const IDataType & rhs) const +template +bool DataTypeDateTimeBase::equals(const IDataType & rhs) const { /// DateTime with different timezones are equal, because: /// "all types with different time zones are equivalent and may be used interchangingly." @@ -193,9 +253,26 @@ static DataTypePtr create(const ASTPtr & arguments) return std::make_shared(arg->value.get()); } +static DataTypePtr create64(const ASTPtr & arguments) +{ + if (!arguments) + return std::make_shared(); + + if (arguments->children.size() != 1) + throw Exception("DateTime64 data type can optionally have only one argument - time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto * arg = arguments->children[0]->as(); + if (!arg || arg->value.getType() != Field::Types::String) + throw Exception("Parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(arg->value.get()); +} + void registerDataTypeDateTime(DataTypeFactory & factory) { factory.registerDataType("DateTime", create, DataTypeFactory::CaseInsensitive); + factory.registerDataType("DateTime64", create64, DataTypeFactory::CaseInsensitive); + factory.registerAlias("TIMESTAMP", "DateTime", DataTypeFactory::CaseInsensitive); } diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index 679a2777472..ba6116a2222 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -28,14 +28,15 @@ namespace DB * Server time zone is the time zone specified in 'timezone' parameter in configuration file, * or system time zone at the moment of server startup. */ -class DataTypeDateTime final : public DataTypeNumberBase +template +class DataTypeDateTimeBase : public DataTypeNumberBase { public: - DataTypeDateTime(const std::string & time_zone_name = ""); + DataTypeDateTimeBase(const std::string & time_zone_name = ""); - const char * getFamilyName() const override { return "DateTime"; } + const char * getFamilyName() const override; std::string doGetName() const override; - TypeIndex getTypeId() const override { return TypeIndex::DateTime; } + TypeIndex getTypeId() const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -62,4 +63,13 @@ private: const DateLUTImpl & utc_time_zone; }; +struct DataTypeDateTime : DataTypeDateTimeBase { + using DataTypeDateTimeBase::DataTypeDateTimeBase; +}; + +struct DataTypeDateTime64 : DataTypeDateTimeBase { + using DataTypeDateTimeBase::DataTypeDateTimeBase; +}; + } + diff --git a/dbms/src/Formats/ProtobufReader.h b/dbms/src/Formats/ProtobufReader.h index b9b1ac36c51..f732312393d 100644 --- a/dbms/src/Formats/ProtobufReader.h +++ b/dbms/src/Formats/ProtobufReader.h @@ -72,6 +72,7 @@ public: bool readUUID(UUID & uuid) { return current_converter->readUUID(uuid); } bool readDate(DayNum & date) { return current_converter->readDate(date); } bool readDateTime(time_t & tm) { return current_converter->readDateTime(tm); } + bool readDateTime(UInt64 & tm) { return current_converter->readUInt64(tm); } bool readDecimal(Decimal32 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal32(decimal, precision, scale); } bool readDecimal(Decimal64 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal64(decimal, precision, scale); } diff --git a/dbms/src/Formats/ProtobufWriter.h b/dbms/src/Formats/ProtobufWriter.h index aba3a2b2dc6..9a1df919b3b 100644 --- a/dbms/src/Formats/ProtobufWriter.h +++ b/dbms/src/Formats/ProtobufWriter.h @@ -70,6 +70,7 @@ public: bool writeUUID(const UUID & uuid) { return writeValueIfPossible(&IConverter::writeUUID, uuid); } bool writeDate(DayNum date) { return writeValueIfPossible(&IConverter::writeDate, date); } bool writeDateTime(time_t tm) { return writeValueIfPossible(&IConverter::writeDateTime, tm); } + bool writeDateTime(UInt64 tm) { return writeValueIfPossible(&IConverter::writeUInt64, tm); } bool writeDecimal(Decimal32 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal32, decimal, scale); } bool writeDecimal(Decimal64 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal64, decimal, scale); } bool writeDecimal(const Decimal128 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal128, decimal, scale); } From 9407a24759771cd2afb6cf7250e720d0d291351f Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Mon, 1 Apr 2019 17:18:13 +0100 Subject: [PATCH 002/147] able to insert DateTime64 objects into the table --- dbms/src/DataTypes/DataTypeDateTime.cpp | 71 +++++++++++++++++-- dbms/src/DataTypes/DataTypeDateTime.h | 18 ++++- .../0_stateless/00921_datetime64.reference | 3 + .../queries/0_stateless/00921_datetime64.sql | 15 ++++ 4 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00921_datetime64.reference create mode 100644 dbms/tests/queries/0_stateless/00921_datetime64.sql diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index 9f229ffdd95..502f4562dbb 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -16,6 +16,7 @@ #include +#include namespace DB { @@ -47,6 +48,23 @@ DataTypeDateTimeBase::DataTypeDateTimeBase(const std::string & time_ { } +DataTypeDateTime64::Precision parsePrecision(const std::string & precision_name) +{ + if (precision_name == "MILLI") + return DataTypeDateTime64::Precision::Millis; + else if (precision_name == "MICRO") + return DataTypeDateTime64::Precision::Micros; + return DataTypeDateTime64::Precision::Nanos; +} + +DataTypeDateTime64::DataTypeDateTime64(const std::string & time_zone_name, const std::string & precision_name) + : DataTypeDateTimeBase(time_zone_name), + precision(parsePrecision(precision_name)) +{ +} + + + template const char * DataTypeDateTimeBase::getFamilyName() const { @@ -76,6 +94,43 @@ void DataTypeDateTimeBase::serializeText(const IColumn & column, siz writeDateTimeText(static_cast::Column &>(column).getData()[row_num], ostr, time_zone); } +void DataTypeDateTime64::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +{ + time_t base_time; + auto full_time = static_cast(column).getData()[row_num]; + UInt32 time_fraction; + int pad_length = 0; + + switch(precision) { + case DataTypeDateTime64::Precision::Millis: { + base_time = full_time / MILLIS_PER_SECOND; + time_fraction = full_time % MILLIS_PER_SECOND; + pad_length = 3; + break; + } + case DataTypeDateTime64::Precision::Micros: { + base_time = full_time / MICROS_PER_SECOND; + time_fraction = full_time % MICROS_PER_SECOND; + pad_length = 6; + break; + } + case DataTypeDateTime64::Precision::Nanos: { + base_time = full_time / NANOS_PER_SECOND; + time_fraction = full_time % NANOS_PER_SECOND; + pad_length = 9; + break; + } + } + + writeDateTimeText(base_time, ostr, time_zone); + writeText(".", 1, ostr); + + /// TODO make this efficient + std::stringstream ss; + ss << std::setfill('0') << std::setw(pad_length) << time_fraction; + writeText(ss.str(), ostr); +} + template void DataTypeDateTimeBase::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { @@ -258,14 +313,18 @@ static DataTypePtr create64(const ASTPtr & arguments) if (!arguments) return std::make_shared(); - if (arguments->children.size() != 1) - throw Exception("DateTime64 data type can optionally have only one argument - time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (arguments->children.size() != 2) + throw Exception("DateTime64 data type can optionally have 2 arguments - precision and time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - const auto * arg = arguments->children[0]->as(); - if (!arg || arg->value.getType() != Field::Types::String) - throw Exception("Parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + const auto * timezone_arg = arguments->children[0]->as(); + if (!timezone_arg || timezone_arg->value.getType() != Field::Types::String) + throw Exception("Timezone parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return std::make_shared(arg->value.get()); + const auto * precision_arg = arguments->children[1]->as(); + if (!precision_arg || precision_arg->value.getType() != Field::Types::String) + throw Exception("Precision parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(timezone_arg->value.get(), precision_arg->value.get()); } void registerDataTypeDateTime(DataTypeFactory & factory) diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index ba6116a2222..5e0d0d6085c 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -57,7 +57,7 @@ public: const DateLUTImpl & getTimeZone() const { return time_zone; } -private: +protected: bool has_explicit_time_zone; const DateLUTImpl & time_zone; const DateLUTImpl & utc_time_zone; @@ -68,7 +68,21 @@ struct DataTypeDateTime : DataTypeDateTimeBase { }; struct DataTypeDateTime64 : DataTypeDateTimeBase { - using DataTypeDateTimeBase::DataTypeDateTimeBase; + enum class Precision { + Millis, + Micros, + Nanos, + }; + static constexpr UInt32 MILLIS_PER_SECOND = 1000; + static constexpr UInt32 MICROS_PER_SECOND = 1000 * 1000; + static constexpr UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; + + DataTypeDateTime64(const std::string & time_zone_name = "", const std::string & precision_name = ""); + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + +private: + const Precision precision; }; } diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.reference b/dbms/tests/queries/0_stateless/00921_datetime64.reference new file mode 100644 index 00000000000..c866f4b76b8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00921_datetime64.reference @@ -0,0 +1,3 @@ +2 1970-01-01 01:00:01.000000001 1 0 +2 1970-01-01 01:00:01.000000003 3 3 +2 1970-01-01 01:00:01.000000005 5 3 diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.sql b/dbms/tests/queries/0_stateless/00921_datetime64.sql new file mode 100644 index 00000000000..82938dbf5ed --- /dev/null +++ b/dbms/tests/queries/0_stateless/00921_datetime64.sql @@ -0,0 +1,15 @@ +USE test; + +DROP TABLE IF EXISTS A; +DROP TABLE IF EXISTS B; + +CREATE TABLE A(k UInt32, t DateTime64, a Float64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO A(k,t,a) VALUES (2,1000000001,1),(2,1000000003,3),(2,1000000005,5); + +CREATE TABLE B(k UInt32, t DateTime64, b Float64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B(k,t,b) VALUES (2,1000000003,3); + +SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t); + +DROP TABLE B; +DROP TABLE A; From a602a6c79cf4945156813ed5e3a56b4bae6c5f7f Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Thu, 28 Mar 2019 22:33:01 +0000 Subject: [PATCH 003/147] add datetime64 definition --- dbms/src/Core/Types.h | 1 + dbms/src/DataTypes/DataTypeDateTime.cpp | 137 ++++++++++++++++++------ dbms/src/DataTypes/DataTypeDateTime.h | 18 +++- dbms/src/Formats/ProtobufReader.h | 1 + dbms/src/Formats/ProtobufWriter.h | 1 + 5 files changed, 124 insertions(+), 34 deletions(-) diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h index e4882cd64f7..5fa70e668bd 100644 --- a/dbms/src/Core/Types.h +++ b/dbms/src/Core/Types.h @@ -74,6 +74,7 @@ enum class TypeIndex Float64, Date, DateTime, + DateTime64, String, FixedString, Enum8, diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index f3d6efa1488..9f229ffdd95 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -20,29 +20,64 @@ namespace DB { -DataTypeDateTime::DataTypeDateTime(const std::string & time_zone_name) +template +struct TypeGetter; + +template<> +struct TypeGetter { + using Type = time_t; + using Column = ColumnUInt32; + static constexpr TypeIndex Index = TypeIndex::DateTime; + static constexpr const char * Name = "DateTime"; +}; + +template<> +struct TypeGetter { + using Type = UInt64; + using Column = ColumnUInt64; + static constexpr TypeIndex Index = TypeIndex::DateTime64; + static constexpr const char * Name = "DateTime64"; +}; + +template +DataTypeDateTimeBase::DataTypeDateTimeBase(const std::string & time_zone_name) : has_explicit_time_zone(!time_zone_name.empty()), time_zone(DateLUT::instance(time_zone_name)), utc_time_zone(DateLUT::instance("UTC")) { } -std::string DataTypeDateTime::doGetName() const +template +const char * DataTypeDateTimeBase::getFamilyName() const +{ + return TypeGetter::Name; +} + +template +std::string DataTypeDateTimeBase::doGetName() const { if (!has_explicit_time_zone) - return "DateTime"; + return TypeGetter::Name; WriteBufferFromOwnString out; - out << "DateTime(" << quote << time_zone.getTimeZone() << ")"; + out << TypeGetter::Name << "(" << quote << time_zone.getTimeZone() << ")"; return out.str(); } -void DataTypeDateTime::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +template +TypeIndex DataTypeDateTimeBase::getTypeId() const { - writeDateTimeText(static_cast(column).getData()[row_num], ostr, time_zone); + return TypeGetter::Index; } -void DataTypeDateTime::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +{ + writeDateTimeText(static_cast::Column &>(column).getData()[row_num], ostr, time_zone); +} + +template +void DataTypeDateTimeBase::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { serializeText(column, row_num, ostr, settings); } @@ -61,24 +96,41 @@ static inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings } } - -void DataTypeDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +static inline void readText(UInt64 & /*x*/, ReadBuffer & /*istr*/, const FormatSettings & /*settings*/, const DateLUTImpl & /*time_zone*/, const DateLUTImpl & /*utc_time_zone*/) { - time_t x; - readText(x, istr, settings, time_zone, utc_time_zone); - static_cast(column).getData().push_back(x); + // TODO implement this +// return; +// switch (settings.date_time_input_format) +// { +// case FormatSettings::DateTimeInputFormat::Basic: +// readDateTimeText(x, istr, time_zone); +// return; +// case FormatSettings::DateTimeInputFormat::BestEffort: +// parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone); +// return; +// } } -void DataTypeDateTime::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + typename TypeGetter::Type x; + readText(x, istr, settings, time_zone, utc_time_zone); + static_cast::Column &>(column).getData().push_back(x); +} + +template +void DataTypeDateTimeBase::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('\'', ostr); serializeText(column, row_num, ostr, settings); writeChar('\'', ostr); } -void DataTypeDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - time_t x; + typename TypeGetter::Type x; if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808' { readText(x, istr, settings, time_zone, utc_time_zone); @@ -88,19 +140,21 @@ void DataTypeDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer & istr { readIntText(x, istr); } - static_cast(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. + static_cast::Column &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. } -void DataTypeDateTime::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('"', ostr); serializeText(column, row_num, ostr, settings); writeChar('"', ostr); } -void DataTypeDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - time_t x; + typename TypeGetter::Type x; if (checkChar('"', istr)) { readText(x, istr, settings, time_zone, utc_time_zone); @@ -110,19 +164,21 @@ void DataTypeDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, { readIntText(x, istr); } - static_cast(column).getData().push_back(x); + static_cast::Column &>(column).getData().push_back(x); } -void DataTypeDateTime::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('"', ostr); serializeText(column, row_num, ostr, settings); writeChar('"', ostr); } -void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - time_t x; + typename TypeGetter::Type x; if (istr.eof()) throwReadAfterEOF(); @@ -137,24 +193,27 @@ void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c if (maybe_quote == '\'' || maybe_quote == '\"') assertChar(maybe_quote, istr); - static_cast(column).getData().push_back(x); + static_cast::Column &>(column).getData().push_back(x); } -void DataTypeDateTime::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const +template +void DataTypeDateTimeBase::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const { if (value_index) return; - value_index = static_cast(protobuf.writeDateTime(static_cast(column).getData()[row_num])); + typename TypeGetter::Type t = static_cast::Column &>(column).getData()[row_num]; + value_index = static_cast(protobuf.writeDateTime(t)); } -void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const +template +void DataTypeDateTimeBase::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const { row_added = false; - time_t t; + typename TypeGetter::Type t; if (!protobuf.readDateTime(t)) return; - auto & container = static_cast(column).getData(); + auto & container = static_cast::Column &>(column).getData(); if (allow_add_row) { container.emplace_back(t); @@ -164,7 +223,8 @@ void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & pr container.back() = t; } -bool DataTypeDateTime::equals(const IDataType & rhs) const +template +bool DataTypeDateTimeBase::equals(const IDataType & rhs) const { /// DateTime with different timezones are equal, because: /// "all types with different time zones are equivalent and may be used interchangingly." @@ -193,9 +253,26 @@ static DataTypePtr create(const ASTPtr & arguments) return std::make_shared(arg->value.get()); } +static DataTypePtr create64(const ASTPtr & arguments) +{ + if (!arguments) + return std::make_shared(); + + if (arguments->children.size() != 1) + throw Exception("DateTime64 data type can optionally have only one argument - time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto * arg = arguments->children[0]->as(); + if (!arg || arg->value.getType() != Field::Types::String) + throw Exception("Parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(arg->value.get()); +} + void registerDataTypeDateTime(DataTypeFactory & factory) { factory.registerDataType("DateTime", create, DataTypeFactory::CaseInsensitive); + factory.registerDataType("DateTime64", create64, DataTypeFactory::CaseInsensitive); + factory.registerAlias("TIMESTAMP", "DateTime", DataTypeFactory::CaseInsensitive); } diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index 679a2777472..ba6116a2222 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -28,14 +28,15 @@ namespace DB * Server time zone is the time zone specified in 'timezone' parameter in configuration file, * or system time zone at the moment of server startup. */ -class DataTypeDateTime final : public DataTypeNumberBase +template +class DataTypeDateTimeBase : public DataTypeNumberBase { public: - DataTypeDateTime(const std::string & time_zone_name = ""); + DataTypeDateTimeBase(const std::string & time_zone_name = ""); - const char * getFamilyName() const override { return "DateTime"; } + const char * getFamilyName() const override; std::string doGetName() const override; - TypeIndex getTypeId() const override { return TypeIndex::DateTime; } + TypeIndex getTypeId() const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -62,4 +63,13 @@ private: const DateLUTImpl & utc_time_zone; }; +struct DataTypeDateTime : DataTypeDateTimeBase { + using DataTypeDateTimeBase::DataTypeDateTimeBase; +}; + +struct DataTypeDateTime64 : DataTypeDateTimeBase { + using DataTypeDateTimeBase::DataTypeDateTimeBase; +}; + } + diff --git a/dbms/src/Formats/ProtobufReader.h b/dbms/src/Formats/ProtobufReader.h index b9b1ac36c51..f732312393d 100644 --- a/dbms/src/Formats/ProtobufReader.h +++ b/dbms/src/Formats/ProtobufReader.h @@ -72,6 +72,7 @@ public: bool readUUID(UUID & uuid) { return current_converter->readUUID(uuid); } bool readDate(DayNum & date) { return current_converter->readDate(date); } bool readDateTime(time_t & tm) { return current_converter->readDateTime(tm); } + bool readDateTime(UInt64 & tm) { return current_converter->readUInt64(tm); } bool readDecimal(Decimal32 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal32(decimal, precision, scale); } bool readDecimal(Decimal64 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal64(decimal, precision, scale); } diff --git a/dbms/src/Formats/ProtobufWriter.h b/dbms/src/Formats/ProtobufWriter.h index aba3a2b2dc6..9a1df919b3b 100644 --- a/dbms/src/Formats/ProtobufWriter.h +++ b/dbms/src/Formats/ProtobufWriter.h @@ -70,6 +70,7 @@ public: bool writeUUID(const UUID & uuid) { return writeValueIfPossible(&IConverter::writeUUID, uuid); } bool writeDate(DayNum date) { return writeValueIfPossible(&IConverter::writeDate, date); } bool writeDateTime(time_t tm) { return writeValueIfPossible(&IConverter::writeDateTime, tm); } + bool writeDateTime(UInt64 tm) { return writeValueIfPossible(&IConverter::writeUInt64, tm); } bool writeDecimal(Decimal32 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal32, decimal, scale); } bool writeDecimal(Decimal64 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal64, decimal, scale); } bool writeDecimal(const Decimal128 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal128, decimal, scale); } From 23b53ee2f9855925f664af67c00a0a7b1552fb81 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Mon, 1 Apr 2019 17:18:13 +0100 Subject: [PATCH 004/147] able to insert DateTime64 objects into the table --- dbms/src/DataTypes/DataTypeDateTime.cpp | 71 +++++++++++++++++-- dbms/src/DataTypes/DataTypeDateTime.h | 18 ++++- .../0_stateless/00921_datetime64.reference | 3 + .../queries/0_stateless/00921_datetime64.sql | 15 ++++ 4 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00921_datetime64.reference create mode 100644 dbms/tests/queries/0_stateless/00921_datetime64.sql diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index 9f229ffdd95..502f4562dbb 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -16,6 +16,7 @@ #include +#include namespace DB { @@ -47,6 +48,23 @@ DataTypeDateTimeBase::DataTypeDateTimeBase(const std::string & time_ { } +DataTypeDateTime64::Precision parsePrecision(const std::string & precision_name) +{ + if (precision_name == "MILLI") + return DataTypeDateTime64::Precision::Millis; + else if (precision_name == "MICRO") + return DataTypeDateTime64::Precision::Micros; + return DataTypeDateTime64::Precision::Nanos; +} + +DataTypeDateTime64::DataTypeDateTime64(const std::string & time_zone_name, const std::string & precision_name) + : DataTypeDateTimeBase(time_zone_name), + precision(parsePrecision(precision_name)) +{ +} + + + template const char * DataTypeDateTimeBase::getFamilyName() const { @@ -76,6 +94,43 @@ void DataTypeDateTimeBase::serializeText(const IColumn & column, siz writeDateTimeText(static_cast::Column &>(column).getData()[row_num], ostr, time_zone); } +void DataTypeDateTime64::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +{ + time_t base_time; + auto full_time = static_cast(column).getData()[row_num]; + UInt32 time_fraction; + int pad_length = 0; + + switch(precision) { + case DataTypeDateTime64::Precision::Millis: { + base_time = full_time / MILLIS_PER_SECOND; + time_fraction = full_time % MILLIS_PER_SECOND; + pad_length = 3; + break; + } + case DataTypeDateTime64::Precision::Micros: { + base_time = full_time / MICROS_PER_SECOND; + time_fraction = full_time % MICROS_PER_SECOND; + pad_length = 6; + break; + } + case DataTypeDateTime64::Precision::Nanos: { + base_time = full_time / NANOS_PER_SECOND; + time_fraction = full_time % NANOS_PER_SECOND; + pad_length = 9; + break; + } + } + + writeDateTimeText(base_time, ostr, time_zone); + writeText(".", 1, ostr); + + /// TODO make this efficient + std::stringstream ss; + ss << std::setfill('0') << std::setw(pad_length) << time_fraction; + writeText(ss.str(), ostr); +} + template void DataTypeDateTimeBase::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { @@ -258,14 +313,18 @@ static DataTypePtr create64(const ASTPtr & arguments) if (!arguments) return std::make_shared(); - if (arguments->children.size() != 1) - throw Exception("DateTime64 data type can optionally have only one argument - time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (arguments->children.size() != 2) + throw Exception("DateTime64 data type can optionally have 2 arguments - precision and time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - const auto * arg = arguments->children[0]->as(); - if (!arg || arg->value.getType() != Field::Types::String) - throw Exception("Parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + const auto * timezone_arg = arguments->children[0]->as(); + if (!timezone_arg || timezone_arg->value.getType() != Field::Types::String) + throw Exception("Timezone parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return std::make_shared(arg->value.get()); + const auto * precision_arg = arguments->children[1]->as(); + if (!precision_arg || precision_arg->value.getType() != Field::Types::String) + throw Exception("Precision parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(timezone_arg->value.get(), precision_arg->value.get()); } void registerDataTypeDateTime(DataTypeFactory & factory) diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index ba6116a2222..5e0d0d6085c 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -57,7 +57,7 @@ public: const DateLUTImpl & getTimeZone() const { return time_zone; } -private: +protected: bool has_explicit_time_zone; const DateLUTImpl & time_zone; const DateLUTImpl & utc_time_zone; @@ -68,7 +68,21 @@ struct DataTypeDateTime : DataTypeDateTimeBase { }; struct DataTypeDateTime64 : DataTypeDateTimeBase { - using DataTypeDateTimeBase::DataTypeDateTimeBase; + enum class Precision { + Millis, + Micros, + Nanos, + }; + static constexpr UInt32 MILLIS_PER_SECOND = 1000; + static constexpr UInt32 MICROS_PER_SECOND = 1000 * 1000; + static constexpr UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; + + DataTypeDateTime64(const std::string & time_zone_name = "", const std::string & precision_name = ""); + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + +private: + const Precision precision; }; } diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.reference b/dbms/tests/queries/0_stateless/00921_datetime64.reference new file mode 100644 index 00000000000..c866f4b76b8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00921_datetime64.reference @@ -0,0 +1,3 @@ +2 1970-01-01 01:00:01.000000001 1 0 +2 1970-01-01 01:00:01.000000003 3 3 +2 1970-01-01 01:00:01.000000005 5 3 diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.sql b/dbms/tests/queries/0_stateless/00921_datetime64.sql new file mode 100644 index 00000000000..82938dbf5ed --- /dev/null +++ b/dbms/tests/queries/0_stateless/00921_datetime64.sql @@ -0,0 +1,15 @@ +USE test; + +DROP TABLE IF EXISTS A; +DROP TABLE IF EXISTS B; + +CREATE TABLE A(k UInt32, t DateTime64, a Float64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO A(k,t,a) VALUES (2,1000000001,1),(2,1000000003,3),(2,1000000005,5); + +CREATE TABLE B(k UInt32, t DateTime64, b Float64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B(k,t,b) VALUES (2,1000000003,3); + +SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t); + +DROP TABLE B; +DROP TABLE A; From fc0e8d3658edeee171357b80f845823f80b81234 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Wed, 1 May 2019 23:42:17 +0100 Subject: [PATCH 005/147] read and write datetime64 --- dbms/src/DataTypes/DataTypeDateTime.cpp | 100 ++++++------------------ dbms/src/DataTypes/DataTypeDateTime.h | 16 +--- dbms/src/IO/ReadHelpers.h | 9 +++ dbms/src/IO/WriteHelpers.h | 32 ++++++++ 4 files changed, 66 insertions(+), 91 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index 502f4562dbb..652799d310f 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -30,6 +30,10 @@ struct TypeGetter { using Column = ColumnUInt32; static constexpr TypeIndex Index = TypeIndex::DateTime; static constexpr const char * Name = "DateTime"; + + static void write(Type datetime, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { + writeDateTimeText(datetime, buf, date_lut); + } }; template<> @@ -38,6 +42,10 @@ struct TypeGetter { using Column = ColumnUInt64; static constexpr TypeIndex Index = TypeIndex::DateTime64; static constexpr const char * Name = "DateTime64"; + + static void write(Type datetime64, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { + writeDateTime64Text(datetime64, buf, date_lut); + } }; template @@ -48,23 +56,6 @@ DataTypeDateTimeBase::DataTypeDateTimeBase(const std::string & time_ { } -DataTypeDateTime64::Precision parsePrecision(const std::string & precision_name) -{ - if (precision_name == "MILLI") - return DataTypeDateTime64::Precision::Millis; - else if (precision_name == "MICRO") - return DataTypeDateTime64::Precision::Micros; - return DataTypeDateTime64::Precision::Nanos; -} - -DataTypeDateTime64::DataTypeDateTime64(const std::string & time_zone_name, const std::string & precision_name) - : DataTypeDateTimeBase(time_zone_name), - precision(parsePrecision(precision_name)) -{ -} - - - template const char * DataTypeDateTimeBase::getFamilyName() const { @@ -91,44 +82,8 @@ TypeIndex DataTypeDateTimeBase::getTypeId() const template void DataTypeDateTimeBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeDateTimeText(static_cast::Column &>(column).getData()[row_num], ostr, time_zone); -} - -void DataTypeDateTime64::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const -{ - time_t base_time; - auto full_time = static_cast(column).getData()[row_num]; - UInt32 time_fraction; - int pad_length = 0; - - switch(precision) { - case DataTypeDateTime64::Precision::Millis: { - base_time = full_time / MILLIS_PER_SECOND; - time_fraction = full_time % MILLIS_PER_SECOND; - pad_length = 3; - break; - } - case DataTypeDateTime64::Precision::Micros: { - base_time = full_time / MICROS_PER_SECOND; - time_fraction = full_time % MICROS_PER_SECOND; - pad_length = 6; - break; - } - case DataTypeDateTime64::Precision::Nanos: { - base_time = full_time / NANOS_PER_SECOND; - time_fraction = full_time % NANOS_PER_SECOND; - pad_length = 9; - break; - } - } - - writeDateTimeText(base_time, ostr, time_zone); - writeText(".", 1, ostr); - - /// TODO make this efficient - std::stringstream ss; - ss << std::setfill('0') << std::setw(pad_length) << time_fraction; - writeText(ss.str(), ostr); + using TG = TypeGetter; + TG::write(static_cast(column).getData()[row_num], ostr, time_zone); } template @@ -151,25 +106,22 @@ static inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings } } -static inline void readText(UInt64 & /*x*/, ReadBuffer & /*istr*/, const FormatSettings & /*settings*/, const DateLUTImpl & /*time_zone*/, const DateLUTImpl & /*utc_time_zone*/) +static inline void readText(UInt64 & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & /*utc_time_zone*/) { - // TODO implement this -// return; -// switch (settings.date_time_input_format) -// { -// case FormatSettings::DateTimeInputFormat::Basic: -// readDateTimeText(x, istr, time_zone); -// return; -// case FormatSettings::DateTimeInputFormat::BestEffort: -// parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone); -// return; -// } + switch (settings.date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + readDateTime64Text(x, istr, time_zone); + return; + default: + return; + } } template void DataTypeDateTimeBase::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - typename TypeGetter::Type x; + typename TypeGetter::Type x = 0; readText(x, istr, settings, time_zone, utc_time_zone); static_cast::Column &>(column).getData().push_back(x); } @@ -233,7 +185,7 @@ void DataTypeDateTimeBase::serializeTextCSV(const IColumn & column, template void DataTypeDateTimeBase::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - typename TypeGetter::Type x; + typename TypeGetter::Type x = 0; if (istr.eof()) throwReadAfterEOF(); @@ -313,18 +265,14 @@ static DataTypePtr create64(const ASTPtr & arguments) if (!arguments) return std::make_shared(); - if (arguments->children.size() != 2) - throw Exception("DateTime64 data type can optionally have 2 arguments - precision and time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (arguments->children.size() != 1) + throw Exception("DateTime64 data type can optionally have only one argument - time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto * timezone_arg = arguments->children[0]->as(); if (!timezone_arg || timezone_arg->value.getType() != Field::Types::String) throw Exception("Timezone parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - const auto * precision_arg = arguments->children[1]->as(); - if (!precision_arg || precision_arg->value.getType() != Field::Types::String) - throw Exception("Precision parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(timezone_arg->value.get(), precision_arg->value.get()); + return std::make_shared(timezone_arg->value.get()); } void registerDataTypeDateTime(DataTypeFactory & factory) diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index 5e0d0d6085c..d7d00ef5ba8 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -68,21 +68,7 @@ struct DataTypeDateTime : DataTypeDateTimeBase { }; struct DataTypeDateTime64 : DataTypeDateTimeBase { - enum class Precision { - Millis, - Micros, - Nanos, - }; - static constexpr UInt32 MILLIS_PER_SECOND = 1000; - static constexpr UInt32 MICROS_PER_SECOND = 1000 * 1000; - static constexpr UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; - - DataTypeDateTime64(const std::string & time_zone_name = "", const std::string & precision_name = ""); - - void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; - -private: - const Precision precision; + using DataTypeDateTimeBase::DataTypeDateTimeBase; }; } diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h index a24c1b4546c..78347d170ee 100644 --- a/dbms/src/IO/ReadHelpers.h +++ b/dbms/src/IO/ReadHelpers.h @@ -622,6 +622,15 @@ inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTI readDateTimeTextImpl(datetime, buf, date_lut); } +inline void readDateTime64Text(UInt64 & datetime64, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +{ + time_t datetime = 0; + readDateTimeTextImpl(datetime, buf, date_lut); + buf.ignore(); // ignore the "." + readIntText(datetime64, buf); + datetime64 += 1000 * 1000 * 1000 * datetime; +} + inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { return readDateTimeTextImpl(datetime, buf, date_lut); diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h index 622b4aaf07a..e6e23917079 100644 --- a/dbms/src/IO/WriteHelpers.h +++ b/dbms/src/IO/WriteHelpers.h @@ -669,6 +669,38 @@ inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTI date_lut.toHour(datetime), date_lut.toMinute(datetime), date_lut.toSecond(datetime)), buf); } +/// In the format YYYY-MM-DD HH:MM:SS.NNNNNNNNN, according to the specified time zone. +template +inline void writeDateTime64Text(UInt64 datetime64, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +{ + if (unlikely(!datetime64)) + { + static const char s[] = + { + '0', '0', '0', '0', date_delimeter, '0', '0', date_delimeter, '0', '0', + between_date_time_delimiter, + '0', '0', time_delimeter, '0', '0', time_delimeter, '0', '0', + fractional_time_delimiter, + '0', '0', '0', '0', '0', '0', '0', '0', '0' + }; + buf.write(s, sizeof(s)); + return; + } + + const UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; + time_t datetime = datetime64 / NANOS_PER_SECOND; + auto nanos_since_second = static_cast(datetime64 % NANOS_PER_SECOND); + + const auto & values = date_lut.getValues(datetime64); + writeDateTimeText( + LocalDateTime(values.year, values.month, values.day_of_month, + date_lut.toHour(datetime), date_lut.toMinute(datetime), date_lut.toSecond(datetime)), buf); + + buf.write(fractional_time_delimiter); + writeIntText(nanos_since_second, buf); +} + + /// Methods for output in binary format. template From 258f4254371103198d42660b6f07e7339cbe219d Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Thu, 2 May 2019 23:40:45 +0100 Subject: [PATCH 006/147] factor out the conversion between datetime64 and the uint64 --- dbms/src/DataTypes/DataTypeDateTime.cpp | 32 ++++-- dbms/src/DataTypes/DataTypeDateTime.h | 21 +++- dbms/src/IO/ReadHelpers.h | 14 ++- dbms/src/IO/WriteHelpers.h | 103 +++++++++--------- .../0_stateless/00921_datetime64.reference | 6 +- .../queries/0_stateless/00921_datetime64.sql | 4 +- 6 files changed, 111 insertions(+), 69 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index 652799d310f..b1efe1b34a8 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -21,6 +21,21 @@ namespace DB { +static constexpr UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; + +DateTime64::Components DateTime64::split() const +{ + auto datetime = static_cast(t / NANOS_PER_SECOND); + auto nanos = static_cast(t % NANOS_PER_SECOND); + return Components { datetime, nanos }; +} + +DateTime64::DateTime64(DateTime64::Components c) + : t {c.datetime * NANOS_PER_SECOND + c.nanos} +{ + assert(c.nanos >= 0 and c.nanos < NANOS_PER_SECOND); +} + template struct TypeGetter; @@ -28,6 +43,8 @@ template<> struct TypeGetter { using Type = time_t; using Column = ColumnUInt32; +// static_assert(sizeof(Column::value_type) == sizeof(Type)); + static constexpr TypeIndex Index = TypeIndex::DateTime; static constexpr const char * Name = "DateTime"; @@ -37,14 +54,16 @@ struct TypeGetter { }; template<> -struct TypeGetter { - using Type = UInt64; +struct TypeGetter { + using Type = DateTime64::Type; using Column = ColumnUInt64; + static_assert(sizeof(Column::value_type) == sizeof(Type)); + static constexpr TypeIndex Index = TypeIndex::DateTime64; static constexpr const char * Name = "DateTime64"; static void write(Type datetime64, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { - writeDateTime64Text(datetime64, buf, date_lut); + writeDateTimeText(datetime64, buf, date_lut); } }; @@ -82,8 +101,7 @@ TypeIndex DataTypeDateTimeBase::getTypeId() const template void DataTypeDateTimeBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - using TG = TypeGetter; - TG::write(static_cast(column).getData()[row_num], ostr, time_zone); + writeDateTimeText(static_cast::Column &>(column).getData()[row_num], ostr, time_zone); } template @@ -106,12 +124,12 @@ static inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings } } -static inline void readText(UInt64 & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & /*utc_time_zone*/) +static inline void readText(DateTime64 & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & /*utc_time_zone*/) { switch (settings.date_time_input_format) { case FormatSettings::DateTimeInputFormat::Basic: - readDateTime64Text(x, istr, time_zone); + readDateTimeText(x, istr, time_zone); return; default: return; diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index d7d00ef5ba8..53628d85c58 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -67,7 +67,26 @@ struct DataTypeDateTime : DataTypeDateTimeBase { using DataTypeDateTimeBase::DataTypeDateTimeBase; }; -struct DataTypeDateTime64 : DataTypeDateTimeBase { +// this is a separate class to avoid accidental conversions that +// might occur between time_t and the type storing the datetime64 +// time_t might have a different definition on different libcs +struct DateTime64 { + using Type = Int64; + struct Components { + time_t datetime = 0; + UInt32 nanos = 0; + }; + + Components split() const; + explicit DateTime64(Components c); + explicit operator bool() const { + return t != 0; + } +private: + Type t; +}; + +struct DataTypeDateTime64 : DataTypeDateTimeBase { using DataTypeDateTimeBase::DataTypeDateTimeBase; }; diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h index 78347d170ee..f1f31fdd419 100644 --- a/dbms/src/IO/ReadHelpers.h +++ b/dbms/src/IO/ReadHelpers.h @@ -27,6 +27,8 @@ #include #include +#include + #ifdef __clang__ #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wdouble-promotion" @@ -622,13 +624,15 @@ inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTI readDateTimeTextImpl(datetime, buf, date_lut); } -inline void readDateTime64Text(UInt64 & datetime64, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +inline void readDateTimeText(DateTime64 & datetime64, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { - time_t datetime = 0; - readDateTimeTextImpl(datetime, buf, date_lut); + DateTime64::Components c; + readDateTimeTextImpl(c.datetime, buf, date_lut); buf.ignore(); // ignore the "." - readIntText(datetime64, buf); - datetime64 += 1000 * 1000 * 1000 * datetime; + auto remaining = buf.available(); + readIntText(c.nanos, buf); + c.nanos *= static_cast(std::pow(10, 9 - remaining)); + datetime64 = DateTime64(c); } inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h index e6e23917079..5aa5d8f9526 100644 --- a/dbms/src/IO/WriteHelpers.h +++ b/dbms/src/IO/WriteHelpers.h @@ -28,6 +28,8 @@ #include +#include + namespace DB { @@ -531,6 +533,42 @@ inline void writeUUIDText(const UUID & uuid, WriteBuffer & buf) buf.write(s, sizeof(s)); } +/// Methods for output in binary format. +template +inline std::enable_if_t, void> +writeBinary(const T & x, WriteBuffer & buf) { writePODBinary(x, buf); } + +inline void writeBinary(const String & x, WriteBuffer & buf) { writeStringBinary(x, buf); } +inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBinary(x, buf); } +inline void writeBinary(const Int128 & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const UInt128 & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const UInt256 & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const Decimal32 & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const Decimal64 & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const Decimal128 & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const LocalDate & x, WriteBuffer & buf) { writePODBinary(x, buf); } +inline void writeBinary(const LocalDateTime & x, WriteBuffer & buf) { writePODBinary(x, buf); } + + +/// Methods for outputting the value in text form for a tab-separated format. +template +inline std::enable_if_t, void> +writeText(const T & x, WriteBuffer & buf) { writeIntText(x, buf); } + +template +inline std::enable_if_t, void> +writeText(const T & x, WriteBuffer & buf) { writeFloatText(x, buf); } + +inline void writeText(const String & x, WriteBuffer & buf) { writeEscapedString(x, buf); } + +/// Implemented as template specialization (not function overload) to avoid preference over templates on arithmetic types above. +template <> inline void writeText(const bool & x, WriteBuffer & buf) { writeBoolText(x, buf); } + +/// unlike the method for std::string +/// assumes here that `x` is a null-terminated string. +inline void writeText(const char * x, WriteBuffer & buf) { writeEscapedString(x, strlen(x), buf); } +inline void writeText(const char * x, size_t size, WriteBuffer & buf) { writeEscapedString(x, size, buf); } + /// in YYYY-MM-DD format template inline void writeDateText(const LocalDate & date, WriteBuffer & buf) @@ -654,11 +692,11 @@ inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTI if (unlikely(!datetime)) { static const char s[] = - { - '0', '0', '0', '0', date_delimeter, '0', '0', date_delimeter, '0', '0', - between_date_time_delimiter, - '0', '0', time_delimeter, '0', '0', time_delimeter, '0', '0' - }; + { + '0', '0', '0', '0', date_delimeter, '0', '0', date_delimeter, '0', '0', + between_date_time_delimiter, + '0', '0', time_delimeter, '0', '0', time_delimeter, '0', '0' + }; buf.write(s, sizeof(s)); return; } @@ -666,12 +704,12 @@ inline void writeDateTimeText(time_t datetime, WriteBuffer & buf, const DateLUTI const auto & values = date_lut.getValues(datetime); writeDateTimeText( LocalDateTime(values.year, values.month, values.day_of_month, - date_lut.toHour(datetime), date_lut.toMinute(datetime), date_lut.toSecond(datetime)), buf); + date_lut.toHour(datetime), date_lut.toMinute(datetime), date_lut.toSecond(datetime)), buf); } /// In the format YYYY-MM-DD HH:MM:SS.NNNNNNNNN, according to the specified time zone. template -inline void writeDateTime64Text(UInt64 datetime64, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) +inline void writeDateTimeText(DateTime64 datetime64, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { if (unlikely(!datetime64)) { @@ -687,57 +725,20 @@ inline void writeDateTime64Text(UInt64 datetime64, WriteBuffer & buf, const Date return; } - const UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; - time_t datetime = datetime64 / NANOS_PER_SECOND; - auto nanos_since_second = static_cast(datetime64 % NANOS_PER_SECOND); - - const auto & values = date_lut.getValues(datetime64); + auto c = datetime64.split(); + const auto & values = date_lut.getValues(c.datetime); writeDateTimeText( LocalDateTime(values.year, values.month, values.day_of_month, - date_lut.toHour(datetime), date_lut.toMinute(datetime), date_lut.toSecond(datetime)), buf); + date_lut.toHour(c.datetime), date_lut.toMinute(c.datetime), date_lut.toSecond(c.datetime)), buf); buf.write(fractional_time_delimiter); - writeIntText(nanos_since_second, buf); + + char data[9]; + int written = sprintf(data, "%09d", c.nanos); + writeText(&data[0], static_cast(written), buf); } - -/// Methods for output in binary format. -template -inline std::enable_if_t, void> -writeBinary(const T & x, WriteBuffer & buf) { writePODBinary(x, buf); } - -inline void writeBinary(const String & x, WriteBuffer & buf) { writeStringBinary(x, buf); } -inline void writeBinary(const StringRef & x, WriteBuffer & buf) { writeStringBinary(x, buf); } -inline void writeBinary(const Int128 & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const UInt128 & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const UInt256 & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const Decimal32 & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const Decimal64 & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const Decimal128 & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const LocalDate & x, WriteBuffer & buf) { writePODBinary(x, buf); } -inline void writeBinary(const LocalDateTime & x, WriteBuffer & buf) { writePODBinary(x, buf); } - - -/// Methods for outputting the value in text form for a tab-separated format. -template -inline std::enable_if_t, void> -writeText(const T & x, WriteBuffer & buf) { writeIntText(x, buf); } - -template -inline std::enable_if_t, void> -writeText(const T & x, WriteBuffer & buf) { writeFloatText(x, buf); } - -inline void writeText(const String & x, WriteBuffer & buf) { writeEscapedString(x, buf); } - -/// Implemented as template specialization (not function overload) to avoid preference over templates on arithmetic types above. -template <> inline void writeText(const bool & x, WriteBuffer & buf) { writeBoolText(x, buf); } - -/// unlike the method for std::string -/// assumes here that `x` is a null-terminated string. -inline void writeText(const char * x, WriteBuffer & buf) { writeEscapedString(x, strlen(x), buf); } -inline void writeText(const char * x, size_t size, WriteBuffer & buf) { writeEscapedString(x, size, buf); } - inline void writeText(const LocalDate & x, WriteBuffer & buf) { writeDateText(x, buf); } inline void writeText(const LocalDateTime & x, WriteBuffer & buf) { writeDateTimeText(x, buf); } inline void writeText(const UUID & x, WriteBuffer & buf) { writeUUIDText(x, buf); } diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.reference b/dbms/tests/queries/0_stateless/00921_datetime64.reference index c866f4b76b8..31ac3f9f76b 100644 --- a/dbms/tests/queries/0_stateless/00921_datetime64.reference +++ b/dbms/tests/queries/0_stateless/00921_datetime64.reference @@ -1,3 +1,3 @@ -2 1970-01-01 01:00:01.000000001 1 0 -2 1970-01-01 01:00:01.000000003 3 3 -2 1970-01-01 01:00:01.000000005 5 3 +2 1970-01-01 00:00:00.000000001 1 0 +2 1970-01-01 00:00:00.000000003 3 3 +2 1970-01-01 00:00:00.000000005 5 3 diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.sql b/dbms/tests/queries/0_stateless/00921_datetime64.sql index 82938dbf5ed..35009a2c3bb 100644 --- a/dbms/tests/queries/0_stateless/00921_datetime64.sql +++ b/dbms/tests/queries/0_stateless/00921_datetime64.sql @@ -4,10 +4,10 @@ DROP TABLE IF EXISTS A; DROP TABLE IF EXISTS B; CREATE TABLE A(k UInt32, t DateTime64, a Float64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO A(k,t,a) VALUES (2,1000000001,1),(2,1000000003,3),(2,1000000005,5); +INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5); CREATE TABLE B(k UInt32, t DateTime64, b Float64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (2,1000000003,3); +INSERT INTO B(k,t,b) VALUES (2,2,3); SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t); From 176fc98a32128b0bbc18226735213ce8713513db Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Fri, 3 May 2019 00:46:04 +0100 Subject: [PATCH 007/147] also parse datetime64 from string --- dbms/src/DataTypes/DataTypeDateTime.cpp | 11 +++++---- dbms/src/DataTypes/DataTypeDateTime.h | 2 ++ dbms/src/DataTypes/IDataType.h | 1 + dbms/src/IO/WriteHelpers.h | 2 +- dbms/src/Interpreters/Join.cpp | 2 +- dbms/src/Interpreters/RowRefs.cpp | 20 ++++++++++++---- dbms/src/Interpreters/RowRefs.h | 8 +++++-- dbms/src/Interpreters/convertFieldToType.cpp | 23 +++++++++++++++++++ .../queries/0_stateless/00921_datetime64.sql | 7 +++--- 9 files changed, 61 insertions(+), 15 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index b1efe1b34a8..92af33d24d4 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -48,7 +48,8 @@ struct TypeGetter { static constexpr TypeIndex Index = TypeIndex::DateTime; static constexpr const char * Name = "DateTime"; - static void write(Type datetime, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { + static void write(Type datetime, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) + { writeDateTimeText(datetime, buf, date_lut); } }; @@ -62,8 +63,9 @@ struct TypeGetter { static constexpr TypeIndex Index = TypeIndex::DateTime64; static constexpr const char * Name = "DateTime64"; - static void write(Type datetime64, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { - writeDateTimeText(datetime64, buf, date_lut); + static void write(DateTime64::Type t, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) + { + writeDateTimeText(DateTime64(t), buf, date_lut); } }; @@ -101,7 +103,8 @@ TypeIndex DataTypeDateTimeBase::getTypeId() const template void DataTypeDateTimeBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - writeDateTimeText(static_cast::Column &>(column).getData()[row_num], ostr, time_zone); + using TG = TypeGetter; + TG::write(static_cast(column).getData()[row_num], ostr, time_zone); } template diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index 53628d85c58..d9d89af0cbe 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -79,9 +79,11 @@ struct DateTime64 { Components split() const; explicit DateTime64(Components c); + explicit DateTime64(Type tt) : t{tt} {} explicit operator bool() const { return t != 0; } + Type get() const { return t; } private: Type t; }; diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 60124cd3d5d..78be302d7a7 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -527,6 +527,7 @@ struct WhichDataType bool isDate() const { return idx == TypeIndex::Date; } bool isDateTime() const { return idx == TypeIndex::DateTime; } + bool isDateTime64() const { return idx == TypeIndex::DateTime64; } bool isDateOrDateTime() const { return isDate() || isDateTime(); } bool isString() const { return idx == TypeIndex::String; } diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h index 5aa5d8f9526..9112a485668 100644 --- a/dbms/src/IO/WriteHelpers.h +++ b/dbms/src/IO/WriteHelpers.h @@ -733,7 +733,7 @@ inline void writeDateTimeText(DateTime64 datetime64, WriteBuffer & buf, const Da buf.write(fractional_time_delimiter); - char data[9]; + char data[10]; int written = sprintf(data, "%09d", c.nanos); writeText(&data[0], static_cast(written), buf); } diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 08d42331795..7d8f664ab13 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -304,7 +304,7 @@ void Join::setSampleBlock(const Block & block) asof_type = AsofRowRefs::getTypeSize(asof_column, asof_size); if (!asof_type) { - std::string msg = "ASOF join not supported for type"; + std::string msg = "ASOF join not supported for type: "; msg += asof_column->getFamilyName(); throw Exception(msg, ErrorCodes::BAD_TYPE_OF_FIELD); } diff --git a/dbms/src/Interpreters/RowRefs.cpp b/dbms/src/Interpreters/RowRefs.cpp index 46e665ab423..1be4d526d26 100644 --- a/dbms/src/Interpreters/RowRefs.cpp +++ b/dbms/src/Interpreters/RowRefs.cpp @@ -18,8 +18,10 @@ void callWithType(AsofRowRefs::Type which, F && f) { switch (which) { - case AsofRowRefs::Type::key32: return f(UInt32()); - case AsofRowRefs::Type::key64: return f(UInt64()); + case AsofRowRefs::Type::keyu32: return f(UInt32()); + case AsofRowRefs::Type::keyu64: return f(UInt64()); + case AsofRowRefs::Type::keyi32: return f(Int32()); + case AsofRowRefs::Type::keyi64: return f(Int64()); case AsofRowRefs::Type::keyf32: return f(Float32()); case AsofRowRefs::Type::keyf64: return f(Float64()); } @@ -89,12 +91,22 @@ std::optional AsofRowRefs::getTypeSize(const IColumn * asof_c if (typeid_cast *>(asof_column)) { size = sizeof(UInt32); - return Type::key32; + return Type::keyu32; } else if (typeid_cast *>(asof_column)) { size = sizeof(UInt64); - return Type::key64; + return Type::keyu64; + } + else if (typeid_cast *>(asof_column)) + { + size = sizeof(Int32); + return Type::keyi32; + } + else if (typeid_cast *>(asof_column)) + { + size = sizeof(Int64); + return Type::keyi64; } else if (typeid_cast *>(asof_column)) { diff --git a/dbms/src/Interpreters/RowRefs.h b/dbms/src/Interpreters/RowRefs.h index 76efb33543d..c168c0302e1 100644 --- a/dbms/src/Interpreters/RowRefs.h +++ b/dbms/src/Interpreters/RowRefs.h @@ -126,13 +126,17 @@ public: using Lookups = std::variant< Entry::LookupPtr, Entry::LookupPtr, + Entry::LookupPtr, + Entry::LookupPtr, Entry::LookupPtr, Entry::LookupPtr>; enum class Type { - key32, - key64, + keyu32, + keyu64, + keyi32, + keyi64, keyf32, keyf64, }; diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp index 0690f24159a..a129ebdb284 100644 --- a/dbms/src/Interpreters/convertFieldToType.cpp +++ b/dbms/src/Interpreters/convertFieldToType.cpp @@ -136,6 +136,18 @@ UInt64 stringToDateTime(const String & s) return UInt64(date_time); } +DateTime64::Type stringToDateTime64(const String & s) +{ + ReadBufferFromString in(s); + DateTime64 datetime64 {0}; + + readDateTimeText(datetime64, in); + if (!in.eof()) + throw Exception("String is too long for DateTime64: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); + + return datetime64.get(); +} + Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const IDataType * from_type_hint) { WhichDataType which_type(type); @@ -143,6 +155,8 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (from_type_hint) which_from_type = WhichDataType(*from_type_hint); + std::cout << "which_type=" << (int)which_type.idx << " which_from_type=" << (int)which_from_type.idx << std::endl; + /// Conversion between Date and DateTime and vice versa. if (which_type.isDate() && which_from_type.isDateTime()) { @@ -152,6 +166,10 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); } +// else if (which_type.isDateTime64()) +// { +// throw Exception{"DateTime64 conversion not yet done error: unknown numeric type " + type.getName(), ErrorCodes::LOGICAL_ERROR}; +// } else if (type.isValueRepresentedByNumber()) { if (which_type.isUInt8()) return convertNumericType(src, type); @@ -187,6 +205,11 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID /// Convert 'YYYY-MM-DD hh:mm:ss' Strings to DateTime return stringToDateTime(src.get()); } + else if (which_type.isDateTime64()) + { + /// Convert 'YYYY-MM-DD hh:mm:ss.NNNNNNNNN' Strings to DateTime + return stringToDateTime64(src.get()); + } else if (which_type.isUUID()) { return stringToUUID(src.get()); diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.sql b/dbms/tests/queries/0_stateless/00921_datetime64.sql index 35009a2c3bb..8f1d016e5eb 100644 --- a/dbms/tests/queries/0_stateless/00921_datetime64.sql +++ b/dbms/tests/queries/0_stateless/00921_datetime64.sql @@ -4,12 +4,13 @@ DROP TABLE IF EXISTS A; DROP TABLE IF EXISTS B; CREATE TABLE A(k UInt32, t DateTime64, a Float64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO A(k,t,a) VALUES (2,1,1),(2,3,3),(2,5,5); +INSERT INTO A(k,t,a) VALUES (2,1,1),(2,50000,3); +INSERT INTO A(k,t,a) VALUES (2,'2019-05-03 00:25:25.123456789',5); CREATE TABLE B(k UInt32, t DateTime64, b Float64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (2,2,3); +INSERT INTO B(k,t,b) VALUES (2,40000,3); -SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t); +SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t) FORMAT CSV; DROP TABLE B; DROP TABLE A; From c5af12ad6833b62ba97f5338d908500ddb076c8b Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Fri, 3 May 2019 01:14:51 +0100 Subject: [PATCH 008/147] fixed up test --- dbms/src/DataTypes/DataTypeDateTime.cpp | 19 +++++++------------ dbms/src/DataTypes/IDataType.h | 2 +- dbms/src/Interpreters/convertFieldToType.cpp | 6 ------ .../0_stateless/00921_datetime64.reference | 6 +++--- .../queries/0_stateless/00921_datetime64.sql | 2 +- 5 files changed, 12 insertions(+), 23 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index 92af33d24d4..422f7916cfe 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -43,30 +43,25 @@ template<> struct TypeGetter { using Type = time_t; using Column = ColumnUInt32; -// static_assert(sizeof(Column::value_type) == sizeof(Type)); + using Convertor = time_t; + + // This is not actually true, which is bad form as it truncates the value from time_t (long int) into uint32_t + // static_assert(sizeof(Column::value_type) == sizeof(Type)); static constexpr TypeIndex Index = TypeIndex::DateTime; static constexpr const char * Name = "DateTime"; - - static void write(Type datetime, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) - { - writeDateTimeText(datetime, buf, date_lut); - } }; template<> struct TypeGetter { using Type = DateTime64::Type; using Column = ColumnUInt64; + using Convertor = DateTime64; + static_assert(sizeof(Column::value_type) == sizeof(Type)); static constexpr TypeIndex Index = TypeIndex::DateTime64; static constexpr const char * Name = "DateTime64"; - - static void write(DateTime64::Type t, WriteBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) - { - writeDateTimeText(DateTime64(t), buf, date_lut); - } }; template @@ -104,7 +99,7 @@ template void DataTypeDateTimeBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { using TG = TypeGetter; - TG::write(static_cast(column).getData()[row_num], ostr, time_zone); + writeDateTimeText(typename TG::Convertor(static_cast(column).getData()[row_num]), ostr, time_zone); } template diff --git a/dbms/src/DataTypes/IDataType.h b/dbms/src/DataTypes/IDataType.h index 78be302d7a7..420451f45d3 100644 --- a/dbms/src/DataTypes/IDataType.h +++ b/dbms/src/DataTypes/IDataType.h @@ -528,7 +528,7 @@ struct WhichDataType bool isDate() const { return idx == TypeIndex::Date; } bool isDateTime() const { return idx == TypeIndex::DateTime; } bool isDateTime64() const { return idx == TypeIndex::DateTime64; } - bool isDateOrDateTime() const { return isDate() || isDateTime(); } + bool isDateOrDateTime() const { return isDate() || isDateTime() || isDateTime64(); } bool isString() const { return idx == TypeIndex::String; } bool isFixedString() const { return idx == TypeIndex::FixedString; } diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp index a129ebdb284..086757f9913 100644 --- a/dbms/src/Interpreters/convertFieldToType.cpp +++ b/dbms/src/Interpreters/convertFieldToType.cpp @@ -155,8 +155,6 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID if (from_type_hint) which_from_type = WhichDataType(*from_type_hint); - std::cout << "which_type=" << (int)which_type.idx << " which_from_type=" << (int)which_from_type.idx << std::endl; - /// Conversion between Date and DateTime and vice versa. if (which_type.isDate() && which_from_type.isDateTime()) { @@ -166,10 +164,6 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID { return static_cast(type).getTimeZone().fromDayNum(DayNum(src.get())); } -// else if (which_type.isDateTime64()) -// { -// throw Exception{"DateTime64 conversion not yet done error: unknown numeric type " + type.getName(), ErrorCodes::LOGICAL_ERROR}; -// } else if (type.isValueRepresentedByNumber()) { if (which_type.isUInt8()) return convertNumericType(src, type); diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.reference b/dbms/tests/queries/0_stateless/00921_datetime64.reference index 31ac3f9f76b..e7f7766d579 100644 --- a/dbms/tests/queries/0_stateless/00921_datetime64.reference +++ b/dbms/tests/queries/0_stateless/00921_datetime64.reference @@ -1,3 +1,3 @@ -2 1970-01-01 00:00:00.000000001 1 0 -2 1970-01-01 00:00:00.000000003 3 3 -2 1970-01-01 00:00:00.000000005 5 3 +2 1970-01-01 01:00:00.000000001 1 0 +2 1970-01-01 01:00:00.000050000 3 3 +2 2019-05-03 00:25:25.123456789 5 3 diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.sql b/dbms/tests/queries/0_stateless/00921_datetime64.sql index 8f1d016e5eb..be7d7b834ef 100644 --- a/dbms/tests/queries/0_stateless/00921_datetime64.sql +++ b/dbms/tests/queries/0_stateless/00921_datetime64.sql @@ -10,7 +10,7 @@ INSERT INTO A(k,t,a) VALUES (2,'2019-05-03 00:25:25.123456789',5); CREATE TABLE B(k UInt32, t DateTime64, b Float64) ENGINE = MergeTree() ORDER BY (k, t); INSERT INTO B(k,t,b) VALUES (2,40000,3); -SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t) FORMAT CSV; +SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t); DROP TABLE B; DROP TABLE A; From 118e498eab01a7187466e0135e20ad8c2cda484e Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sat, 4 May 2019 01:07:54 +0100 Subject: [PATCH 009/147] working timezone conversion in the toString function --- dbms/src/Core/callOnTypeIndex.h | 2 ++ dbms/src/DataTypes/DataTypeDateTime.cpp | 1 + dbms/src/Functions/FunctionsConversion.h | 27 +++++++++++++++++-- .../0_stateless/00921_datetime64.reference | 6 ++--- .../queries/0_stateless/00921_datetime64.sql | 2 +- 5 files changed, 32 insertions(+), 6 deletions(-) diff --git a/dbms/src/Core/callOnTypeIndex.h b/dbms/src/Core/callOnTypeIndex.h index ad2a98d8112..ac4e555212c 100644 --- a/dbms/src/Core/callOnTypeIndex.h +++ b/dbms/src/Core/callOnTypeIndex.h @@ -146,6 +146,7 @@ inline bool callOnBasicTypes(TypeIndex type_num1, TypeIndex type_num2, F && f) class DataTypeDate; class DataTypeDateTime; +class DataTypeDateTime64; class DataTypeString; class DataTypeFixedString; class DataTypeUUID; @@ -178,6 +179,7 @@ bool callOnIndexAndDataType(TypeIndex number, F && f) case TypeIndex::Date: return f(TypePair()); case TypeIndex::DateTime: return f(TypePair()); + case TypeIndex::DateTime64: return f(TypePair()); case TypeIndex::String: return f(TypePair()); case TypeIndex::FixedString: return f(TypePair()); diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index 422f7916cfe..c90b6775712 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -99,6 +99,7 @@ template void DataTypeDateTimeBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { using TG = TypeGetter; + std::cout << "serializing text for DataTypeDateTimeBase = " << TG::Name << " tz=" << time_zone.getTimeZone() << std::endl; writeDateTimeText(typename TG::Convertor(static_cast(column).getData()[row_num]), ostr, time_zone); } diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 891e254d9cf..801c5f8cae1 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -216,6 +216,7 @@ struct FormatImpl { static void execute(const typename DataType::FieldType x, WriteBuffer & wb, const DataType *, const DateLUTImpl *) { + std::cout << "!!!!!!!!!!!!!!!!!!!!!!! performing FormatImpl<>" << std::endl; writeText(x, wb); } }; @@ -225,6 +226,7 @@ struct FormatImpl { static void execute(const DataTypeDate::FieldType x, WriteBuffer & wb, const DataTypeDate *, const DateLUTImpl *) { + std::cout << "!!!!!!!!!!!!!!!!!!!!!!! performing FormatImpl v=" << x << std::endl; writeDateText(DayNum(x), wb); } }; @@ -234,10 +236,22 @@ struct FormatImpl { static void execute(const DataTypeDateTime::FieldType x, WriteBuffer & wb, const DataTypeDateTime *, const DateLUTImpl * time_zone) { + std::cout << "!!!!!!!!!!!!!!!!!!!!!!! performing FormatImpl v=" << x << " tz=" << time_zone->getTimeZone() << std::endl; writeDateTimeText(x, wb, *time_zone); } }; +template <> +struct FormatImpl +{ + static void execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 *, const DateLUTImpl * time_zone) + { + std::cout << "!!!!!!!!!!!!!!!!!!!!!!! performing FormatImpl v=" << x << " tz=" << (void*)time_zone << std::endl; + writeDateTimeText(DateTime64(x), wb, *time_zone); + } +}; + + template struct FormatImpl> { @@ -276,13 +290,14 @@ struct ConvertImpl(*col_with_type_and_name.type); const DateLUTImpl * time_zone = nullptr; /// For argument of DateTime type, second argument with time zone could be specified. - if constexpr (std::is_same_v) + if constexpr (std::is_same_v || std::is_same_v) time_zone = &extractTimeZoneFromFunctionArguments(block, arguments, 1, 0); if (const auto col_from = checkAndGetColumn(col_with_type_and_name.column.get())) @@ -293,11 +308,14 @@ struct ConvertImplgetChars(); ColumnString::Offsets & offsets_to = col_to->getOffsets(); size_t size = vec_from.size(); + std::cout << "vec_form size=" << size << " bytes=" << vec_from.allocated_bytes() << std::endl; if constexpr (std::is_same_v) data_to.resize(size * (strlen("YYYY-MM-DD") + 1)); else if constexpr (std::is_same_v) data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1)); + else if constexpr (std::is_same_v) + data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.nnnnnnnnn") + 1)); else data_to.resize(size * 3); /// Arbitary @@ -816,7 +834,7 @@ public: || std::is_same_v; if (!(to_date_or_time - || (std::is_same_v && WhichDataType(arguments[0].type).isDateTime()))) + || (std::is_same_v && (WhichDataType(arguments[0].type).isDateTime() || WhichDataType(arguments[0].type).isDateTime64())))) { throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 1.", @@ -826,6 +844,8 @@ public: if (std::is_same_v) return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); + else if (std::is_same_v) + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); else return std::make_shared(); } @@ -907,12 +927,15 @@ private: } else ConvertImpl::execute(block, arguments, result, input_rows_count); + + std::cout << "finished running convertImpl " << std::endl; return true; }; bool done = callOnIndexAndDataType(from_type->getTypeId(), call); if (!done) { + std::cout << "not done yet, falling back on generic conversion" << std::endl; /// Generic conversion of any type to String. if (std::is_same_v) { diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.reference b/dbms/tests/queries/0_stateless/00921_datetime64.reference index e7f7766d579..60a2e77034a 100644 --- a/dbms/tests/queries/0_stateless/00921_datetime64.reference +++ b/dbms/tests/queries/0_stateless/00921_datetime64.reference @@ -1,3 +1,3 @@ -2 1970-01-01 01:00:00.000000001 1 0 -2 1970-01-01 01:00:00.000050000 3 3 -2 2019-05-03 00:25:25.123456789 5 3 +2 1970-01-01 03:00:00.000000001 1 0 +2 1970-01-01 03:00:00.000050000 3 3 +2 2019-05-03 02:25:25.123456789 5 3 diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.sql b/dbms/tests/queries/0_stateless/00921_datetime64.sql index be7d7b834ef..5c96c773171 100644 --- a/dbms/tests/queries/0_stateless/00921_datetime64.sql +++ b/dbms/tests/queries/0_stateless/00921_datetime64.sql @@ -10,7 +10,7 @@ INSERT INTO A(k,t,a) VALUES (2,'2019-05-03 00:25:25.123456789',5); CREATE TABLE B(k UInt32, t DateTime64, b Float64) ENGINE = MergeTree() ORDER BY (k, t); INSERT INTO B(k,t,b) VALUES (2,40000,3); -SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t); +SELECT k, toString(A.t, 'Europe/Moscow'), a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t); DROP TABLE B; DROP TABLE A; From 81ca591b2366fea17ebf2073bce9331e7cf27123 Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sat, 4 May 2019 13:13:59 +0100 Subject: [PATCH 010/147] various formatters are working, though there is still an issue with toDate for some reason, maybe the data is unaligned? --- dbms/src/Core/callOnTypeIndex.h | 4 +- dbms/src/DataTypes/DataTypeDateTime.h | 7 +++ dbms/src/Functions/DateTimeTransforms.h | 4 +- .../FunctionDateOrDateTimeToSomething.h | 17 ++++++- dbms/src/Functions/now.cpp | 44 ++++++++++++++++--- .../queries/0_stateless/00921_datetime64.sql | 21 +++++---- 6 files changed, 77 insertions(+), 20 deletions(-) diff --git a/dbms/src/Core/callOnTypeIndex.h b/dbms/src/Core/callOnTypeIndex.h index ac4e555212c..e33a3b76772 100644 --- a/dbms/src/Core/callOnTypeIndex.h +++ b/dbms/src/Core/callOnTypeIndex.h @@ -3,6 +3,7 @@ #include #include +#include namespace DB { @@ -71,6 +72,7 @@ bool callOnBasicType(TypeIndex number, F && f) { case TypeIndex::Date: return f(TypePair()); case TypeIndex::DateTime: return f(TypePair()); + case TypeIndex::DateTime64: return f(TypePair()); default: break; } @@ -145,8 +147,6 @@ inline bool callOnBasicTypes(TypeIndex type_num1, TypeIndex type_num2, F && f) class DataTypeDate; -class DataTypeDateTime; -class DataTypeDateTime64; class DataTypeString; class DataTypeFixedString; class DataTypeUUID; diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index d9d89af0cbe..980ab08f0d2 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -5,6 +5,13 @@ class DateLUTImpl; + +template class> +struct is_instance : public std::false_type {}; + +template class U> +struct is_instance, U> : public std::true_type {}; + namespace DB { diff --git a/dbms/src/Functions/DateTimeTransforms.h b/dbms/src/Functions/DateTimeTransforms.h index 6890b513602..f43cb76dbc0 100644 --- a/dbms/src/Functions/DateTimeTransforms.h +++ b/dbms/src/Functions/DateTimeTransforms.h @@ -5,7 +5,7 @@ #include #include #include - +#include namespace DB { @@ -47,10 +47,12 @@ struct ToDateImpl static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { + std::cout << "converting UInt32 t=" << t << " " << name << std::endl; return UInt16(time_zone.toDayNum(t)); } static inline UInt16 execute(UInt16 d, const DateLUTImpl &) { + std::cout << "converting UInt16 d=" << d << " " << name << std::endl; return d; } diff --git a/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h b/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h index bb32230a5b1..ad9aed612ee 100644 --- a/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -15,6 +15,17 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +template +struct WithDateTime64Converter : public Transform { + static inline auto execute(DataTypeDateTime64::FieldType t, const DateLUTImpl & time_zone) + { + auto x = DateTime64(t); + auto res = Transform::execute(static_cast(x.split().datetime), time_zone); + std::cout << "calling through datetime64 wrapper v=" << x.get() << "tz= " << time_zone.getTimeZone() << " result=" << res << std::endl; + return res; + } +}; + /// See DateTimeTransforms.h template @@ -67,8 +78,8 @@ public: ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// For DateTime, if time zone is specified, attach it to type. - if (std::is_same_v) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); + if constexpr (is_instance{}) + return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); else return std::make_shared(); } @@ -85,6 +96,8 @@ public: DateTimeTransformImpl::execute(block, arguments, result, input_rows_count); else if (which.isDateTime()) DateTimeTransformImpl::execute(block, arguments, result, input_rows_count); + else if (which.isDateTime64()) + DateTimeTransformImpl>::execute(block, arguments, result, input_rows_count); else throw Exception("Illegal type " + block.getByPosition(arguments[0]).type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/src/Functions/now.cpp b/dbms/src/Functions/now.cpp index 59501d96088..2706d8b849a 100644 --- a/dbms/src/Functions/now.cpp +++ b/dbms/src/Functions/now.cpp @@ -7,12 +7,43 @@ namespace DB { +template +struct TypeGetter; + +template<> +struct TypeGetter { + using Type = DataTypeDateTime64; + static constexpr auto name = "now64"; + + static DateTime64::Type now() { + long int ns; + time_t sec; + timespec spec; + clock_gettime(CLOCK_REALTIME, &spec); + sec = spec.tv_sec; + ns = spec.tv_nsec; + return 1000 * 1000 * 1000 * sec + ns; + } +}; + +template<> +struct TypeGetter { + using Type = DataTypeDateTime; + static constexpr auto name = "now"; + + static UInt64 now() { + return static_cast(time(nullptr)); + } +}; + + /// Get the current time. (It is a constant, it is evaluated once for the entire query.) +template class FunctionNow : public IFunction { public: - static constexpr auto name = "now"; - static FunctionPtr create(const Context &) { return std::make_shared(); } + static constexpr auto name = TypeGetter::name; + static FunctionPtr create(const Context &) { return std::make_shared>(); } String getName() const override { @@ -23,22 +54,21 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { - return std::make_shared(); + return std::make_shared::Type>(); } bool isDeterministic() const override { return false; } void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = DataTypeDateTime().createColumnConst( - input_rows_count, - static_cast(time(nullptr))); + block.getByPosition(result).column = typename TypeGetter::Type().createColumnConst(input_rows_count, TypeGetter::now()); } }; void registerFunctionNow(FunctionFactory & factory) { - factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction>(FunctionFactory::CaseInsensitive); } } diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.sql b/dbms/tests/queries/0_stateless/00921_datetime64.sql index 5c96c773171..bce8c6ad2a5 100644 --- a/dbms/tests/queries/0_stateless/00921_datetime64.sql +++ b/dbms/tests/queries/0_stateless/00921_datetime64.sql @@ -1,16 +1,21 @@ USE test; DROP TABLE IF EXISTS A; -DROP TABLE IF EXISTS B; +-- DROP TABLE IF EXISTS B; -CREATE TABLE A(k UInt32, t DateTime64, a Float64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO A(k,t,a) VALUES (2,1,1),(2,50000,3); -INSERT INTO A(k,t,a) VALUES (2,'2019-05-03 00:25:25.123456789',5); +CREATE TABLE A(t DateTime64, a Float64) ENGINE = MergeTree() ORDER BY t; +-- INSERT INTO A(t,a) VALUES (1,1),(50000,3); +INSERT INTO A(t,a) VALUES ('2019-05-03 11:25:25.123456789',5); +-- INSERT INTO A(t,a) VALUES (1556841600034000001,5); +-- INSERT INTO A(t,a) VALUES (now64(),5); -CREATE TABLE B(k UInt32, t DateTime64, b Float64) ENGINE = MergeTree() ORDER BY (k, t); -INSERT INTO B(k,t,b) VALUES (2,40000,3); +-- 1556841600034 -SELECT k, toString(A.t, 'Europe/Moscow'), a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t); -DROP TABLE B; +-- CREATE TABLE B(k UInt32, t DateTime64, b Float64) ENGINE = MergeTree() ORDER BY (k, t); +-- INSERT INTO B(k,t,b) VALUES (2,40000,3); + +SELECT toString(t, 'UTC'), toDate(t), toStartOfDay(t), toStartOfQuarter(t), toTime(t), toStartOfMinute(t), a FROM A ORDER BY t; + +-- DROP TABLE B; DROP TABLE A; From 3d7656ce6196a8c72df36b508d5ae2141b1b813d Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Sat, 4 May 2019 16:33:17 +0100 Subject: [PATCH 011/147] working datetime64 with some conversions --- dbms/src/Core/DateTime64.cpp | 22 +++++++++++++++ dbms/src/Core/DateTime64.h | 28 +++++++++++++++++++ dbms/src/DataTypes/DataTypeDateTime.cpp | 16 ----------- dbms/src/DataTypes/DataTypeDateTime.h | 22 +-------------- dbms/src/Functions/DateTimeTransforms.h | 6 ++++ dbms/src/Functions/FunctionsConversion.h | 3 ++ .../0_stateless/00921_datetime64.reference | 5 ++-- .../queries/0_stateless/00921_datetime64.sql | 21 +++++--------- 8 files changed, 69 insertions(+), 54 deletions(-) create mode 100644 dbms/src/Core/DateTime64.cpp create mode 100644 dbms/src/Core/DateTime64.h diff --git a/dbms/src/Core/DateTime64.cpp b/dbms/src/Core/DateTime64.cpp new file mode 100644 index 00000000000..53315a60a6d --- /dev/null +++ b/dbms/src/Core/DateTime64.cpp @@ -0,0 +1,22 @@ +#include + +#include + +namespace DB { + +static constexpr UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; + +DateTime64::Components DateTime64::split() const +{ + auto datetime = static_cast(t / NANOS_PER_SECOND); + auto nanos = static_cast(t % NANOS_PER_SECOND); + return Components { datetime, nanos }; +} + +DateTime64::DateTime64(DateTime64::Components c) + : t {c.datetime * NANOS_PER_SECOND + c.nanos} +{ + assert(c.nanos < NANOS_PER_SECOND); +} + +} diff --git a/dbms/src/Core/DateTime64.h b/dbms/src/Core/DateTime64.h new file mode 100644 index 00000000000..19544d0a698 --- /dev/null +++ b/dbms/src/Core/DateTime64.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace DB { + +// this is a separate struct to avoid accidental conversions that +// might occur between time_t and the type storing the datetime64 +// time_t might have a different definition on different libcs +struct DateTime64 { + using Type = Int64; + struct Components { + time_t datetime = 0; + UInt32 nanos = 0; + }; + + Components split() const; + explicit DateTime64(Components c); + explicit DateTime64(Type tt) : t{tt} {} + explicit operator bool() const { + return t != 0; + } + Type get() const { return t; } +private: + Type t; +}; + +} diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index c90b6775712..df1af4360b3 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -21,21 +21,6 @@ namespace DB { -static constexpr UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; - -DateTime64::Components DateTime64::split() const -{ - auto datetime = static_cast(t / NANOS_PER_SECOND); - auto nanos = static_cast(t % NANOS_PER_SECOND); - return Components { datetime, nanos }; -} - -DateTime64::DateTime64(DateTime64::Components c) - : t {c.datetime * NANOS_PER_SECOND + c.nanos} -{ - assert(c.nanos >= 0 and c.nanos < NANOS_PER_SECOND); -} - template struct TypeGetter; @@ -99,7 +84,6 @@ template void DataTypeDateTimeBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { using TG = TypeGetter; - std::cout << "serializing text for DataTypeDateTimeBase = " << TG::Name << " tz=" << time_zone.getTimeZone() << std::endl; writeDateTimeText(typename TG::Convertor(static_cast(column).getData()[row_num]), ostr, time_zone); } diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index 980ab08f0d2..32acb20904c 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -1,5 +1,6 @@ #pragma once +#include #include @@ -74,27 +75,6 @@ struct DataTypeDateTime : DataTypeDateTimeBase { using DataTypeDateTimeBase::DataTypeDateTimeBase; }; -// this is a separate class to avoid accidental conversions that -// might occur between time_t and the type storing the datetime64 -// time_t might have a different definition on different libcs -struct DateTime64 { - using Type = Int64; - struct Components { - time_t datetime = 0; - UInt32 nanos = 0; - }; - - Components split() const; - explicit DateTime64(Components c); - explicit DateTime64(Type tt) : t{tt} {} - explicit operator bool() const { - return t != 0; - } - Type get() const { return t; } -private: - Type t; -}; - struct DataTypeDateTime64 : DataTypeDateTimeBase { using DataTypeDateTimeBase::DataTypeDateTimeBase; }; diff --git a/dbms/src/Functions/DateTimeTransforms.h b/dbms/src/Functions/DateTimeTransforms.h index f43cb76dbc0..857b76241b7 100644 --- a/dbms/src/Functions/DateTimeTransforms.h +++ b/dbms/src/Functions/DateTimeTransforms.h @@ -45,6 +45,12 @@ struct ToDateImpl { static constexpr auto name = "toDate"; + static inline UInt16 execute(DateTime64::Type t, const DateLUTImpl & time_zone) + { + std::cout << "converting DateTime64 t=" << t << " " << name << std::endl; + return UInt16(time_zone.toDayNum(DateTime64(t).split().datetime)); + } + static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) { std::cout << "converting UInt32 t=" << t << " " << name << std::endl; diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 801c5f8cae1..fba3ad2d552 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -188,6 +188,9 @@ struct ToDateTransform32Or64 template struct ConvertImpl : DateTimeTransformImpl {}; +template struct ConvertImpl + : DateTimeTransformImpl {}; + /** Special case of converting (U)Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to Date. * If number is less than 65536, then it is treated as DayNum, and if greater or equals, then as unix timestamp. * It's a bit illogical, as we actually have two functions in one. diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.reference b/dbms/tests/queries/0_stateless/00921_datetime64.reference index 60a2e77034a..e670d61cf23 100644 --- a/dbms/tests/queries/0_stateless/00921_datetime64.reference +++ b/dbms/tests/queries/0_stateless/00921_datetime64.reference @@ -1,3 +1,2 @@ -2 1970-01-01 03:00:00.000000001 1 0 -2 1970-01-01 03:00:00.000050000 3 3 -2 2019-05-03 02:25:25.123456789 5 3 +2019-05-03 10:25:25.123456789 2019-05-03 2019-05-03 00:00:00 2019-04-01 1970-01-02 11:25:25 2019-05-03 11:25:00 +2019-05-03 10:25:25.123456789 2019-05-03 2019-05-03 00:00:00 2019-04-01 1970-01-02 11:25:25 2019-05-03 11:25:00 diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.sql b/dbms/tests/queries/0_stateless/00921_datetime64.sql index bce8c6ad2a5..54834bdc2f1 100644 --- a/dbms/tests/queries/0_stateless/00921_datetime64.sql +++ b/dbms/tests/queries/0_stateless/00921_datetime64.sql @@ -1,21 +1,14 @@ USE test; DROP TABLE IF EXISTS A; --- DROP TABLE IF EXISTS B; -CREATE TABLE A(t DateTime64, a Float64) ENGINE = MergeTree() ORDER BY t; --- INSERT INTO A(t,a) VALUES (1,1),(50000,3); -INSERT INTO A(t,a) VALUES ('2019-05-03 11:25:25.123456789',5); --- INSERT INTO A(t,a) VALUES (1556841600034000001,5); --- INSERT INTO A(t,a) VALUES (now64(),5); +CREATE TABLE A(t DateTime64) ENGINE = MergeTree() ORDER BY t; +INSERT INTO A(t) VALUES (1556879125123456789); +INSERT INTO A(t) VALUES ('2019-05-03 11:25:25.123456789'); --- 1556841600034 +SELECT toString(t, 'UTC'), toDate(t), toStartOfDay(t), toStartOfQuarter(t), toTime(t), toStartOfMinute(t) FROM A ORDER BY t; - --- CREATE TABLE B(k UInt32, t DateTime64, b Float64) ENGINE = MergeTree() ORDER BY (k, t); --- INSERT INTO B(k,t,b) VALUES (2,40000,3); - -SELECT toString(t, 'UTC'), toDate(t), toStartOfDay(t), toStartOfQuarter(t), toTime(t), toStartOfMinute(t), a FROM A ORDER BY t; - --- DROP TABLE B; DROP TABLE A; + -- issue toDate does a reinterpret_cast of the datetime64 which is incorrect +-- for the example above, it returns 2036-08-23 which is 0x5F15 days after epoch +-- the datetime64 is 0x159B2550CB345F15 \ No newline at end of file From 2185531f70e7d7905a80ced72e9858895b7c8b10 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Sat, 7 Sep 2019 17:22:22 +0300 Subject: [PATCH 012/147] DateTime64 tests --- .../00921_datetime64_compatibility.python | 153 ++++++++++++++++++ .../00921_datetime64_compatibility.sh | 8 + 2 files changed, 161 insertions(+) create mode 100755 dbms/tests/queries/0_stateless/00921_datetime64_compatibility.python create mode 100755 dbms/tests/queries/0_stateless/00921_datetime64_compatibility.sh diff --git a/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.python b/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.python new file mode 100755 index 00000000000..d2d5388530f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.python @@ -0,0 +1,153 @@ +#!/usr/bin/env python +# encoding: utf-8 + +import re +import itertools + +# Create SQL statement to verify dateTime64 is accepted as argument to functions taking DateTime. +functions=""" +# toTimeZone({datetime}, 'UTC') -- does not work +toYear({datetime}) +toQuarter({datetime}) +toMonth({datetime}) +toDayOfYear({datetime}) +toDayOfMonth({datetime}) +toDayOfWeek({datetime}) +toHour({datetime}) +toMinute({datetime}) +toSecond({datetime}) +toUnixTimestamp({datetime}) +toStartOfYear({datetime}) +toStartOfISOYear({datetime}) +toStartOfQuarter({datetime}) +toStartOfMonth({datetime}) +toMonday({datetime}) +# toStartOfWeek({datetime}) -- there is no such function +toStartOfDay({datetime}) +toStartOfHour({datetime}) +toStartOfMinute({datetime}) +toStartOfFiveMinute({datetime}) +toStartOfTenMinutes({datetime}) +toStartOfFifteenMinutes({datetime}) +# Do not workk with DateTime64 +# toStartOfInterval({datetime}, INTERVAL 1 year) +# toStartOfInterval({datetime}, INTERVAL 1 month) +# toStartOfInterval({datetime}, INTERVAL 1 day) +# toStartOfInterval({datetime}, INTERVAL 15 minute) +toTime({datetime}) +toRelativeYearNum({datetime}) +toRelativeQuarterNum({datetime}) +toRelativeMonthNum({datetime}) +toRelativeWeekNum({datetime}) +toRelativeDayNum({datetime}) +toRelativeHourNum({datetime}) +toRelativeMinuteNum({datetime}) +toRelativeSecondNum({datetime}) +toISOYear({datetime}) +toISOWeek({datetime}) +# toWeek({datetime}) -- Unknown function toWeek +# toYearWeek({datetime}) -- Unknown function toYearWeek +timeSlot({datetime}) +toYYYYMM({datetime}) +toYYYYMMDD({datetime}) +toYYYYMMDDhhmmss({datetime}) +# -- Illegal type DateTime64 of argument of function addYears +# addYears({datetime}, 1) +# addMonths({datetime}, 1) +# addWeeks({datetime}, 1) +# addDays({datetime}, 1) +# addHours({datetime}, 1) +# addMinutes({datetime}, 1) +# addSeconds({datetime}, 1) +# addQuarters({datetime}, 1) +# -- Illegal type DateTime64 of argument of function subtractYears. +# subtractYears({datetime}, 1) +# subtractMonths({datetime}, 1) +# subtractWeeks({datetime}, 1) +# subtractDays({datetime}, 1) +# subtractHours({datetime}, 1) +# subtractMinutes({datetime}, 1) +# subtractSeconds({datetime}, 1) +# subtractQuarters({datetime}, 1) +CAST({datetime} as DateTime) +CAST({datetime} as UInt64) +formatDateTime({datetime}, '%C %d %D %e %F %H %I %j %m %M %n %p %R %S %t %T %u %V %w %y %Y %%') +""".splitlines() + +# filter out empty lines and commented out lines +COMMENTED_OUT_LINE_RE = re.compile(r"^\s*#") +functions = list(filter(lambda f: len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None, functions)) + +# Expanded to cartesian product of all arguments. +# NOTE: {{datetime}} to be turned into {datetime} after str.format() for keys (format string), but not for list of values! +extra_ops =\ +[ + # With same type: + ( + '{{datetime}} {op} {{datetime}}', + { + 'op': + [ + '- ', # does not work, but should it? + '+ ', # does not work, but should it? + '!=', '==', # how do we expect this to work? + '< ', + '<=', + '> ', + '>=' + ] + } + ), + # With other DateTime types: + ( + '{{datetime}} {op} {arg}', + { + 'op': + [ + '-', # does not work, but should it? + '!=', '==', # how do we expect this to work? + # these are naturally expected to work, but they don't: + '< ', + '<=', + '> ', + '>=' + ], + 'arg': ['now()'], + } + ), + # With arithmetic types + ( + '{{datetime}} {op} {arg}', + { + 'op': + [ + '+ ', + '- ', + '==', + '!=', + '< ', + '<=', + '> ', + '>=' + ], + 'arg': + [ + '1', + '-1', + 'toInt64(1)', + 'toInt64(-1)' + ], + }, + ), +] + +# Expand extra_ops here +for f, args in extra_ops: + args_keys = args.keys() + for args_vals in itertools.product(*args.values()): + func = f.format(**dict(zip(args_keys, args_vals))) + functions.append(func) + +for func in functions: + f = func.format(datetime="now64()") + print("""SELECT {function};""".format(function=f)) \ No newline at end of file diff --git a/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.sh b/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.sh new file mode 100755 index 00000000000..1ac0fe8035a --- /dev/null +++ b/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +# We should have correct env vars from shell_config.sh to run this test + +python $CURDIR/00921_datetime64_compatibility.python | ${CLICKHOUSE_CLIENT} -nm \ No newline at end of file From 9cc2baf05c62ed9a2aa556fbb0950f4f616ad265 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 26 Sep 2019 18:12:40 +0300 Subject: [PATCH 013/147] DataType64 as decimal compiles --- dbms/src/Core/DateTime64.cpp | 24 ++-- dbms/src/Core/DateTime64.h | 38 ++--- dbms/src/Core/DecimalFunctions.h | 71 ++++++++++ dbms/src/Core/Types.h | 5 + dbms/src/DataTypes/DataTypeDateTime.cpp | 133 ++++++++++++++++-- dbms/src/DataTypes/DataTypeDateTime.h | 44 ++++-- dbms/src/DataTypes/DataTypeNumberBase.h | 5 + dbms/src/DataTypes/DataTypesDecimal.cpp | 12 +- dbms/src/Functions/DateTimeTransforms.h | 12 +- .../FunctionDateOrDateTimeAddInterval.h | 6 + .../FunctionDateOrDateTimeToSomething.h | 10 +- dbms/src/Functions/FunctionsConversion.h | 40 ++++-- dbms/src/Functions/FunctionsReinterpret.h | 18 ++- dbms/src/Functions/now.cpp | 101 ++++++++----- dbms/src/IO/ReadHelpers.h | 34 ++++- dbms/src/IO/WriteHelpers.h | 15 +- dbms/src/Interpreters/convertFieldToType.cpp | 4 +- .../00921_datetime64_compatibility.python | 5 +- libs/libcommon/include/common/DateLUTImpl.h | 2 +- 19 files changed, 438 insertions(+), 141 deletions(-) create mode 100644 dbms/src/Core/DecimalFunctions.h diff --git a/dbms/src/Core/DateTime64.cpp b/dbms/src/Core/DateTime64.cpp index 53315a60a6d..5676f005cea 100644 --- a/dbms/src/Core/DateTime64.cpp +++ b/dbms/src/Core/DateTime64.cpp @@ -4,19 +4,19 @@ namespace DB { -static constexpr UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; +//static constexpr UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; -DateTime64::Components DateTime64::split() const -{ - auto datetime = static_cast(t / NANOS_PER_SECOND); - auto nanos = static_cast(t % NANOS_PER_SECOND); - return Components { datetime, nanos }; -} +//DateTime64::Components DateTime64::split() const +//{ +// auto datetime = static_cast(t / NANOS_PER_SECOND); +// auto nanos = static_cast(t % NANOS_PER_SECOND); +// return Components { datetime, nanos }; +//} -DateTime64::DateTime64(DateTime64::Components c) - : t {c.datetime * NANOS_PER_SECOND + c.nanos} -{ - assert(c.nanos < NANOS_PER_SECOND); -} +//DateTime64::DateTime64(DateTime64::Components c) +// : t {c.datetime * NANOS_PER_SECOND + c.nanos} +//{ +// assert(c.nanos < NANOS_PER_SECOND); +//} } diff --git a/dbms/src/Core/DateTime64.h b/dbms/src/Core/DateTime64.h index 19544d0a698..e87ad217b77 100644 --- a/dbms/src/Core/DateTime64.h +++ b/dbms/src/Core/DateTime64.h @@ -4,25 +4,25 @@ namespace DB { -// this is a separate struct to avoid accidental conversions that -// might occur between time_t and the type storing the datetime64 -// time_t might have a different definition on different libcs -struct DateTime64 { - using Type = Int64; - struct Components { - time_t datetime = 0; - UInt32 nanos = 0; - }; +//// this is a separate struct to avoid accidental conversions that +//// might occur between time_t and the type storing the datetime64 +//// time_t might have a different definition on different libcs +//struct DateTime64 { +// using Type = Int64; +// struct Components { +// time_t datetime = 0; +// UInt32 nanos = 0; +// }; - Components split() const; - explicit DateTime64(Components c); - explicit DateTime64(Type tt) : t{tt} {} - explicit operator bool() const { - return t != 0; - } - Type get() const { return t; } -private: - Type t; -}; +// Components split() const; +// explicit DateTime64(Components c); +// explicit DateTime64(Type tt) : t{tt} {} +// explicit operator bool() const { +// return t != 0; +// } +// Type get() const { return t; } +//private: +// Type t; +//}; } diff --git a/dbms/src/Core/DecimalFunctions.h b/dbms/src/Core/DecimalFunctions.h new file mode 100644 index 00000000000..4f1ffc0376d --- /dev/null +++ b/dbms/src/Core/DecimalFunctions.h @@ -0,0 +1,71 @@ +#pragma once +// Moved Decimal-related functions out from Core/Types.h to reduce compilation time. + +#include +#include + +#include + +namespace DB +{ + +template T decimalScaleMultiplier(UInt32 scale); +template <> inline Int32 decimalScaleMultiplier(UInt32 scale) { return common::exp10_i32(scale); } +template <> inline Int64 decimalScaleMultiplier(UInt32 scale) { return common::exp10_i64(scale); } +template <> inline Int128 decimalScaleMultiplier(UInt32 scale) { return common::exp10_i128(scale); } + +template +struct DecimalComponents +{ + T whole; + T fractional; +}; + +template +Decimal decimalFromComponents(const T & whole, const T & fractional, UInt32 scale) +{ + const auto mul = decimalScaleMultiplier(scale); + const T value = whole * mul + fractional / decimalScaleMultiplier(std::numeric_limits::digits10 - scale); + return Decimal(value); +} + +template +Decimal decimalFromComponents(const DecimalComponents & components, UInt32 scale) +{ + return decimalFromComponents(components.whole, components.fractional, scale); +} + +template +DecimalComponents decimalSplit(const Decimal & decimal, UInt32 scale) +{ + if (scale == 0) + { + return {decimal.value, 0}; + } + const auto scaleMultiplier = decimalScaleMultiplier(scale); + return {decimal.value / scaleMultiplier, decimal.value % scaleMultiplier}; +} + +template +T decimalWholePart(const Decimal & decimal, size_t scale) +{ + if (scale == 0) + return decimal.value; + + return decimal.value / decimalScaleMultiplier(scale); +} + +template +T decimalFractionalPart(const Decimal & decimal, size_t scale) +{ + if (scale == 0) + return 0; + + T result = decimal.value; + if (result < T(0)) + result *= T(-1); + + return result % decimalScaleMultiplier(scale); +} + +} diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h index a18fed4cf78..536320f9b05 100644 --- a/dbms/src/Core/Types.h +++ b/dbms/src/Core/Types.h @@ -29,6 +29,7 @@ enum class TypeIndex Float64, Date, DateTime, + DateTime32 = DateTime, DateTime64, String, FixedString, @@ -152,6 +153,10 @@ using Decimal32 = Decimal; using Decimal64 = Decimal; using Decimal128 = Decimal; +// TODO (nemkov): consider making a strong typedef +//using DateTime32 = time_t; +using DateTime64 = Decimal64; + template <> struct TypeName { static const char * get() { return "Decimal32"; } }; template <> struct TypeName { static const char * get() { return "Decimal64"; } }; template <> struct TypeName { static const char * get() { return "Decimal128"; } }; diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index 6d56774b8a7..30568d0a12d 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -5,7 +5,8 @@ #include #include #include -#include +#include +#include #include #include #include @@ -28,8 +29,9 @@ struct TypeGetter; template<> struct TypeGetter { using Type = time_t; - using Column = ColumnUInt32; + using Column = ColumnVector; using Convertor = time_t; + using FieldType = NearestFieldType; // This is not actually true, which is bad form as it truncates the value from time_t (long int) into uint32_t // static_assert(sizeof(Column::value_type) == sizeof(Type)); @@ -39,17 +41,31 @@ struct TypeGetter { }; template<> -struct TypeGetter { - using Type = DateTime64::Type; - using Column = ColumnUInt64; +struct TypeGetter { + using Type = DateTime64; + using Column = ColumnDecimal; using Convertor = DateTime64; + using FieldType = NearestFieldType; - static_assert(sizeof(Column::value_type) == sizeof(Type)); + static_assert(sizeof(typename Column::Container::value_type) == sizeof(Type)); static constexpr TypeIndex Index = TypeIndex::DateTime64; static constexpr const char * Name = "DateTime64"; }; +template +bool protobufReadDateTime(ProtobufReader & protobuf, T & date_time) +{ + return protobuf.readDateTime(date_time); +} + +template <> +bool protobufReadDateTime(ProtobufReader & protobuf, DateTime64 & date_time) +{ + // TODO (vnemkov): protobuf.readDecimal ? + return protobuf.readDateTime(date_time.value); +} + template DataTypeDateTimeBase::DataTypeDateTimeBase(const std::string & time_zone_name) : has_explicit_time_zone(!time_zone_name.empty()), @@ -229,7 +245,7 @@ void DataTypeDateTimeBase::deserializeProtobuf(IColumn & column, Pro { row_added = false; typename TypeGetter::Type t; - if (!protobuf.readDateTime(t)) + if (protobufReadDateTime(protobuf, t)) return; auto & container = assert_cast::Column &>(column).getData(); @@ -242,6 +258,40 @@ void DataTypeDateTimeBase::deserializeProtobuf(IColumn & column, Pro container.back() = t; } +// TODO (vnemkov): Binary serialization/deserialization is same as for DataTypeNumberBase. + +template +void DataTypeDateTimeBase::serializeBinary(const Field& /*field*/, WriteBuffer& /*ostr*/) const +{ +// // Same as +// typename TypeGetter::Column::value_type x = get>(field); +// writeBinary(x, ostr); +} + +template +void DataTypeDateTimeBase::deserializeBinary(Field&, ReadBuffer&) const +{ + +} + +template +void DataTypeDateTimeBase::serializeBinary(const IColumn&, size_t, WriteBuffer&) const +{ + +} + +template +void DataTypeDateTimeBase::deserializeBinary(IColumn&, ReadBuffer&) const +{ + +} + +template +Field DataTypeDateTimeBase::getDefault() const +{ + return typename TypeGetter::FieldType{}; +} + template bool DataTypeDateTimeBase::equals(const IDataType & rhs) const { @@ -272,19 +322,51 @@ static DataTypePtr create(const ASTPtr & arguments) return std::make_shared(arg->value.get()); } +struct ArgumentSpec +{ + enum ArgumentKind + { + Optional, + Mandatory + }; + + size_t index; + const char * name; + ArgumentKind kind; +}; + +template +T getArgument(const ASTPtr & arguments, ArgumentSpec argument_spec, const std::string context_data_type_name) +{ + using NearestResultType = NearestFieldType; + const auto fieldType = Field::TypeToEnum::value; + + if (!arguments || arguments->children.size() <= argument_spec.index) + { + if (argument_spec.kind == ArgumentSpec::Optional) + return {}; + else + throw Exception("Parameter #" + std::to_string(argument_spec.index) + "'" + argument_spec.name + "' for " + context_data_type_name + " is missing.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + + const auto * argument = arguments->children[argument_spec.index]->as(); + if (!argument || argument->value.getType() != fieldType) + throw Exception("'" + std::string(argument_spec.name) + "' parameter for " + + context_data_type_name + " must be " + Field::Types::toString(fieldType) + + " literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return argument->value.get(); +} + static DataTypePtr create64(const ASTPtr & arguments) { if (!arguments) - return std::make_shared(); + return std::make_shared(DataTypeDateTime64::default_scale); - if (arguments->children.size() != 1) - throw Exception("DateTime64 data type can optionally have only one argument - time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + const auto scale = getArgument(arguments, ArgumentSpec{0, "scale", ArgumentSpec::Mandatory}, "DateType64"); + const auto timezone = getArgument(arguments, ArgumentSpec{0, "timezone", ArgumentSpec::Optional}, "DateType64"); - const auto * timezone_arg = arguments->children[0]->as(); - if (!timezone_arg || timezone_arg->value.getType() != Field::Types::String) - throw Exception("Timezone parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - return std::make_shared(timezone_arg->value.get()); + return std::make_shared(scale, timezone); } void registerDataTypeDateTime(DataTypeFactory & factory) @@ -295,5 +377,26 @@ void registerDataTypeDateTime(DataTypeFactory & factory) factory.registerAlias("TIMESTAMP", "DateTime", DataTypeFactory::CaseInsensitive); } +/// Explicit template instantiations. +template class DataTypeDateTimeBase; +template class DataTypeDateTimeBase; + +MutableColumnPtr DataTypeDateTime::createColumn() const +{ + return ColumnVector::create(); +} + +DataTypeDateTime64::DataTypeDateTime64(UInt8 scale_, const std::string & time_zone_name) + : Base(time_zone_name), + scale(scale_) +{ + // TODO(vnemkov): validate scale +} + +MutableColumnPtr DataTypeDateTime64::createColumn() const +{ + return ColumnDecimal::create(0, scale); +} + } diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index b144ab6f708..6e0050d45aa 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -6,7 +6,6 @@ class DateLUTImpl; - template class> struct is_instance : public std::false_type {}; @@ -16,6 +15,12 @@ struct is_instance, U> : public std::true_type {}; namespace DB { +template +class ColumnDecimal; + +template +class ColumnVector; + /** DateTime stores time as unix timestamp. * The value itself is independent of time zone. * @@ -23,7 +28,7 @@ namespace DB * In text format it is serialized to and parsed from YYYY-MM-DD hh:mm:ss format. * The text format is dependent of time zone. * - * To convert from/to text format, time zone may be specified explicitly or implicit time zone may be used. + * To constt from/to text format, time zone may be specified explicitly or implicit time zone may be used. * * Time zone may be specified explicitly as type parameter, example: DateTime('Europe/Moscow'). * As it does not affect the internal representation of values, @@ -36,10 +41,12 @@ namespace DB * Server time zone is the time zone specified in 'timezone' parameter in configuration file, * or system time zone at the moment of server startup. */ -template -class DataTypeDateTimeBase : public DataTypeNumberBase +template +class DataTypeDateTimeBase : public IDataType { public: + using FieldType = DateTimeType; + DataTypeDateTimeBase(const std::string & time_zone_name = ""); const char * getFamilyName() const override; @@ -59,11 +66,19 @@ public: void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override; void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; - bool canBeUsedAsVersion() const override { return true; } + void serializeBinary(const Field&, WriteBuffer&) const override; + void deserializeBinary(Field&, ReadBuffer&) const override; + void serializeBinary(const IColumn&, size_t, WriteBuffer&) const override; + void deserializeBinary(IColumn&, ReadBuffer&) const override; + Field getDefault() const override; + bool canBePromoted() const override { return false; } + bool isParametric() const override { return true; } + bool haveSubtypes() const override { return false; } bool canBeInsideNullable() const override { return true; } bool equals(const IDataType & rhs) const override; + const DateLUTImpl & getTimeZone() const { return time_zone; } protected: @@ -73,11 +88,24 @@ protected: }; struct DataTypeDateTime : DataTypeDateTimeBase { - using DataTypeDateTimeBase::DataTypeDateTimeBase; + using Base = DataTypeDateTimeBase; + using Base::Base; + + using ColumnType = ColumnVector; + + MutableColumnPtr createColumn() const override; }; -struct DataTypeDateTime64 : DataTypeDateTimeBase { - using DataTypeDateTimeBase::DataTypeDateTimeBase; +struct DataTypeDateTime64 : DataTypeDateTimeBase { + using Base = DataTypeDateTimeBase; + DataTypeDateTime64(UInt8 scale_, const std::string & time_zone_name = ""); + + using ColumnType = ColumnDecimal; + MutableColumnPtr createColumn() const override; + + static constexpr UInt8 default_scale = 3; +private: + const UInt8 scale; }; } diff --git a/dbms/src/DataTypes/DataTypeNumberBase.h b/dbms/src/DataTypes/DataTypeNumberBase.h index f9e5de44a9e..fb752ad5329 100644 --- a/dbms/src/DataTypes/DataTypeNumberBase.h +++ b/dbms/src/DataTypes/DataTypeNumberBase.h @@ -7,6 +7,9 @@ namespace DB { +template +class ColumnVector; + /** Implements part of the IDataType interface, common to all numbers and for Date and DateTime. */ template @@ -18,6 +21,8 @@ public: static constexpr bool is_parametric = false; using FieldType = T; + using ColumnType = ColumnVector; + const char * getFamilyName() const override { return TypeName::get(); } TypeIndex getTypeId() const override { return TypeId::value; } diff --git a/dbms/src/DataTypes/DataTypesDecimal.cpp b/dbms/src/DataTypes/DataTypesDecimal.cpp index be5042fa57e..51bea0caf8f 100644 --- a/dbms/src/DataTypes/DataTypesDecimal.cpp +++ b/dbms/src/DataTypes/DataTypesDecimal.cpp @@ -1,17 +1,19 @@ -#include -#include -#include #include + +#include +#include +#include #include #include #include #include #include #include -#include -#include #include +#include +#include +#include namespace DB { diff --git a/dbms/src/Functions/DateTimeTransforms.h b/dbms/src/Functions/DateTimeTransforms.h index 857b76241b7..fb3eccbcb1d 100644 --- a/dbms/src/Functions/DateTimeTransforms.h +++ b/dbms/src/Functions/DateTimeTransforms.h @@ -1,8 +1,10 @@ #pragma once #include +#include #include #include #include +#include #include #include #include @@ -45,10 +47,10 @@ struct ToDateImpl { static constexpr auto name = "toDate"; - static inline UInt16 execute(DateTime64::Type t, const DateLUTImpl & time_zone) + static inline UInt16 execute(DateTime64::NativeType t, const DateLUTImpl & time_zone) { std::cout << "converting DateTime64 t=" << t << " " << name << std::endl; - return UInt16(time_zone.toDayNum(DateTime64(t).split().datetime)); + return UInt16(time_zone.toDayNum(decimalWholePart(DateTime64(t), 9))); } static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone) @@ -653,14 +655,14 @@ struct DateTimeTransformImpl { static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) { - using Op = Transformer; + using Op = Transformer; const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(block, arguments, 1, 0); const ColumnPtr source_col = block.getByPosition(arguments[0]).column; - if (const auto * sources = checkAndGetColumn>(source_col.get())) + if (const auto * sources = checkAndGetColumn(source_col.get())) { - auto col_to = ColumnVector::create(); + auto col_to = ColumnVector::create(); Op::vector(sources->getData(), col_to->getData(), time_zone); block.getByPosition(result).column = std::move(col_to); } diff --git a/dbms/src/Functions/FunctionDateOrDateTimeAddInterval.h b/dbms/src/Functions/FunctionDateOrDateTimeAddInterval.h index 9f9011e840e..44c239bd303 100644 --- a/dbms/src/Functions/FunctionDateOrDateTimeAddInterval.h +++ b/dbms/src/Functions/FunctionDateOrDateTimeAddInterval.h @@ -72,6 +72,12 @@ struct AddDaysImpl { static constexpr auto name = "addDays"; + static inline UInt32 execute(UInt64 t, Int64 delta, const DateLUTImpl & time_zone) + { + // TODO (nemkov): LUT does not support out-of range date values for now. + return time_zone.addDays(t, delta); + } + static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone) { return time_zone.addDays(t, delta); diff --git a/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h b/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h index ad9aed612ee..ad51f727e68 100644 --- a/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h +++ b/dbms/src/Functions/FunctionDateOrDateTimeToSomething.h @@ -20,8 +20,8 @@ struct WithDateTime64Converter : public Transform { static inline auto execute(DataTypeDateTime64::FieldType t, const DateLUTImpl & time_zone) { auto x = DateTime64(t); - auto res = Transform::execute(static_cast(x.split().datetime), time_zone); - std::cout << "calling through datetime64 wrapper v=" << x.get() << "tz= " << time_zone.getTimeZone() << " result=" << res << std::endl; + auto res = Transform::execute(static_cast(decimalWholePart(x, DataTypeDateTime64::default_scale)), time_zone); + std::cout << "calling through datetime64 wrapper v=" << x.value << "tz= " << time_zone.getTimeZone() << " result=" << res << std::endl; return res; } }; @@ -93,11 +93,11 @@ public: WhichDataType which(from_type); if (which.isDate()) - DateTimeTransformImpl::execute(block, arguments, result, input_rows_count); + DateTimeTransformImpl::execute(block, arguments, result, input_rows_count); else if (which.isDateTime()) - DateTimeTransformImpl::execute(block, arguments, result, input_rows_count); + DateTimeTransformImpl::execute(block, arguments, result, input_rows_count); else if (which.isDateTime64()) - DateTimeTransformImpl>::execute(block, arguments, result, input_rows_count); + DateTimeTransformImpl>::execute(block, arguments, result, input_rows_count); else throw Exception("Illegal type " + block.getByPosition(arguments[0]).type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index e3e8fa80a0e..37cdbf97d38 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -169,7 +169,7 @@ struct ToDateTimeImpl }; template struct ConvertImpl - : DateTimeTransformImpl {}; + : DateTimeTransformImpl {}; /// Implementation of toDate function. @@ -188,10 +188,10 @@ struct ToDateTransform32Or64 /** Conversion of DateTime to Date: throw off time component. */ template struct ConvertImpl - : DateTimeTransformImpl {}; + : DateTimeTransformImpl {}; template struct ConvertImpl - : DateTimeTransformImpl {}; + : DateTimeTransformImpl {}; /** Special case of converting (U)Int32 or (U)Int64 (and also, for convenience, Float32, Float64) to Date. * If number is less than 65536, then it is treated as DayNum, and if greater or equals, then as unix timestamp. @@ -201,17 +201,17 @@ template struct ConvertImpl struct ConvertImpl - : DateTimeTransformImpl> {}; + : DateTimeTransformImpl> {}; template struct ConvertImpl - : DateTimeTransformImpl> {}; + : DateTimeTransformImpl> {}; template struct ConvertImpl - : DateTimeTransformImpl> {}; + : DateTimeTransformImpl> {}; template struct ConvertImpl - : DateTimeTransformImpl> {}; + : DateTimeTransformImpl> {}; template struct ConvertImpl - : DateTimeTransformImpl> {}; + : DateTimeTransformImpl> {}; template struct ConvertImpl - : DateTimeTransformImpl> {}; + : DateTimeTransformImpl> {}; /** Transformation of numbers, dates, datetimes to strings: through formatting. @@ -251,7 +251,7 @@ struct FormatImpl { static void execute(const DataTypeDateTime64::FieldType x, WriteBuffer & wb, const DataTypeDateTime64 *, const DateLUTImpl * time_zone) { - std::cout << "!!!!!!!!!!!!!!!!!!!!!!! performing FormatImpl v=" << x << " tz=" << (void*)time_zone << std::endl; + std::cout << "!!!!!!!!!!!!!!!!!!!!!!! performing FormatImpl v=" << x << " tz=" << time_zone << std::endl; writeDateTimeText(DateTime64(x), wb, *time_zone); } }; @@ -827,15 +827,25 @@ public: } else { + UInt8 max_args = 2; +// UInt8 scale = 3; +// if constexpr (std::is_same_v) +// { +// max_args += 1; +// if (arguments.size() == max_args - 1) +// { + +// } +// } /** Optional second argument with time zone is supported: * - for functions toDateTime, toUnixTimestamp, toDate; * - for function toString of DateTime argument. */ - if (arguments.size() == 2) + if (arguments.size() == max_args) { - if (!checkAndGetDataType(arguments[1].type.get())) - throw Exception("Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName(), + if (!checkAndGetDataType(arguments[max_args - 1].type.get())) + throw Exception("Illegal type " + arguments[max_args - 1].type->getName() + " of 2nd argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); static constexpr bool to_date_or_time = std::is_same_v @@ -853,8 +863,8 @@ public: if (std::is_same_v) return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); - else if (std::is_same_v) - return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); +// else if (std::is_same_v) +// return std::make_shared(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0)); else return std::make_shared(); } diff --git a/dbms/src/Functions/FunctionsReinterpret.h b/dbms/src/Functions/FunctionsReinterpret.h index 1e008ba408b..e54c324f843 100644 --- a/dbms/src/Functions/FunctionsReinterpret.h +++ b/dbms/src/Functions/FunctionsReinterpret.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -147,6 +148,18 @@ public: } }; +template +struct ColumnType +{ + using Type = ColumnVector; +}; + +template +struct ColumnType> +{ + using Type = ColumnDecimal>; +}; + template class FunctionReinterpretStringAs : public IFunction @@ -156,6 +169,7 @@ public: static FunctionPtr create(const Context &) { return std::make_shared(); } using ToFieldType = typename ToDataType::FieldType; + using ColumnType = typename ColumnType::Type; String getName() const override { @@ -179,12 +193,12 @@ public: { if (const ColumnString * col_from = typeid_cast(block.getByPosition(arguments[0]).column.get())) { - auto col_res = ColumnVector::create(); + auto col_res = ColumnType::create(); const ColumnString::Chars & data_from = col_from->getChars(); const ColumnString::Offsets & offsets_from = col_from->getOffsets(); size_t size = offsets_from.size(); - typename ColumnVector::Container & vec_res = col_res->getData(); + typename ColumnType::Container & vec_res = col_res->getData(); vec_res.resize(size); size_t offset = 0; diff --git a/dbms/src/Functions/now.cpp b/dbms/src/Functions/now.cpp index 2706d8b849a..5e13074c0f6 100644 --- a/dbms/src/Functions/now.cpp +++ b/dbms/src/Functions/now.cpp @@ -1,49 +1,34 @@ #include +#include #include #include +#include + namespace DB { -template -struct TypeGetter; +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} -template<> -struct TypeGetter { - using Type = DataTypeDateTime64; - static constexpr auto name = "now64"; - - static DateTime64::Type now() { - long int ns; - time_t sec; - timespec spec; - clock_gettime(CLOCK_REALTIME, &spec); - sec = spec.tv_sec; - ns = spec.tv_nsec; - return 1000 * 1000 * 1000 * sec + ns; - } -}; - -template<> -struct TypeGetter { - using Type = DataTypeDateTime; - static constexpr auto name = "now"; - - static UInt64 now() { - return static_cast(time(nullptr)); - } -}; +DateTime64::NativeType nowSubsecond(UInt8 scale) { + timespec spec; + clock_gettime(CLOCK_REALTIME, &spec); + return decimalFromComponents(spec.tv_sec, spec.tv_nsec, scale); +} /// Get the current time. (It is a constant, it is evaluated once for the entire query.) -template class FunctionNow : public IFunction { public: - static constexpr auto name = TypeGetter::name; - static FunctionPtr create(const Context &) { return std::make_shared>(); } + static constexpr auto name = "now"; + static FunctionPtr create(const Context &) { return std::make_shared(); } String getName() const override { @@ -54,21 +39,69 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override { - return std::make_shared::Type>(); + return std::make_shared(); } bool isDeterministic() const override { return false; } void executeImpl(Block & block, const ColumnNumbers &, size_t result, size_t input_rows_count) override { - block.getByPosition(result).column = typename TypeGetter::Type().createColumnConst(input_rows_count, TypeGetter::now()); + block.getByPosition(result).column = DataTypeDateTime().createColumnConst(input_rows_count, static_cast(time(nullptr))); + } +}; + +class FunctionNow64 : public IFunction +{ +public: + static constexpr auto name = "now64"; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 0; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + // Type check is similar to the validateArgumentType, trying to keep error codes and messages as close to the said function as possible. + if (arguments.size() <= 1) + throw Exception("Incorrect number of arguments of function " + getName(), + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto & argument = arguments[0]; + if (!isInteger(argument.type) || !isColumnConst(*argument.column)) + throw Exception("Illegal type " + argument.type->getName() + + " of 0" + + " argument of function " + getName() + + ". Expected const integer.", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const UInt64 scale = argument.column->get64(0); + + return std::make_shared(scale); + } + + bool isDeterministic() const override { return false; } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override + { + const IColumn * scale_column = block.getByPosition(arguments[0]).column.get(); + if (!isColumnConst(*scale_column)) + throw Exception("Unsupported argument type: " + scale_column->getName() + + + " for function " + getName() + ". Expected const integer.", + ErrorCodes::ILLEGAL_COLUMN); + + const UInt64 scale = scale_column->get64(0); + block.getByPosition(result).column = DataTypeDateTime64(scale).createColumnConst(input_rows_count, nowSubsecond(scale)); } }; void registerFunctionNow(FunctionFactory & factory) { - factory.registerFunction>(FunctionFactory::CaseInsensitive); - factory.registerFunction>(FunctionFactory::CaseInsensitive); + factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerFunction(FunctionFactory::CaseInsensitive); } } diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h index 7698fceb04e..30d7f7c24f9 100644 --- a/dbms/src/IO/ReadHelpers.h +++ b/dbms/src/IO/ReadHelpers.h @@ -14,19 +14,20 @@ #include #include +#include #include #include #include #include #include +#include #include #include #include #include - #include #ifdef __clang__ @@ -320,6 +321,12 @@ bool tryReadIntText(T & x, ReadBuffer & buf) return readIntTextImpl(x, buf); } +template +void readIntText(Decimal & x, ReadBuffer & buf) +{ + readIntText(x.value, buf); +} + /** More efficient variant (about 1.5 times on real dataset). * Differs in following: * - for numbers starting with zero, parsed only zero; @@ -630,13 +637,26 @@ inline void readDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTI inline void readDateTimeText(DateTime64 & datetime64, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) { - DateTime64::Components c; - readDateTimeTextImpl(c.datetime, buf, date_lut); - buf.ignore(); // ignore the "." + DB::DecimalComponents c; + readDateTimeTextImpl(c.whole, buf, date_lut); + + char separator; + if (buf.read(separator)) + { + if (separator != '.') + { + throw Exception("Cannot parse DateTime64 from text.", ErrorCodes::CANNOT_PARSE_DATETIME); + } + } + auto remaining = buf.available(); - readIntText(c.nanos, buf); - c.nanos *= static_cast(std::pow(10, 9 - remaining)); - datetime64 = DateTime64(c); + readIntText(c.fractional, buf); + + // TODO: hardcoded precision, use something similar to DataTypeDecimal::readText() + const int scale = common::exp10_i32(9); + c.fractional *= static_cast(std::pow(10, 9 - remaining)); + + datetime64 = decimalFromComponents(c, scale); } inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance()) diff --git a/dbms/src/IO/WriteHelpers.h b/dbms/src/IO/WriteHelpers.h index 260e9cc1569..bff8624ba8d 100644 --- a/dbms/src/IO/WriteHelpers.h +++ b/dbms/src/IO/WriteHelpers.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -760,16 +761,16 @@ inline void writeDateTimeText(DateTime64 datetime64, WriteBuffer & buf, const Da return; } - auto c = datetime64.split(); - const auto & values = date_lut.getValues(c.datetime); + auto c = decimalSplit(datetime64, 9); + const auto & values = date_lut.getValues(c.whole); writeDateTimeText( LocalDateTime(values.year, values.month, values.day_of_month, - date_lut.toHour(c.datetime), date_lut.toMinute(c.datetime), date_lut.toSecond(c.datetime)), buf); + date_lut.toHour(c.whole), date_lut.toMinute(c.whole), date_lut.toSecond(c.whole)), buf); buf.write(fractional_time_delimiter); char data[10]; - int written = sprintf(data, "%09d", c.nanos); + int written = sprintf(data, "%09ld", c.fractional); // TODO(nemkov): can it be negative ? if yes, do abs() on it. writeText(&data[0], static_cast(written), buf); } @@ -779,12 +780,6 @@ inline void writeText(const LocalDateTime & x, WriteBuffer & buf) { writeDateTim inline void writeText(const UUID & x, WriteBuffer & buf) { writeUUIDText(x, buf); } inline void writeText(const UInt128 & x, WriteBuffer & buf) { writeText(UUID(x), buf); } -template inline T decimalScaleMultiplier(UInt32 scale); -template <> inline Int32 decimalScaleMultiplier(UInt32 scale) { return common::exp10_i32(scale); } -template <> inline Int64 decimalScaleMultiplier(UInt32 scale) { return common::exp10_i64(scale); } -template <> inline Int128 decimalScaleMultiplier(UInt32 scale) { return common::exp10_i128(scale); } - - template void writeText(Decimal value, UInt32 scale, WriteBuffer & ostr) { diff --git a/dbms/src/Interpreters/convertFieldToType.cpp b/dbms/src/Interpreters/convertFieldToType.cpp index 396bc8ed718..55311f30ebb 100644 --- a/dbms/src/Interpreters/convertFieldToType.cpp +++ b/dbms/src/Interpreters/convertFieldToType.cpp @@ -146,7 +146,7 @@ UInt64 stringToDateTime(const String & s) return UInt64(date_time); } -DateTime64::Type stringToDateTime64(const String & s) +DateTime64::NativeType stringToDateTime64(const String & s) { ReadBufferFromString in(s); DateTime64 datetime64 {0}; @@ -155,7 +155,7 @@ DateTime64::Type stringToDateTime64(const String & s) if (!in.eof()) throw Exception("String is too long for DateTime64: " + s, ErrorCodes::TOO_LARGE_STRING_SIZE); - return datetime64.get(); + return datetime64.value; } Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const IDataType * from_type_hint) diff --git a/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.python b/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.python index d2d5388530f..f3663a8b3c2 100755 --- a/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.python +++ b/dbms/tests/queries/0_stateless/00921_datetime64_compatibility.python @@ -148,6 +148,9 @@ for f, args in extra_ops: func = f.format(**dict(zip(args_keys, args_vals))) functions.append(func) +# TODO: use string.Template here to allow lines that do not contain type, like: SELECT CAST(toDateTime64(1234567890), 'DateTime64') for func in functions: f = func.format(datetime="now64()") - print("""SELECT {function};""".format(function=f)) \ No newline at end of file + print("""SELECT {function};""".format(function=f)) + +print("SELECT CAST( )") \ No newline at end of file diff --git a/libs/libcommon/include/common/DateLUTImpl.h b/libs/libcommon/include/common/DateLUTImpl.h index ef50d6ede3f..7f1e8c74313 100644 --- a/libs/libcommon/include/common/DateLUTImpl.h +++ b/libs/libcommon/include/common/DateLUTImpl.h @@ -666,7 +666,7 @@ public: inline DayNum makeDayNum(UInt16 year, UInt8 month, UInt8 day_of_month) const { if (unlikely(year < DATE_LUT_MIN_YEAR || year > DATE_LUT_MAX_YEAR || month < 1 || month > 12 || day_of_month < 1 || day_of_month > 31)) - return DayNum(0); + return DayNum(0); // TODO (nemkov, DateTime64 phase 2): implement creating real date for year outside of LUT range. return DayNum(years_months_lut[(year - DATE_LUT_MIN_YEAR) * 12 + month - 1] + day_of_month - 1); } From 6e813002d5e65357250afc60df49e5a9ae233bde Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Wed, 2 Oct 2019 08:53:38 +0300 Subject: [PATCH 014/147] Proper serialization of DateTime64 also updated functions support. --- dbms/src/DataTypes/DataTypeDateTime.cpp | 398 +++++++++--------- dbms/src/DataTypes/DataTypeDateTime.h | 101 +++-- dbms/src/DataTypes/DataTypeDecimalBase.cpp | 214 ++++++++++ dbms/src/DataTypes/DataTypeDecimalBase.h | 363 ++++++++++++++++ dbms/src/DataTypes/DataTypesDecimal.cpp | 210 +-------- dbms/src/DataTypes/DataTypesDecimal.h | 208 +-------- dbms/src/Formats/ProtobufReader.cpp | 19 + dbms/src/Formats/ProtobufReader.h | 3 +- dbms/src/Formats/ProtobufWriter.cpp | 10 +- dbms/src/Formats/ProtobufWriter.h | 3 +- dbms/src/Functions/DateTimeTransforms.h | 8 +- .../FunctionDateOrDateTimeToSomething.h | 23 +- dbms/src/Functions/FunctionsConversion.h | 12 +- dbms/src/Functions/FunctionsReinterpret.h | 15 +- dbms/src/IO/ReadHelpers.h | 10 +- dbms/src/IO/WriteHelpers.h | 16 +- dbms/src/Interpreters/convertFieldToType.cpp | 7 +- 17 files changed, 927 insertions(+), 693 deletions(-) create mode 100644 dbms/src/DataTypes/DataTypeDecimalBase.cpp create mode 100644 dbms/src/DataTypes/DataTypeDecimalBase.h diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index 30568d0a12d..70e67334fea 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -1,58 +1,56 @@ -#include -#include -#include +#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include #include #include #include -#include -#include - -#include #include - +#include +#include +#include +#include #include #include +namespace +{ +using namespace DB; +static inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone) +{ + switch (settings.date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + readDateTimeText(x, istr, time_zone); + return; + case FormatSettings::DateTimeInputFormat::BestEffort: + parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone); + return; + } +} + +static inline void readText(DateTime64 & x, UInt32 scale, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & /*utc_time_zone*/) +{ + switch (settings.date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + readDateTimeText(x, scale, istr, time_zone); + return; + default: + return; + } +} +} + namespace DB { -template -struct TypeGetter; - -template<> -struct TypeGetter { - using Type = time_t; - using Column = ColumnVector; - using Convertor = time_t; - using FieldType = NearestFieldType; - - // This is not actually true, which is bad form as it truncates the value from time_t (long int) into uint32_t - // static_assert(sizeof(Column::value_type) == sizeof(Type)); - - static constexpr TypeIndex Index = TypeIndex::DateTime; - static constexpr const char * Name = "DateTime"; -}; - -template<> -struct TypeGetter { - using Type = DateTime64; - using Column = ColumnDecimal; - using Convertor = DateTime64; - using FieldType = NearestFieldType; - - static_assert(sizeof(typename Column::Container::value_type) == sizeof(Type)); - - static constexpr TypeIndex Index = TypeIndex::DateTime64; - static constexpr const char * Name = "DateTime64"; -}; - template bool protobufReadDateTime(ProtobufReader & protobuf, T & date_time) { @@ -66,153 +64,103 @@ bool protobufReadDateTime(ProtobufReader & protobuf, DateTime64 & da return protobuf.readDateTime(date_time.value); } -template -DataTypeDateTimeBase::DataTypeDateTimeBase(const std::string & time_zone_name) +TimezoneMixin::TimezoneMixin(const std::string & time_zone_name) : has_explicit_time_zone(!time_zone_name.empty()), time_zone(DateLUT::instance(time_zone_name)), utc_time_zone(DateLUT::instance("UTC")) +{} + +DataTypeDateTime::DataTypeDateTime(const std::string & time_zone_name) + : TimezoneMixin(time_zone_name) { } -template -const char * DataTypeDateTimeBase::getFamilyName() const -{ - return TypeGetter::Name; -} - -template -std::string DataTypeDateTimeBase::doGetName() const +std::string DataTypeDateTime::doGetName() const { if (!has_explicit_time_zone) - return TypeGetter::Name; + return "DateTime"; WriteBufferFromOwnString out; - out << TypeGetter::Name << "(" << quote << time_zone.getTimeZone() << ")"; + out << "DateTime(" << quote << time_zone.getTimeZone() << ")"; return out.str(); } -template -TypeIndex DataTypeDateTimeBase::getTypeId() const +void DataTypeDateTime::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const { - return TypeGetter::Index; + writeDateTimeText(assert_cast(column).getData()[row_num], ostr, time_zone); } -template -void DataTypeDateTimeBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const -{ - using TG = TypeGetter; - writeDateTimeText(typename TG::Convertor(assert_cast(column).getData()[row_num]), ostr, time_zone); -} - -template -void DataTypeDateTimeBase::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +void DataTypeDateTime::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { serializeText(column, row_num, ostr, settings); } - -static inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone) -{ - switch (settings.date_time_input_format) - { - case FormatSettings::DateTimeInputFormat::Basic: - readDateTimeText(x, istr, time_zone); - return; - case FormatSettings::DateTimeInputFormat::BestEffort: - parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone); - return; - } -} - -static inline void readText(DateTime64 & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & /*utc_time_zone*/) -{ - switch (settings.date_time_input_format) - { - case FormatSettings::DateTimeInputFormat::Basic: - readDateTimeText(x, istr, time_zone); - return; - default: - return; - } -} - -template -void DataTypeDateTimeBase::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void DataTypeDateTime::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { deserializeTextEscaped(column, istr, settings); } -template -void DataTypeDateTimeBase::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void DataTypeDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - typename TypeGetter::Type x = 0; - readText(x, istr, settings, time_zone, utc_time_zone); - - assert_cast::Column &>(column).getData().push_back(x); + time_t x; + ::readText(x, istr, settings, time_zone, utc_time_zone); + assert_cast(column).getData().push_back(x); } -template -void DataTypeDateTimeBase::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +void DataTypeDateTime::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('\'', ostr); serializeText(column, row_num, ostr, settings); writeChar('\'', ostr); } -template -void DataTypeDateTimeBase::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void DataTypeDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - typename TypeGetter::Type x; + time_t x; if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808' { - readText(x, istr, settings, time_zone, utc_time_zone); + ::readText(x, istr, settings, time_zone, utc_time_zone); assertChar('\'', istr); } else /// Just 1504193808 or 01504193808 { readIntText(x, istr); } - - assert_cast::Column &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. + assert_cast(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. } -template -void DataTypeDateTimeBase::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +void DataTypeDateTime::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('"', ostr); serializeText(column, row_num, ostr, settings); writeChar('"', ostr); } -template -void DataTypeDateTimeBase::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void DataTypeDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - typename TypeGetter::Type x; + time_t x; if (checkChar('"', istr)) { - readText(x, istr, settings, time_zone, utc_time_zone); + ::readText(x, istr, settings, time_zone, utc_time_zone); assertChar('"', istr); } else { readIntText(x, istr); } - - assert_cast::Column &>(column).getData().push_back(x); + assert_cast(column).getData().push_back(x); } -template -void DataTypeDateTimeBase::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +void DataTypeDateTime::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('"', ostr); serializeText(column, row_num, ostr, settings); writeChar('"', ostr); } -template -void DataTypeDateTimeBase::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - typename TypeGetter::Type x = 0; + time_t x; if (istr.eof()) throwReadAfterEOF(); @@ -222,33 +170,29 @@ void DataTypeDateTimeBase::deserializeTextCSV(IColumn & column, Read if (maybe_quote == '\'' || maybe_quote == '\"') ++istr.position(); - readText(x, istr, settings, time_zone, utc_time_zone); + ::readText(x, istr, settings, time_zone, utc_time_zone); if (maybe_quote == '\'' || maybe_quote == '\"') assertChar(maybe_quote, istr); - assert_cast::Column &>(column).getData().push_back(x); + assert_cast(column).getData().push_back(x); } -template -void DataTypeDateTimeBase::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const +void DataTypeDateTime::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const { - if (value_index) - return; - - typename TypeGetter::Type t = assert_cast::Column &>(column).getData()[row_num]; - value_index = assert_cast(protobuf.writeDateTime(t)); +// if (value_index) +// return; + value_index = static_cast(protobuf.writeDateTime(assert_cast(assert_cast(column).getData()[row_num]))); } -template -void DataTypeDateTimeBase::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const +void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const { row_added = false; - typename TypeGetter::Type t; - if (protobufReadDateTime(protobuf, t)) + time_t t; + if (!protobuf.readDateTime(t)) return; - auto & container = assert_cast::Column &>(column).getData(); + auto & container = assert_cast(column).getData(); if (allow_add_row) { container.emplace_back(t); @@ -258,48 +202,140 @@ void DataTypeDateTimeBase::deserializeProtobuf(IColumn & column, Pro container.back() = t; } -// TODO (vnemkov): Binary serialization/deserialization is same as for DataTypeNumberBase. - -template -void DataTypeDateTimeBase::serializeBinary(const Field& /*field*/, WriteBuffer& /*ostr*/) const -{ -// // Same as -// typename TypeGetter::Column::value_type x = get>(field); -// writeBinary(x, ostr); -} - -template -void DataTypeDateTimeBase::deserializeBinary(Field&, ReadBuffer&) const -{ - -} - -template -void DataTypeDateTimeBase::serializeBinary(const IColumn&, size_t, WriteBuffer&) const -{ - -} - -template -void DataTypeDateTimeBase::deserializeBinary(IColumn&, ReadBuffer&) const -{ - -} - -template -Field DataTypeDateTimeBase::getDefault() const -{ - return typename TypeGetter::FieldType{}; -} - -template -bool DataTypeDateTimeBase::equals(const IDataType & rhs) const +bool DataTypeDateTime::equals(const IDataType & rhs) const { /// DateTime with different timezones are equal, because: /// "all types with different time zones are equivalent and may be used interchangingly." return typeid(rhs) == typeid(*this); } +DataTypeDateTime64::DataTypeDateTime64(UInt8 scale_, const std::string & time_zone_name) + : DataTypeDecimalBase(maxDecimalPrecision(), scale_), + TimezoneMixin(time_zone_name) +{ +} + +std::string DataTypeDateTime64::doGetName() const +{ + return std::string(getFamilyName()) + "(" + std::to_string(this->scale) + ")"; +} + +void DataTypeDateTime64::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & /*settings*/) const +{ + writeDateTimeText(assert_cast(column).getData()[row_num], scale, ostr, time_zone); +} + +void DataTypeDateTime64::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + deserializeTextEscaped(column, istr, settings); +} + +void DataTypeDateTime64::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + serializeText(column, row_num, ostr, settings); +} + +void DataTypeDateTime64::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + DateTime64 x; + ::readText(x, scale, istr, settings, time_zone, utc_time_zone); + assert_cast(column).getData().push_back(x); +} + +void DataTypeDateTime64::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + writeChar('\'', ostr); + serializeText(column, row_num, ostr, settings); + writeChar('\'', ostr); +} + +void DataTypeDateTime64::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + DateTime64 x; + if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808' + { + ::readText(x, scale, istr, settings, time_zone, utc_time_zone); + assertChar('\'', istr); + } + else /// Just 1504193808 or 01504193808 + { + readIntText(x, istr); + } + assert_cast(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. +} + +void DataTypeDateTime64::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + writeChar('"', ostr); + serializeText(column, row_num, ostr, settings); + writeChar('"', ostr); +} + +void DataTypeDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + DateTime64 x; + if (checkChar('"', istr)) + { + ::readText(x, scale, istr, settings, time_zone, utc_time_zone); + assertChar('"', istr); + } + else + { + readIntText(x, istr); + } + assert_cast(column).getData().push_back(x); +} + +void DataTypeDateTime64::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +{ + writeChar('"', ostr); + serializeText(column, row_num, ostr, settings); + writeChar('"', ostr); +} + +void DataTypeDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + DateTime64 x; + + if (istr.eof()) + throwReadAfterEOF(); + + char maybe_quote = *istr.position(); + + if (maybe_quote == '\'' || maybe_quote == '\"') + ++istr.position(); + + ::readText(x, scale, istr, settings, time_zone, utc_time_zone); + + if (maybe_quote == '\'' || maybe_quote == '\"') + assertChar(maybe_quote, istr); + + assert_cast(column).getData().push_back(x); +} + +void DataTypeDateTime64::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const +{ + if (value_index) + return; + value_index = static_cast(protobuf.writeDateTime64(assert_cast(column).getData()[row_num], scale)); +} + +void DataTypeDateTime64::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const +{ + row_added = false; + DateTime64 t; + if (!protobuf.readDateTime64(t, scale)) + return; + + auto & container = assert_cast(column).getData(); + if (allow_add_row) + { + container.emplace_back(t); + row_added = true; + } + else + container.back() = t; +} namespace ErrorCodes { @@ -377,26 +413,4 @@ void registerDataTypeDateTime(DataTypeFactory & factory) factory.registerAlias("TIMESTAMP", "DateTime", DataTypeFactory::CaseInsensitive); } -/// Explicit template instantiations. -template class DataTypeDateTimeBase; -template class DataTypeDateTimeBase; - -MutableColumnPtr DataTypeDateTime::createColumn() const -{ - return ColumnVector::create(); -} - -DataTypeDateTime64::DataTypeDateTime64(UInt8 scale_, const std::string & time_zone_name) - : Base(time_zone_name), - scale(scale_) -{ - // TODO(vnemkov): validate scale -} - -MutableColumnPtr DataTypeDateTime64::createColumn() const -{ - return ColumnDecimal::create(0, scale); -} - - } diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index 6e0050d45aa..1ab4c1f8ef9 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -1,25 +1,32 @@ #pragma once -#include +#include #include - +#include class DateLUTImpl; -template class> -struct is_instance : public std::false_type {}; +//template class> +//struct is_instance : public std::false_type {}; -template class U> -struct is_instance, U> : public std::true_type {}; +//template class U> +//struct is_instance, U> : public std::true_type {}; namespace DB { -template -class ColumnDecimal; +class TimezoneMixin +{ +public: + explicit TimezoneMixin(const std::string & time_zone_name = ""); -template -class ColumnVector; + const DateLUTImpl & getTimeZone() const { return time_zone; } + +protected: + bool has_explicit_time_zone; + const DateLUTImpl & time_zone; + const DateLUTImpl & utc_time_zone; +}; /** DateTime stores time as unix timestamp. * The value itself is independent of time zone. @@ -28,7 +35,7 @@ class ColumnVector; * In text format it is serialized to and parsed from YYYY-MM-DD hh:mm:ss format. * The text format is dependent of time zone. * - * To constt from/to text format, time zone may be specified explicitly or implicit time zone may be used. + * To cast from/to text format, time zone may be specified explicitly or implicit time zone may be used. * * Time zone may be specified explicitly as type parameter, example: DateTime('Europe/Moscow'). * As it does not affect the internal representation of values, @@ -41,17 +48,13 @@ class ColumnVector; * Server time zone is the time zone specified in 'timezone' parameter in configuration file, * or system time zone at the moment of server startup. */ -template -class DataTypeDateTimeBase : public IDataType -{ +class DataTypeDateTime final : public DataTypeNumberBase, public TimezoneMixin { public: - using FieldType = DateTimeType; + explicit DataTypeDateTime(const std::string & time_zone_name = ""); - DataTypeDateTimeBase(const std::string & time_zone_name = ""); - - const char * getFamilyName() const override; + const char * getFamilyName() const override { return "DateTime"; } std::string doGetName() const override; - TypeIndex getTypeId() const override; + TypeIndex getTypeId() const override { return TypeIndex::DateTime; } void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; @@ -66,46 +69,38 @@ public: void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override; void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; - void serializeBinary(const Field&, WriteBuffer&) const override; - void deserializeBinary(Field&, ReadBuffer&) const override; - void serializeBinary(const IColumn&, size_t, WriteBuffer&) const override; - void deserializeBinary(IColumn&, ReadBuffer&) const override; - Field getDefault() const override; - bool canBePromoted() const override { return false; } - bool isParametric() const override { return true; } - bool haveSubtypes() const override { return false; } + bool canBeUsedAsVersion() const override { return true; } bool canBeInsideNullable() const override { return true; } bool equals(const IDataType & rhs) const override; - - - const DateLUTImpl & getTimeZone() const { return time_zone; } - -protected: - bool has_explicit_time_zone; - const DateLUTImpl & time_zone; - const DateLUTImpl & utc_time_zone; }; -struct DataTypeDateTime : DataTypeDateTimeBase { - using Base = DataTypeDateTimeBase; - using Base::Base; - - using ColumnType = ColumnVector; - - MutableColumnPtr createColumn() const override; -}; - -struct DataTypeDateTime64 : DataTypeDateTimeBase { - using Base = DataTypeDateTimeBase; - DataTypeDateTime64(UInt8 scale_, const std::string & time_zone_name = ""); - - using ColumnType = ColumnDecimal; - MutableColumnPtr createColumn() const override; - +/** DateTime64 is same as DateTime, but it stores values as UInt64 and has configurable sub-second part. + * + * `scale` determines number of decimal places for sub-second part of the DateTime64. + */ +class DataTypeDateTime64 final : public DataTypeDecimalBase, public TimezoneMixin { +public: static constexpr UInt8 default_scale = 3; -private: - const UInt8 scale; + + explicit DataTypeDateTime64(UInt8 scale_, const std::string & time_zone_name = ""); + + const char * getFamilyName() const override { return "DateTime64"; } + std::string doGetName() const override; + TypeIndex getTypeId() const override { return TypeIndex::DateTime64; } + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; + void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override; + void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; }; } diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.cpp b/dbms/src/DataTypes/DataTypeDecimalBase.cpp new file mode 100644 index 00000000000..4ee4f9176d3 --- /dev/null +++ b/dbms/src/DataTypes/DataTypeDecimalBase.cpp @@ -0,0 +1,214 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int ARGUMENT_OUT_OF_BOUND; +} + + +bool decimalCheckComparisonOverflow(const Context & context) { return context.getSettingsRef().decimal_check_overflow; } +bool decimalCheckArithmeticOverflow(const Context & context) { return context.getSettingsRef().decimal_check_overflow; } + +template +bool DataTypeDecimalBase::equals(const IDataType & rhs) const +{ + if (auto * ptype = dynamic_cast *>(&rhs)) + return scale == ptype->getScale(); + return false; +} + +template +void DataTypeDecimalBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +{ + T value = assert_cast(column).getData()[row_num]; + writeText(value, scale, ostr); +} + +template +bool DataTypeDecimalBase::tryReadText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale) +{ + UInt32 unread_scale = scale; + bool done = tryReadDecimalText(istr, x, precision, unread_scale); + x *= getScaleMultiplier(unread_scale); + return done; +} + +template +void DataTypeDecimalBase::readText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale, bool csv) +{ + UInt32 unread_scale = scale; + if (csv) + readCSVDecimalText(istr, x, precision, unread_scale); + else + readDecimalText(istr, x, precision, unread_scale); + x *= getScaleMultiplier(unread_scale); +} + +template +void DataTypeDecimalBase::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + T x; + readText(x, istr); + assert_cast(column).getData().push_back(x); +} + +template +void DataTypeDecimalBase::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const +{ + T x; + readText(x, istr, true); + assert_cast(column).getData().push_back(x); +} + +template +T DataTypeDecimalBase::parseFromString(const String & str) const +{ + ReadBufferFromMemory buf(str.data(), str.size()); + T x; + UInt32 unread_scale = scale; + readDecimalText(buf, x, precision, unread_scale, true); + x *= getScaleMultiplier(unread_scale); + return x; +} + + +template +void DataTypeDecimalBase::serializeBinary(const Field & field, WriteBuffer & ostr) const +{ + FieldType x = get>(field); + writeBinary(x, ostr); +} + +template +void DataTypeDecimalBase::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const +{ + const FieldType & x = assert_cast(column).getData()[row_num]; + writeBinary(x, ostr); +} + +template +void DataTypeDecimalBase::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const +{ + const typename ColumnType::Container & x = typeid_cast(column).getData(); + + size_t size = x.size(); + + if (limit == 0 || offset + limit > size) + limit = size - offset; + + ostr.write(reinterpret_cast(&x[offset]), sizeof(FieldType) * limit); +} + + +template +void DataTypeDecimalBase::deserializeBinary(Field & field, ReadBuffer & istr) const +{ + typename FieldType::NativeType x; + readBinary(x, istr); + field = DecimalField(T(x), scale); +} + +template +void DataTypeDecimalBase::deserializeBinary(IColumn & column, ReadBuffer & istr) const +{ + typename FieldType::NativeType x; + readBinary(x, istr); + assert_cast(column).getData().push_back(FieldType(x)); +} + +template +void DataTypeDecimalBase::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double) const +{ + typename ColumnType::Container & x = typeid_cast(column).getData(); + size_t initial_size = x.size(); + x.resize(initial_size + limit); + size_t size = istr.readBig(reinterpret_cast(&x[initial_size]), sizeof(FieldType) * limit); + x.resize(initial_size + size / sizeof(FieldType)); +} + + +template +void DataTypeDecimalBase::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const +{ + if (value_index) + return; + value_index = static_cast(protobuf.writeDecimal(assert_cast(column).getData()[row_num], scale)); +} + + +template +void DataTypeDecimalBase::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const +{ + row_added = false; + T decimal; + if (!protobuf.readDecimal(decimal, precision, scale)) + return; + + auto & container = assert_cast(column).getData(); + if (allow_add_row) + { + container.emplace_back(decimal); + row_added = true; + } + else + container.back() = decimal; +} + + +template +Field DataTypeDecimalBase::getDefault() const +{ + return DecimalField(T(0), scale); +} + +template +MutableColumnPtr DataTypeDecimalBase::createColumn() const +{ + return ColumnType::create(0, scale); +} + +template <> +Decimal32 DataTypeDecimalBase::getScaleMultiplier(UInt32 scale_) +{ + return decimalScaleMultiplier(scale_); +} + +template <> +Decimal64 DataTypeDecimalBase::getScaleMultiplier(UInt32 scale_) +{ + return decimalScaleMultiplier(scale_); +} + +template <> +Decimal128 DataTypeDecimalBase::getScaleMultiplier(UInt32 scale_) +{ + return decimalScaleMultiplier(scale_); +} + + +/// Explicit template instantiations. +template class DataTypeDecimalBase; +template class DataTypeDecimalBase; +template class DataTypeDecimalBase; + +} diff --git a/dbms/src/DataTypes/DataTypeDecimalBase.h b/dbms/src/DataTypes/DataTypeDecimalBase.h new file mode 100644 index 00000000000..0b134ff7947 --- /dev/null +++ b/dbms/src/DataTypes/DataTypeDecimalBase.h @@ -0,0 +1,363 @@ +#pragma once +#include + +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ARGUMENT_OUT_OF_BOUND; + extern const int CANNOT_CONVERT_TYPE; + extern const int DECIMAL_OVERFLOW; +} + +class Context; +bool decimalCheckComparisonOverflow(const Context & context); +bool decimalCheckArithmeticOverflow(const Context & context); + + +static constexpr size_t minDecimalPrecision() { return 1; } +template static constexpr size_t maxDecimalPrecision() { return 0; } +template <> constexpr size_t maxDecimalPrecision() { return 9; } +template <> constexpr size_t maxDecimalPrecision() { return 18; } +template <> constexpr size_t maxDecimalPrecision() { return 38; } + +inline UInt32 leastDecimalPrecisionFor(TypeIndex int_type) +{ + switch (int_type) + { + case TypeIndex::Int8: [[fallthrough]]; + case TypeIndex::UInt8: + return 3; + case TypeIndex::Int16: [[fallthrough]]; + case TypeIndex::UInt16: + return 5; + case TypeIndex::Int32: [[fallthrough]]; + case TypeIndex::UInt32: + return 10; + case TypeIndex::Int64: + return 19; + case TypeIndex::UInt64: + return 20; + default: + break; + } + return 0; +} + +/// Base class for decimals, like Decimal(P, S), where P is precision, S is scale. +/// Maximum precisions for underlying types are: +/// Int32 9 +/// Int64 18 +/// Int128 38 +/// Operation between two decimals leads to Decimal(P, S), where +/// P is one of (9, 18, 38); equals to the maximum precision for the biggest underlying type of operands. +/// S is maximum scale of operands. The allowed valuas are [0, precision] +template +class DataTypeDecimalBase : public DataTypeWithSimpleSerialization +{ + static_assert(IsDecimalNumber); + +public: + using FieldType = T; + using ColumnType = ColumnDecimal; + + static constexpr bool is_parametric = true; + + static constexpr size_t maxPrecision() { return maxDecimalPrecision(); } + + DataTypeDecimalBase(UInt32 precision_, UInt32 scale_) + : precision(precision_), + scale(scale_) + { + if (unlikely(precision < 1 || precision > maxPrecision())) + throw Exception("Precision " + std::to_string(precision) + " is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + if (unlikely(scale < 0 || static_cast(scale) > maxPrecision())) + throw Exception("Scale " + std::to_string(scale) + " is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + } + + TypeIndex getTypeId() const override { return TypeId::value; } + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override; + + void serializeBinary(const Field & field, WriteBuffer & ostr) const override; + void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override; + void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override; + + void deserializeBinary(Field & field, ReadBuffer & istr) const override; + void deserializeBinary(IColumn & column, ReadBuffer & istr) const override; + void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override; + + void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override; + void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override; + + Field getDefault() const override; + MutableColumnPtr createColumn() const override; + bool equals(const IDataType & rhs) const override; + + bool isParametric() const override { return true; } + bool haveSubtypes() const override { return false; } + bool shouldAlignRightInPrettyFormats() const override { return true; } + bool textCanContainOnlyValidUTF8() const override { return true; } + bool isComparable() const override { return true; } + bool isValueRepresentedByNumber() const override { return true; } + bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; } + bool haveMaximumSizeOfValue() const override { return true; } + size_t getSizeOfValueInMemory() const override { return sizeof(T); } + + bool isSummable() const override { return true; } + bool canBeUsedInBooleanContext() const override { return true; } + bool canBeInsideNullable() const override { return true; } + + /// Decimal specific + + UInt32 getPrecision() const { return precision; } + UInt32 getScale() const { return scale; } + T getScaleMultiplier() const { return getScaleMultiplier(scale); } + + T wholePart(T x) const + { + if (scale == 0) + return x; + return x / getScaleMultiplier(); + } + + T fractionalPart(T x) const + { + if (scale == 0) + return 0; + if (x < T(0)) + x *= T(-1); + return x % getScaleMultiplier(); + } + + T maxWholeValue() const { return getScaleMultiplier(maxPrecision() - scale) - T(1); } + + bool canStoreWhole(T x) const + { + T max = maxWholeValue(); + if (x > max || x < -max) + return false; + return true; + } + + /// @returns multiplier for U to become T with correct scale + template + T scaleFactorFor(const DataTypeDecimalBase & x, bool) const + { + if (getScale() < x.getScale()) + throw Exception("Decimal result's scale is less than argiment's one", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + UInt32 scale_delta = getScale() - x.getScale(); /// scale_delta >= 0 + return getScaleMultiplier(scale_delta); + } + + template + T scaleFactorFor(const DataTypeNumber & , bool is_multiply_or_divisor) const + { + if (is_multiply_or_divisor) + return 1; + return getScaleMultiplier(); + } + + T parseFromString(const String & str) const; + + void readText(T & x, ReadBuffer & istr, bool csv = false) const { readText(x, istr, precision, scale, csv); } + static void readText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale, bool csv = false); + static bool tryReadText(T & x, ReadBuffer & istr, UInt32 precision, UInt32 scale); + static T getScaleMultiplier(UInt32 scale); + +protected: + const UInt32 precision; + const UInt32 scale; +}; + + +// TODO (vnemkov): enable only if both tx and ty are derived from DecimalBase and are essentially same type with different type-params. +template typename DecimalType> +typename std::enable_if_t<(sizeof(T) >= sizeof(U)), DecimalType> +decimalResultType(const DecimalType & tx, const DecimalType & ty, bool is_multiply, bool is_divide) +{ + UInt32 scale = (tx.getScale() > ty.getScale() ? tx.getScale() : ty.getScale()); + if (is_multiply) + scale = tx.getScale() + ty.getScale(); + else if (is_divide) + scale = tx.getScale(); + return DecimalType(maxDecimalPrecision(), scale); +} + +template typename DecimalType> +typename std::enable_if_t<(sizeof(T) < sizeof(U)), const DecimalType> +decimalResultType(const DecimalType & tx, const DecimalType & ty, bool is_multiply, bool is_divide) +{ + UInt32 scale = (tx.getScale() > ty.getScale() ? tx.getScale() : ty.getScale()); + if (is_multiply) + scale = tx.getScale() * ty.getScale(); + else if (is_divide) + scale = tx.getScale(); + return DecimalType(maxDecimalPrecision(), scale); +} + +template typename DecimalType> +const DecimalType decimalResultType(const DecimalType & tx, const DataTypeNumber &, bool, bool) +{ + return DecimalType(maxDecimalPrecision(), tx.getScale()); +} + +template typename DecimalType> +const DecimalType decimalResultType(const DataTypeNumber &, const DecimalType & ty, bool, bool) +{ + return DecimalType(maxDecimalPrecision(), ty.getScale()); +} + + +////// TODO (vnemkov): make that work for DecimalBase-derived types +//template typename DecimalType> +//inline const DecimalType * checkDecimal(const IDataType & data_type) +//{ +// return typeid_cast *>(&data_type); +//} + +//inline UInt32 getDecimalScale(const IDataType & data_type, UInt32 default_value = std::numeric_limits::max()) +//{ +// if (auto * decimal_type = checkDecimal(data_type)) +// return decimal_type->getScale(); +// if (auto * decimal_type = checkDecimal(data_type)) +// return decimal_type->getScale(); +// if (auto * decimal_type = checkDecimal(data_type)) +// return decimal_type->getScale(); +// return default_value; +//} + +/// + +template constexpr bool IsDataTypeDecimal = false; +template constexpr bool IsDataTypeDecimalOrNumber = IsDataTypeDecimal || IsDataTypeNumber; + +//template +//inline std::enable_if_t && IsDataTypeDecimal, typename ToDataType::FieldType> +//convertDecimals(const typename FromDataType::FieldType & value, UInt32 scale_from, UInt32 scale_to) +//{ +// using FromFieldType = typename FromDataType::FieldType; +// using ToFieldType = typename ToDataType::FieldType; +// using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) > sizeof(ToFieldType)), FromFieldType, ToFieldType>; +// using MaxNativeType = typename MaxFieldType::NativeType; + +// MaxNativeType converted_value; +// if (scale_to > scale_from) +// { +// converted_value = DataTypeDecimal::getScaleMultiplier(scale_to - scale_from); +// if (common::mulOverflow(static_cast(value), converted_value, converted_value)) +// throw Exception("Decimal convert overflow", ErrorCodes::DECIMAL_OVERFLOW); +// } +// else +// converted_value = value / DataTypeDecimal::getScaleMultiplier(scale_from - scale_to); + +// if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType)) +// { +// if (converted_value < std::numeric_limits::min() || +// converted_value > std::numeric_limits::max()) +// throw Exception("Decimal convert overflow", ErrorCodes::DECIMAL_OVERFLOW); +// } + +// return converted_value; +//} + +//template +//inline std::enable_if_t && IsDataTypeNumber, typename ToDataType::FieldType> +//convertFromDecimal(const typename FromDataType::FieldType & value, UInt32 scale) +//{ +// using FromFieldType = typename FromDataType::FieldType; +// using ToFieldType = typename ToDataType::FieldType; + +// if constexpr (std::is_floating_point_v) +// return static_cast(value) / FromDataType::getScaleMultiplier(scale); +// else +// { +// FromFieldType converted_value = convertDecimals(value, scale, 0); + +// if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType) || !std::numeric_limits::is_signed) +// { +// if constexpr (std::numeric_limits::is_signed) +// { +// if (converted_value < std::numeric_limits::min() || +// converted_value > std::numeric_limits::max()) +// throw Exception("Decimal convert overflow", ErrorCodes::DECIMAL_OVERFLOW); +// } +// else +// { +// using CastIntType = std::conditional_t, Int128, Int64>; + +// if (converted_value < 0 || +// converted_value > static_cast(std::numeric_limits::max())) +// throw Exception("Decimal convert overflow", ErrorCodes::DECIMAL_OVERFLOW); +// } +// } +// return converted_value; +// } +//} + +//template +//inline std::enable_if_t && IsDataTypeDecimal, typename ToDataType::FieldType> +//convertToDecimal(const typename FromDataType::FieldType & value, UInt32 scale) +//{ +// using FromFieldType = typename FromDataType::FieldType; +// using ToNativeType = typename ToDataType::FieldType::NativeType; + +// if constexpr (std::is_floating_point_v) +// { +// if (!std::isfinite(value)) +// throw Exception("Decimal convert overflow. Cannot convert infinity or NaN to decimal", ErrorCodes::DECIMAL_OVERFLOW); + +// auto out = value * ToDataType::getScaleMultiplier(scale); +// if constexpr (std::is_same_v) +// { +// static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64; +// static constexpr __int128 max_int128 = (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll; +// if (out <= static_cast(min_int128) || out >= static_cast(max_int128)) +// throw Exception("Decimal convert overflow. Float is out of Decimal range", ErrorCodes::DECIMAL_OVERFLOW); +// } +// else +// { +// if (out <= std::numeric_limits::min() || out >= std::numeric_limits::max()) +// throw Exception("Decimal convert overflow. Float is out of Decimal range", ErrorCodes::DECIMAL_OVERFLOW); +// } +// return out; +// } +// else +// { +// if constexpr (std::is_same_v) +// if (value > static_cast(std::numeric_limits::max())) +// return convertDecimals, ToDataType>(value, 0, scale); +// return convertDecimals, ToDataType>(value, 0, scale); +// } +//} + +template