From e40a384372cd5bfe8dfa85d22618837842c557ac Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Thu, 28 Mar 2019 22:33:01 +0000 Subject: [PATCH 1/2] add datetime64 definition --- dbms/src/Core/Types.h | 1 + dbms/src/DataTypes/DataTypeDateTime.cpp | 137 ++++++++++++++++++------ dbms/src/DataTypes/DataTypeDateTime.h | 18 +++- dbms/src/Formats/ProtobufReader.h | 1 + dbms/src/Formats/ProtobufWriter.h | 1 + 5 files changed, 124 insertions(+), 34 deletions(-) diff --git a/dbms/src/Core/Types.h b/dbms/src/Core/Types.h index e4882cd64f7..5fa70e668bd 100644 --- a/dbms/src/Core/Types.h +++ b/dbms/src/Core/Types.h @@ -74,6 +74,7 @@ enum class TypeIndex Float64, Date, DateTime, + DateTime64, String, FixedString, Enum8, diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index f3d6efa1488..9f229ffdd95 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -20,29 +20,64 @@ namespace DB { -DataTypeDateTime::DataTypeDateTime(const std::string & time_zone_name) +template +struct TypeGetter; + +template<> +struct TypeGetter { + using Type = time_t; + using Column = ColumnUInt32; + static constexpr TypeIndex Index = TypeIndex::DateTime; + static constexpr const char * Name = "DateTime"; +}; + +template<> +struct TypeGetter { + using Type = UInt64; + using Column = ColumnUInt64; + static constexpr TypeIndex Index = TypeIndex::DateTime64; + static constexpr const char * Name = "DateTime64"; +}; + +template +DataTypeDateTimeBase::DataTypeDateTimeBase(const std::string & time_zone_name) : has_explicit_time_zone(!time_zone_name.empty()), time_zone(DateLUT::instance(time_zone_name)), utc_time_zone(DateLUT::instance("UTC")) { } -std::string DataTypeDateTime::doGetName() const +template +const char * DataTypeDateTimeBase::getFamilyName() const +{ + return TypeGetter::Name; +} + +template +std::string DataTypeDateTimeBase::doGetName() const { if (!has_explicit_time_zone) - return "DateTime"; + return TypeGetter::Name; WriteBufferFromOwnString out; - out << "DateTime(" << quote << time_zone.getTimeZone() << ")"; + out << TypeGetter::Name << "(" << quote << time_zone.getTimeZone() << ")"; return out.str(); } -void DataTypeDateTime::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +template +TypeIndex DataTypeDateTimeBase::getTypeId() const { - writeDateTimeText(static_cast(column).getData()[row_num], ostr, time_zone); + return TypeGetter::Index; } -void DataTypeDateTime::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +{ + writeDateTimeText(static_cast::Column &>(column).getData()[row_num], ostr, time_zone); +} + +template +void DataTypeDateTimeBase::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { serializeText(column, row_num, ostr, settings); } @@ -61,24 +96,41 @@ static inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings } } - -void DataTypeDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +static inline void readText(UInt64 & /*x*/, ReadBuffer & /*istr*/, const FormatSettings & /*settings*/, const DateLUTImpl & /*time_zone*/, const DateLUTImpl & /*utc_time_zone*/) { - time_t x; - readText(x, istr, settings, time_zone, utc_time_zone); - static_cast(column).getData().push_back(x); + // TODO implement this +// return; +// switch (settings.date_time_input_format) +// { +// case FormatSettings::DateTimeInputFormat::Basic: +// readDateTimeText(x, istr, time_zone); +// return; +// case FormatSettings::DateTimeInputFormat::BestEffort: +// parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone); +// return; +// } } -void DataTypeDateTime::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +{ + typename TypeGetter::Type x; + readText(x, istr, settings, time_zone, utc_time_zone); + static_cast::Column &>(column).getData().push_back(x); +} + +template +void DataTypeDateTimeBase::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('\'', ostr); serializeText(column, row_num, ostr, settings); writeChar('\'', ostr); } -void DataTypeDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - time_t x; + typename TypeGetter::Type x; if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808' { readText(x, istr, settings, time_zone, utc_time_zone); @@ -88,19 +140,21 @@ void DataTypeDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer & istr { readIntText(x, istr); } - static_cast(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. + static_cast::Column &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety. } -void DataTypeDateTime::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('"', ostr); serializeText(column, row_num, ostr, settings); writeChar('"', ostr); } -void DataTypeDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - time_t x; + typename TypeGetter::Type x; if (checkChar('"', istr)) { readText(x, istr, settings, time_zone, utc_time_zone); @@ -110,19 +164,21 @@ void DataTypeDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, { readIntText(x, istr); } - static_cast(column).getData().push_back(x); + static_cast::Column &>(column).getData().push_back(x); } -void DataTypeDateTime::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { writeChar('"', ostr); serializeText(column, row_num, ostr, settings); writeChar('"', ostr); } -void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const +template +void DataTypeDateTimeBase::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - time_t x; + typename TypeGetter::Type x; if (istr.eof()) throwReadAfterEOF(); @@ -137,24 +193,27 @@ void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, c if (maybe_quote == '\'' || maybe_quote == '\"') assertChar(maybe_quote, istr); - static_cast(column).getData().push_back(x); + static_cast::Column &>(column).getData().push_back(x); } -void DataTypeDateTime::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const +template +void DataTypeDateTimeBase::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const { if (value_index) return; - value_index = static_cast(protobuf.writeDateTime(static_cast(column).getData()[row_num])); + typename TypeGetter::Type t = static_cast::Column &>(column).getData()[row_num]; + value_index = static_cast(protobuf.writeDateTime(t)); } -void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const +template +void DataTypeDateTimeBase::deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const { row_added = false; - time_t t; + typename TypeGetter::Type t; if (!protobuf.readDateTime(t)) return; - auto & container = static_cast(column).getData(); + auto & container = static_cast::Column &>(column).getData(); if (allow_add_row) { container.emplace_back(t); @@ -164,7 +223,8 @@ void DataTypeDateTime::deserializeProtobuf(IColumn & column, ProtobufReader & pr container.back() = t; } -bool DataTypeDateTime::equals(const IDataType & rhs) const +template +bool DataTypeDateTimeBase::equals(const IDataType & rhs) const { /// DateTime with different timezones are equal, because: /// "all types with different time zones are equivalent and may be used interchangingly." @@ -193,9 +253,26 @@ static DataTypePtr create(const ASTPtr & arguments) return std::make_shared(arg->value.get()); } +static DataTypePtr create64(const ASTPtr & arguments) +{ + if (!arguments) + return std::make_shared(); + + if (arguments->children.size() != 1) + throw Exception("DateTime64 data type can optionally have only one argument - time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto * arg = arguments->children[0]->as(); + if (!arg || arg->value.getType() != Field::Types::String) + throw Exception("Parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(arg->value.get()); +} + void registerDataTypeDateTime(DataTypeFactory & factory) { factory.registerDataType("DateTime", create, DataTypeFactory::CaseInsensitive); + factory.registerDataType("DateTime64", create64, DataTypeFactory::CaseInsensitive); + factory.registerAlias("TIMESTAMP", "DateTime", DataTypeFactory::CaseInsensitive); } diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index 679a2777472..ba6116a2222 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -28,14 +28,15 @@ namespace DB * Server time zone is the time zone specified in 'timezone' parameter in configuration file, * or system time zone at the moment of server startup. */ -class DataTypeDateTime final : public DataTypeNumberBase +template +class DataTypeDateTimeBase : public DataTypeNumberBase { public: - DataTypeDateTime(const std::string & time_zone_name = ""); + DataTypeDateTimeBase(const std::string & time_zone_name = ""); - const char * getFamilyName() const override { return "DateTime"; } + const char * getFamilyName() const override; std::string doGetName() const override; - TypeIndex getTypeId() const override { return TypeIndex::DateTime; } + TypeIndex getTypeId() const override; void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; @@ -62,4 +63,13 @@ private: const DateLUTImpl & utc_time_zone; }; +struct DataTypeDateTime : DataTypeDateTimeBase { + using DataTypeDateTimeBase::DataTypeDateTimeBase; +}; + +struct DataTypeDateTime64 : DataTypeDateTimeBase { + using DataTypeDateTimeBase::DataTypeDateTimeBase; +}; + } + diff --git a/dbms/src/Formats/ProtobufReader.h b/dbms/src/Formats/ProtobufReader.h index b9b1ac36c51..f732312393d 100644 --- a/dbms/src/Formats/ProtobufReader.h +++ b/dbms/src/Formats/ProtobufReader.h @@ -72,6 +72,7 @@ public: bool readUUID(UUID & uuid) { return current_converter->readUUID(uuid); } bool readDate(DayNum & date) { return current_converter->readDate(date); } bool readDateTime(time_t & tm) { return current_converter->readDateTime(tm); } + bool readDateTime(UInt64 & tm) { return current_converter->readUInt64(tm); } bool readDecimal(Decimal32 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal32(decimal, precision, scale); } bool readDecimal(Decimal64 & decimal, UInt32 precision, UInt32 scale) { return current_converter->readDecimal64(decimal, precision, scale); } diff --git a/dbms/src/Formats/ProtobufWriter.h b/dbms/src/Formats/ProtobufWriter.h index aba3a2b2dc6..9a1df919b3b 100644 --- a/dbms/src/Formats/ProtobufWriter.h +++ b/dbms/src/Formats/ProtobufWriter.h @@ -70,6 +70,7 @@ public: bool writeUUID(const UUID & uuid) { return writeValueIfPossible(&IConverter::writeUUID, uuid); } bool writeDate(DayNum date) { return writeValueIfPossible(&IConverter::writeDate, date); } bool writeDateTime(time_t tm) { return writeValueIfPossible(&IConverter::writeDateTime, tm); } + bool writeDateTime(UInt64 tm) { return writeValueIfPossible(&IConverter::writeUInt64, tm); } bool writeDecimal(Decimal32 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal32, decimal, scale); } bool writeDecimal(Decimal64 decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal64, decimal, scale); } bool writeDecimal(const Decimal128 & decimal, UInt32 scale) { return writeValueIfPossible(&IConverter::writeDecimal128, decimal, scale); } From 9407a24759771cd2afb6cf7250e720d0d291351f Mon Sep 17 00:00:00 2001 From: Martijn Bakker Date: Mon, 1 Apr 2019 17:18:13 +0100 Subject: [PATCH 2/2] able to insert DateTime64 objects into the table --- dbms/src/DataTypes/DataTypeDateTime.cpp | 71 +++++++++++++++++-- dbms/src/DataTypes/DataTypeDateTime.h | 18 ++++- .../0_stateless/00921_datetime64.reference | 3 + .../queries/0_stateless/00921_datetime64.sql | 15 ++++ 4 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 dbms/tests/queries/0_stateless/00921_datetime64.reference create mode 100644 dbms/tests/queries/0_stateless/00921_datetime64.sql diff --git a/dbms/src/DataTypes/DataTypeDateTime.cpp b/dbms/src/DataTypes/DataTypeDateTime.cpp index 9f229ffdd95..502f4562dbb 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.cpp +++ b/dbms/src/DataTypes/DataTypeDateTime.cpp @@ -16,6 +16,7 @@ #include +#include namespace DB { @@ -47,6 +48,23 @@ DataTypeDateTimeBase::DataTypeDateTimeBase(const std::string & time_ { } +DataTypeDateTime64::Precision parsePrecision(const std::string & precision_name) +{ + if (precision_name == "MILLI") + return DataTypeDateTime64::Precision::Millis; + else if (precision_name == "MICRO") + return DataTypeDateTime64::Precision::Micros; + return DataTypeDateTime64::Precision::Nanos; +} + +DataTypeDateTime64::DataTypeDateTime64(const std::string & time_zone_name, const std::string & precision_name) + : DataTypeDateTimeBase(time_zone_name), + precision(parsePrecision(precision_name)) +{ +} + + + template const char * DataTypeDateTimeBase::getFamilyName() const { @@ -76,6 +94,43 @@ void DataTypeDateTimeBase::serializeText(const IColumn & column, siz writeDateTimeText(static_cast::Column &>(column).getData()[row_num], ostr, time_zone); } +void DataTypeDateTime64::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const +{ + time_t base_time; + auto full_time = static_cast(column).getData()[row_num]; + UInt32 time_fraction; + int pad_length = 0; + + switch(precision) { + case DataTypeDateTime64::Precision::Millis: { + base_time = full_time / MILLIS_PER_SECOND; + time_fraction = full_time % MILLIS_PER_SECOND; + pad_length = 3; + break; + } + case DataTypeDateTime64::Precision::Micros: { + base_time = full_time / MICROS_PER_SECOND; + time_fraction = full_time % MICROS_PER_SECOND; + pad_length = 6; + break; + } + case DataTypeDateTime64::Precision::Nanos: { + base_time = full_time / NANOS_PER_SECOND; + time_fraction = full_time % NANOS_PER_SECOND; + pad_length = 9; + break; + } + } + + writeDateTimeText(base_time, ostr, time_zone); + writeText(".", 1, ostr); + + /// TODO make this efficient + std::stringstream ss; + ss << std::setfill('0') << std::setw(pad_length) << time_fraction; + writeText(ss.str(), ostr); +} + template void DataTypeDateTimeBase::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const { @@ -258,14 +313,18 @@ static DataTypePtr create64(const ASTPtr & arguments) if (!arguments) return std::make_shared(); - if (arguments->children.size() != 1) - throw Exception("DateTime64 data type can optionally have only one argument - time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (arguments->children.size() != 2) + throw Exception("DateTime64 data type can optionally have 2 arguments - precision and time zone name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - const auto * arg = arguments->children[0]->as(); - if (!arg || arg->value.getType() != Field::Types::String) - throw Exception("Parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + const auto * timezone_arg = arguments->children[0]->as(); + if (!timezone_arg || timezone_arg->value.getType() != Field::Types::String) + throw Exception("Timezone parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return std::make_shared(arg->value.get()); + const auto * precision_arg = arguments->children[1]->as(); + if (!precision_arg || precision_arg->value.getType() != Field::Types::String) + throw Exception("Precision parameter for DateTime64 data type must be string literal", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(timezone_arg->value.get(), precision_arg->value.get()); } void registerDataTypeDateTime(DataTypeFactory & factory) diff --git a/dbms/src/DataTypes/DataTypeDateTime.h b/dbms/src/DataTypes/DataTypeDateTime.h index ba6116a2222..5e0d0d6085c 100644 --- a/dbms/src/DataTypes/DataTypeDateTime.h +++ b/dbms/src/DataTypes/DataTypeDateTime.h @@ -57,7 +57,7 @@ public: const DateLUTImpl & getTimeZone() const { return time_zone; } -private: +protected: bool has_explicit_time_zone; const DateLUTImpl & time_zone; const DateLUTImpl & utc_time_zone; @@ -68,7 +68,21 @@ struct DataTypeDateTime : DataTypeDateTimeBase { }; struct DataTypeDateTime64 : DataTypeDateTimeBase { - using DataTypeDateTimeBase::DataTypeDateTimeBase; + enum class Precision { + Millis, + Micros, + Nanos, + }; + static constexpr UInt32 MILLIS_PER_SECOND = 1000; + static constexpr UInt32 MICROS_PER_SECOND = 1000 * 1000; + static constexpr UInt32 NANOS_PER_SECOND = 1000 * 1000 * 1000; + + DataTypeDateTime64(const std::string & time_zone_name = "", const std::string & precision_name = ""); + + void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; + +private: + const Precision precision; }; } diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.reference b/dbms/tests/queries/0_stateless/00921_datetime64.reference new file mode 100644 index 00000000000..c866f4b76b8 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00921_datetime64.reference @@ -0,0 +1,3 @@ +2 1970-01-01 01:00:01.000000001 1 0 +2 1970-01-01 01:00:01.000000003 3 3 +2 1970-01-01 01:00:01.000000005 5 3 diff --git a/dbms/tests/queries/0_stateless/00921_datetime64.sql b/dbms/tests/queries/0_stateless/00921_datetime64.sql new file mode 100644 index 00000000000..82938dbf5ed --- /dev/null +++ b/dbms/tests/queries/0_stateless/00921_datetime64.sql @@ -0,0 +1,15 @@ +USE test; + +DROP TABLE IF EXISTS A; +DROP TABLE IF EXISTS B; + +CREATE TABLE A(k UInt32, t DateTime64, a Float64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO A(k,t,a) VALUES (2,1000000001,1),(2,1000000003,3),(2,1000000005,5); + +CREATE TABLE B(k UInt32, t DateTime64, b Float64) ENGINE = MergeTree() ORDER BY (k, t); +INSERT INTO B(k,t,b) VALUES (2,1000000003,3); + +SELECT k, t, a, b FROM A ASOF LEFT JOIN B USING(k,t) ORDER BY (k,t); + +DROP TABLE B; +DROP TABLE A;