From e1dca98ecb1923a24e300d6fe3aa43af965783dd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 Aug 2011 02:08:22 +0000 Subject: [PATCH] dbms: development. --- dbms/include/DB/Columns/ColumnFixedArray.h | 6 +- dbms/include/DB/Columns/ColumnFixedString.h | 54 +++++++++ dbms/include/DB/Core/ErrorCodes.h | 1 + dbms/include/DB/DataTypes/DataTypeDate.h | 34 ++---- dbms/include/DB/DataTypes/DataTypeDateTime.h | 58 +++++++++ .../DB/DataTypes/DataTypeFixedString.h | 52 ++++++++ dbms/include/DB/IO/ReadHelpers.h | 24 ++++ dbms/include/DB/IO/WriteHelpers.h | 37 ++++++ dbms/src/DataTypes/DataTypeFixedString.cpp | 113 ++++++++++++++++++ dbms/src/DataTypes/DataTypeString.cpp | 2 +- dbms/src/Storages/tests/hit_log.cpp | 24 ++-- 11 files changed, 367 insertions(+), 38 deletions(-) create mode 100644 dbms/include/DB/Columns/ColumnFixedString.h create mode 100644 dbms/include/DB/DataTypes/DataTypeDateTime.h create mode 100644 dbms/include/DB/DataTypes/DataTypeFixedString.h create mode 100644 dbms/src/DataTypes/DataTypeFixedString.cpp diff --git a/dbms/include/DB/Columns/ColumnFixedArray.h b/dbms/include/DB/Columns/ColumnFixedArray.h index 28e7c175277..c6b5a0be38d 100644 --- a/dbms/include/DB/Columns/ColumnFixedArray.h +++ b/dbms/include/DB/Columns/ColumnFixedArray.h @@ -23,7 +23,7 @@ public: ColumnFixedArray(SharedPtr nested_column, size_t n_) : data(nested_column), n(n_) { - data.clear(); + clear(); } SharedPtr cloneEmpty() const @@ -68,7 +68,7 @@ public: void clear() { - data.clear(); + data->clear(); } /** Более эффективные методы манипуляции */ @@ -82,7 +82,7 @@ public: return *data; } -private: +protected: SharedPtr data; const size_t n; }; diff --git a/dbms/include/DB/Columns/ColumnFixedString.h b/dbms/include/DB/Columns/ColumnFixedString.h new file mode 100644 index 00000000000..50bc15364ae --- /dev/null +++ b/dbms/include/DB/Columns/ColumnFixedString.h @@ -0,0 +1,54 @@ +#pragma once + +#include // memcpy + +#include +#include + + +namespace DB +{ + +/** Cтолбeц значений типа "строка фиксированной длины". + * Отличается от массива UInt8 фиксированной длины только получением элемента (в виде String, а не Array) + * Если вставить строку меньшей длины, то она будет дополнена нулевыми байтами. + */ +class ColumnFixedString : public ColumnFixedArray +{ +private: + ColumnUInt8::Container_t & char_data; + +public: + /** Создать пустой столбец строк фиксированной длины n */ + ColumnFixedString(size_t n) + : ColumnFixedArray(new ColumnUInt8(), n), + char_data(dynamic_cast(*data).getData()) + { + } + + SharedPtr cloneEmpty() const + { + return new ColumnFixedString(n); + } + + Field operator[](size_t index) const + { + return String(reinterpret_cast(&char_data[n * index]), n); + } + + void insert(const Field & x) + { + const String & s = boost::get(x); + size_t old_size = char_data.size(); + char_data.resize(old_size + n); + memcpy(&char_data[old_size], s.data(), s.size()); + } + + void insertDefault() + { + char_data.resize(char_data.size() + n); + } +}; + + +} diff --git a/dbms/include/DB/Core/ErrorCodes.h b/dbms/include/DB/Core/ErrorCodes.h index f8afa0ee96e..59e4b6e7b52 100644 --- a/dbms/include/DB/Core/ErrorCodes.h +++ b/dbms/include/DB/Core/ErrorCodes.h @@ -48,6 +48,7 @@ namespace ErrorCodes CANNOT_PARSE_DATE, TOO_LARGE_SIZE_COMPRESSED, CHECKSUM_DOESNT_MATCH, + CANNOT_PARSE_DATETIME, }; } diff --git a/dbms/include/DB/DataTypes/DataTypeDate.h b/dbms/include/DB/DataTypes/DataTypeDate.h index 044182aab3d..855d3f1b4c4 100644 --- a/dbms/include/DB/DataTypes/DataTypeDate.h +++ b/dbms/include/DB/DataTypes/DataTypeDate.h @@ -1,10 +1,4 @@ -#ifndef DBMS_DATA_TYPES_NUMBER_FIXED_H -#define DBMS_DATA_TYPES_NUMBER_FIXED_H - -#include - -#include -#include +#pragma once #include #include @@ -16,36 +10,24 @@ namespace DB { -class DataTypeDate : public IDataTypeNumberFixed +class DataTypeDate : public IDataTypeNumberFixed { -private: - DateLUTSingleton & date_lut; - public: - DataTypeDate() : date_lut(DateLUTSingleton::instance()) {} + DataTypeDate() {} std::string getName() const { return "Date"; } SharedPtr clone() const { return new DataTypeDate; } void serializeText(const Field & field, WriteBuffer & ostr) const { - DateLUT::Values & values = date_lut.getValues(boost::get(field)); - writeIntText(values.year, ostr); - writeChar('-', ostr); - writeIntText(values.month, ostr); - writeChar('-', ostr); - writeIntText(values.day_of_month, ostr); + writeDateText(boost::get(field), ostr); } void deserializeText(Field & field, ReadBuffer & istr) const { - std::string s; - readString(s, istr); - - // TODO: тормоза - int time_zone_diff = 0; - field = date_lut.toDayNum(Poco::DateTimeParser::parse( - s, time_zone_diff).timestamp().epochTime()); + Yandex::DayNum_t x; + readDateText(x, istr); + field = x; } void serializeTextEscaped(const Field & field, WriteBuffer & ostr) const @@ -74,5 +56,3 @@ public: }; } - -#endif diff --git a/dbms/include/DB/DataTypes/DataTypeDateTime.h b/dbms/include/DB/DataTypes/DataTypeDateTime.h new file mode 100644 index 00000000000..75456f4ffc9 --- /dev/null +++ b/dbms/include/DB/DataTypes/DataTypeDateTime.h @@ -0,0 +1,58 @@ +#pragma once + +#include +#include + +#include +#include + + +namespace DB +{ + +class DataTypeDateTime : public IDataTypeNumberFixed +{ +public: + DataTypeDateTime() {} + + std::string getName() const { return "DateTime"; } + SharedPtr clone() const { return new DataTypeDateTime; } + + void serializeText(const Field & field, WriteBuffer & ostr) const + { + writeDateTimeText(boost::get(field), ostr); + } + + void deserializeText(Field & field, ReadBuffer & istr) const + { + time_t x; + readDateTimeText(x, istr); + field = x; + } + + void serializeTextEscaped(const Field & field, WriteBuffer & ostr) const + { + serializeText(field, ostr); + } + + void deserializeTextEscaped(Field & field, ReadBuffer & istr) const + { + deserializeText(field, istr); + } + + void serializeTextQuoted(const Field & field, WriteBuffer & ostr, bool compatible = false) const + { + writeChar('\'', ostr); + serializeText(field, ostr); + writeChar('\'', ostr); + } + + void deserializeTextQuoted(Field & field, ReadBuffer & istr, bool compatible = false) const + { + assertString("'", istr); + deserializeText(field, istr); + assertString("'", istr); + } +}; + +} diff --git a/dbms/include/DB/DataTypes/DataTypeFixedString.h b/dbms/include/DB/DataTypes/DataTypeFixedString.h new file mode 100644 index 00000000000..71fb8e28d01 --- /dev/null +++ b/dbms/include/DB/DataTypes/DataTypeFixedString.h @@ -0,0 +1,52 @@ +#pragma once + +#include + +#include +#include + +#include + + +namespace DB +{ + +using Poco::SharedPtr; + + +class DataTypeFixedString : public IDataType +{ +private: + size_t n; + +public: + DataTypeFixedString(size_t n_) : n(n_) {} + + std::string getName() const + { + return "FixedString(" + Poco::NumberFormatter::format(n) + ")"; + } + + SharedPtr clone() const + { + return new DataTypeFixedString(n); + } + + void serializeBinary(const Field & field, WriteBuffer & ostr) const; + void deserializeBinary(Field & field, ReadBuffer & istr) const; + void serializeBinary(const IColumn & column, WriteBuffer & ostr) const; + void deserializeBinary(IColumn & column, ReadBuffer & istr, size_t limit) const; + + void serializeText(const Field & field, WriteBuffer & ostr) const; + void deserializeText(Field & field, ReadBuffer & istr) const; + + void serializeTextEscaped(const Field & field, WriteBuffer & ostr) const; + void deserializeTextEscaped(Field & field, ReadBuffer & istr) const; + + void serializeTextQuoted(const Field & field, WriteBuffer & ostr, bool compatible = false) const; + void deserializeTextQuoted(Field & field, ReadBuffer & istr, bool compatible = false) const; + + SharedPtr createColumn() const; +}; + +} diff --git a/dbms/include/DB/IO/ReadHelpers.h b/dbms/include/DB/IO/ReadHelpers.h index 4d6e80b8538..82a801e274a 100644 --- a/dbms/include/DB/IO/ReadHelpers.h +++ b/dbms/include/DB/IO/ReadHelpers.h @@ -246,6 +246,30 @@ inline void readDateText(Yandex::DayNum_t & date, ReadBuffer & buf) date = Yandex::DateLUTSingleton::instance().makeDayNum(year, month, day); } + +/// в формате YYYY-MM-DD HH:MM:SS, согласно текущему часовому поясу +inline void readDateTimeText(time_t & datetime, ReadBuffer & buf) +{ + char s[19]; + size_t size = buf.read(s, 19); + if (19 != size) + { + s[size] = 0; + throw Exception(std::string("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME); + } + + UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); + UInt8 month = (s[5] - '0') * 10 + (s[6] - '0'); + UInt8 day = (s[8] - '0') * 10 + (s[9] - '0'); + + UInt8 hour = (s[11] - '0') * 10 + (s[12] - '0'); + UInt8 minute = (s[14] - '0') * 10 + (s[15] - '0'); + UInt8 second = (s[17] - '0') * 10 + (s[18] - '0'); + + datetime = Yandex::DateLUTSingleton::instance().makeDateTime(year, month, day, hour, minute, second); +} + + } #endif diff --git a/dbms/include/DB/IO/WriteHelpers.h b/dbms/include/DB/IO/WriteHelpers.h index 6d049b04d66..d9174e9cf31 100644 --- a/dbms/include/DB/IO/WriteHelpers.h +++ b/dbms/include/DB/IO/WriteHelpers.h @@ -127,6 +127,43 @@ inline void writeDateText(Yandex::DayNum_t date, WriteBuffer & buf) } +/// в формате YYYY-MM-DD HH:MM:SS, согласно текущему часовому поясу +inline void writeDateTimeText(time_t datetime, WriteBuffer & buf) +{ + char s[19]; + + Yandex::DateLUTSingleton & date_lut = Yandex::DateLUTSingleton::instance(); + const Yandex::DateLUT::Values & values = date_lut.getValues(datetime); + + s[0] = '0' + values.year / 1000; + s[1] = '0' + (values.year / 100) % 10; + s[2] = '0' + (values.year / 10) % 10; + s[3] = '0' + values.year % 10; + s[4] = '-'; + s[5] = '0' + values.month / 10; + s[6] = '0' + values.month % 10; + s[7] = '-'; + s[8] = '0' + values.day_of_month / 10; + s[9] = '0' + values.day_of_month % 10; + + UInt8 hour = date_lut.toHourInaccurate(datetime); + UInt8 minute = date_lut.toMinute(datetime); + UInt8 second = date_lut.toSecond(datetime); + + s[10] = ' '; + s[11] = '0' + hour / 10; + s[12] = '0' + hour % 10; + s[13] = ':'; + s[14] = '0' + minute / 10; + s[15] = '0' + minute % 10; + s[16] = ':'; + s[17] = '0' + second / 10; + s[18] = '0' + second % 10; + + buf.write(s, 19); +} + + } #endif diff --git a/dbms/src/DataTypes/DataTypeFixedString.cpp b/dbms/src/DataTypes/DataTypeFixedString.cpp new file mode 100644 index 00000000000..9df7617906e --- /dev/null +++ b/dbms/src/DataTypes/DataTypeFixedString.cpp @@ -0,0 +1,113 @@ +#include + +#include +#include +#include + +#include + +#include +#include +#include + + +namespace DB +{ + +using Poco::SharedPtr; + + +void DataTypeFixedString::serializeBinary(const Field & field, WriteBuffer & ostr) const +{ + const String & s = boost::get(field); + ostr.write(s.data(), std::min(s.size(), n)); + if (s.size() < n) + for (size_t i = s.size(); i < n; ++i) + ostr.write(0); +} + + +void DataTypeFixedString::deserializeBinary(Field & field, ReadBuffer & istr) const +{ + field = String(); + String & s = boost::get(field); + s.resize(n); + /// непереносимо, но (действительно) быстрее + istr.readStrict(const_cast(s.data()), n); +} + + +void DataTypeFixedString::serializeBinary(const IColumn & column, WriteBuffer & ostr) const +{ + const ColumnFixedArray & column_array = dynamic_cast(column); + const ColumnUInt8::Container_t & data = dynamic_cast(column_array.getData()).getData(); + + ostr.write(reinterpret_cast(&data[0]), data.size()); +} + + +void DataTypeFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr, size_t limit) const +{ + ColumnFixedArray & column_array = dynamic_cast(column); + ColumnUInt8::Container_t & data = dynamic_cast(column_array.getData()).getData(); + + size_t max_bytes = limit * n; + data.resize(max_bytes); + size_t read_bytes = istr.read(reinterpret_cast(&data[0]), max_bytes); + + if (read_bytes % n != 0) + throw Exception("Cannot read all data of type FixedString", + ErrorCodes::CANNOT_READ_ALL_DATA); + + data.resize(read_bytes); +} + + +void DataTypeFixedString::serializeText(const Field & field, WriteBuffer & ostr) const +{ + writeString(boost::get(field), ostr); +} + + +void DataTypeFixedString::deserializeText(Field & field, ReadBuffer & istr) const +{ + String s; + readString(s, istr); + field = s; +} + + +void DataTypeFixedString::serializeTextEscaped(const Field & field, WriteBuffer & ostr) const +{ + writeEscapedString(boost::get(field), ostr); +} + + +void DataTypeFixedString::deserializeTextEscaped(Field & field, ReadBuffer & istr) const +{ + String s; + readEscapedString(s, istr); + field = s; +} + + +void DataTypeFixedString::serializeTextQuoted(const Field & field, WriteBuffer & ostr, bool compatible) const +{ + writeQuotedString(boost::get(field), ostr); +} + + +void DataTypeFixedString::deserializeTextQuoted(Field & field, ReadBuffer & istr, bool compatible) const +{ + String s; + readQuotedString(s, istr); + field = s; +} + + +SharedPtr DataTypeFixedString::createColumn() const +{ + return new ColumnFixedString(n); +} + +} diff --git a/dbms/src/DataTypes/DataTypeString.cpp b/dbms/src/DataTypes/DataTypeString.cpp index 1556d9be638..69a1ad53a94 100644 --- a/dbms/src/DataTypes/DataTypeString.cpp +++ b/dbms/src/DataTypes/DataTypeString.cpp @@ -29,7 +29,7 @@ void DataTypeString::deserializeBinary(Field & field, ReadBuffer & istr) const { UInt64 size; readVarUInt(size, istr); - field = String(""); + field = String(); String & s = boost::get(field); s.resize(size); /// непереносимо, но (действительно) быстрее diff --git a/dbms/src/Storages/tests/hit_log.cpp b/dbms/src/Storages/tests/hit_log.cpp index ac24f941a3c..a9ac89f14f8 100644 --- a/dbms/src/Storages/tests/hit_log.cpp +++ b/dbms/src/Storages/tests/hit_log.cpp @@ -11,6 +11,8 @@ #include #include +#include +#include #include #include @@ -33,18 +35,14 @@ int main(int argc, char ** argv) boost::assign::push_back(names_and_types_list) ("WatchID", new DB::DataTypeUInt64) - ("ChunkID", new DB::DataTypeUInt64) - ("Random", new DB::DataTypeUInt32) ("JavaEnable", new DB::DataTypeUInt8) - ("FrameEnable", new DB::DataTypeUInt8) ("Title", new DB::DataTypeString) ("GoodEvent", new DB::DataTypeUInt32) -// ("EventTime", new DB::DataTypeDateTime) + ("EventTime", new DB::DataTypeDateTime) ("CounterID", new DB::DataTypeUInt32) ("ClientIP", new DB::DataTypeUInt32) ("RegionID", new DB::DataTypeUInt32) ("UniqID", new DB::DataTypeUInt64) - ("SessID", new DB::DataTypeUInt32) ("CounterClass", new DB::DataTypeUInt8) ("OS", new DB::DataTypeUInt8) ("UserAgent", new DB::DataTypeUInt8) @@ -60,7 +58,7 @@ int main(int argc, char ** argv) ("NetMajor", new DB::DataTypeUInt8) ("NetMinor", new DB::DataTypeUInt8) ("UserAgentMajor", new DB::DataTypeUInt16) -// ("UserAgentMinor", new DB::DataTypeFixedString(2)) + ("UserAgentMinor", new DB::DataTypeFixedString(2)) ("CookieEnable", new DB::DataTypeUInt8) ("JavascriptEnable", new DB::DataTypeUInt8) ("IsMobile", new DB::DataTypeUInt8) @@ -76,12 +74,24 @@ int main(int argc, char ** argv) ("WindowClientWidth", new DB::DataTypeUInt16) ("WindowClientHeight", new DB::DataTypeUInt16) ("ClientTimeZone", new DB::DataTypeInt16) -// ("ClientEventTime", new DB::DataTypeDateTime) + ("ClientEventTime", new DB::DataTypeDateTime) ("SilverlightVersion1", new DB::DataTypeUInt8) ("SilverlightVersion2", new DB::DataTypeUInt8) ("SilverlightVersion3", new DB::DataTypeUInt32) ("SilverlightVersion4", new DB::DataTypeUInt16) ("PageCharset", new DB::DataTypeString) + ("CodeVersion", new DB::DataTypeUInt32) + ("IsLink", new DB::DataTypeUInt8) + ("IsDownload", new DB::DataTypeUInt8) + ("IsNotBounce", new DB::DataTypeUInt8) + ("FUniqID", new DB::DataTypeUInt64) + ("OriginalURL", new DB::DataTypeString) + ("HID", new DB::DataTypeUInt32) + ("IsOldCounter", new DB::DataTypeUInt8) + ("IsEvent", new DB::DataTypeUInt8) + ("IsParameter", new DB::DataTypeUInt8) + ("DontCountHits", new DB::DataTypeUInt8) + ("WithHash", new DB::DataTypeUInt8) ; SharedPtr names_and_types_map = new DB::NamesAndTypes;