dbms: development.

This commit is contained in:
Alexey Milovidov 2011-08-07 02:08:22 +00:00
parent cf7fcfc91c
commit e1dca98ecb
11 changed files with 367 additions and 38 deletions

View File

@ -23,7 +23,7 @@ public:
/** Stores nested_column as the underlying data column and n_ as the fixed array size,
  * then clears the column.
  * NOTE(review): this hunk appears to show both the removed `data.clear()` line and the
  * added `clear()` line of the diff — confirm which one is live.
  */
ColumnFixedArray(SharedPtr<IColumn> nested_column, size_t n_)
: data(nested_column), n(n_)
{
data.clear();
clear();
}
SharedPtr<IColumn> cloneEmpty() const
@ -68,7 +68,7 @@ public:
/** Clears the nested data column.
  * NOTE(review): this hunk appears to show both the removed `data.clear()` line and the
  * added `data->clear()` line of the diff — confirm which one is live.
  */
void clear()
{
data.clear();
data->clear();
}
/** Более эффективные методы манипуляции */
@ -82,7 +82,7 @@ public:
return *data;
}
private:
protected:
SharedPtr<IColumn> data;
const size_t n;
};

View File

@ -0,0 +1,54 @@
#pragma once
#include <string.h> // memcpy
#include <DB/Columns/ColumnFixedArray.h>
#include <DB/Columns/ColumnsNumber.h>
namespace DB
{
/** Column of values of type "fixed-length string".
  * Differs from a fixed-length array of UInt8 only in how an element is obtained (as a String, not an Array).
  * If a shorter string is inserted, it is padded with zero bytes.
  * If a longer string is inserted, only its first n bytes are stored.
  */
class ColumnFixedString : public ColumnFixedArray
{
private:
    /// Byte container of the nested ColumnUInt8 held in `data`; each value occupies n consecutive bytes.
    ColumnUInt8::Container_t & char_data;

public:
    /** Create an empty column of fixed-length strings of length n. */
    ColumnFixedString(size_t n)
        : ColumnFixedArray(new ColumnUInt8(), n),
        char_data(dynamic_cast<ColumnUInt8 &>(*data).getData())
    {
    }

    /// Returns a new column created with the same string length n.
    SharedPtr<IColumn> cloneEmpty() const
    {
        return new ColumnFixedString(n);
    }

    /// Returns the n bytes starting at offset n * index as a String.
    Field operator[](size_t index) const
    {
        return String(reinterpret_cast<const char *>(&char_data[n * index]), n);
    }

    /** Appends the String held in x as one value of n bytes.
      * The copy is limited to n bytes: copying s.size() bytes unconditionally would write
      * past the end of the container whenever the string is longer than n.
      */
    void insert(const Field & x)
    {
        const String & s = boost::get<const String &>(x);
        const size_t bytes_to_copy = s.size() < n ? s.size() : n;

        size_t old_size = char_data.size();
        char_data.resize(old_size + n);

        /// Skipping the empty copy also avoids indexing past the end when n is zero.
        if (bytes_to_copy != 0)
            memcpy(&char_data[old_size], s.data(), bytes_to_copy);
    }

    /// Appends one value by growing the byte container by n bytes.
    void insertDefault()
    {
        char_data.resize(char_data.size() + n);
    }
};
}

View File

@ -48,6 +48,7 @@ namespace ErrorCodes
CANNOT_PARSE_DATE,
TOO_LARGE_SIZE_COMPRESSED,
CHECKSUM_DOESNT_MATCH,
CANNOT_PARSE_DATETIME,
};
}

View File

@ -1,10 +1,4 @@
#ifndef DBMS_DATA_TYPES_NUMBER_FIXED_H
#define DBMS_DATA_TYPES_NUMBER_FIXED_H
#include <Poco/DateTimeParser.h>
#include <DB/Core/Exception.h>
#include <DB/Core/ErrorCodes.h>
#pragma once
#include <DB/IO/ReadHelpers.h>
#include <DB/IO/WriteHelpers.h>
@ -16,36 +10,24 @@
namespace DB
{
class DataTypeDate : public IDataTypeNumberFixed<UInt16, ColumnUInt64>
class DataTypeDate : public IDataTypeNumberFixed<UInt16, ColumnUInt16>
{
private:
DateLUTSingleton & date_lut;
public:
DataTypeDate() : date_lut(DateLUTSingleton::instance()) {}
DataTypeDate() {}
std::string getName() const { return "Date"; }
SharedPtr<IDataType> clone() const { return new DataTypeDate; }
/** Writes the date held in field to ostr as text.
  * NOTE(review): this hunk appears to interleave the removed DateLUT-based lines with the
  * added writeDateText call — confirm which lines are live.
  */
void serializeText(const Field & field, WriteBuffer & ostr) const
{
DateLUT::Values & values = date_lut.getValues(boost::get<UInt16>(field));
writeIntText(values.year, ostr);
writeChar('-', ostr);
writeIntText(values.month, ostr);
writeChar('-', ostr);
writeIntText(values.day_of_month, ostr);
writeDateText(boost::get<UInt64>(field), ostr);
}
/** Reads a date in text form from istr and stores it in field.
  * NOTE(review): this hunk appears to interleave the removed Poco::DateTimeParser lines with
  * the added readDateText lines — confirm which lines are live.
  * NOTE(review): serializeText reads the field with boost::get<UInt64>; presumably the value
  * assigned here must end up as UInt64 inside the Field — verify.
  */
void deserializeText(Field & field, ReadBuffer & istr) const
{
std::string s;
readString(s, istr);
// TODO: slow
int time_zone_diff = 0;
field = date_lut.toDayNum(Poco::DateTimeParser::parse(
s, time_zone_diff).timestamp().epochTime());
Yandex::DayNum_t x;
readDateText(x, istr);
field = x;
}
void serializeTextEscaped(const Field & field, WriteBuffer & ostr) const
@ -74,5 +56,3 @@ public:
};
}
#endif

View File

@ -0,0 +1,58 @@
#pragma once
#include <DB/IO/ReadHelpers.h>
#include <DB/IO/WriteHelpers.h>
#include <DB/Columns/ColumnsNumber.h>
#include <DB/DataTypes/IDataTypeNumberFixed.h>
namespace DB
{
/** Data type "DateTime", declared over UInt32 values kept in a ColumnUInt32.
  * The text form is produced by writeDateTimeText and parsed by readDateTimeText.
  */
class DataTypeDateTime : public IDataTypeNumberFixed<UInt32, ColumnUInt32>
{
public:
    DataTypeDateTime() {}

    std::string getName() const { return "DateTime"; }
    SharedPtr<IDataType> clone() const { return new DataTypeDateTime; }

    /// Writes the UInt64 held in field as date-time text.
    void serializeText(const Field & field, WriteBuffer & ostr) const
    {
        writeDateTimeText(boost::get<UInt64>(field), ostr);
    }

    /** Parses date-time text from istr and stores the result in field.
      * The value is stored explicitly as UInt64 because serializeText extracts it with
      * boost::get<UInt64>; assigning the raw time_t leaves the choice of the stored type
      * to the conversion rules of Field, which need not select UInt64.
      */
    void deserializeText(Field & field, ReadBuffer & istr) const
    {
        time_t x;
        readDateTimeText(x, istr);
        field = static_cast<UInt64>(x);
    }

    /// The escaped form is the same as the plain text form.
    void serializeTextEscaped(const Field & field, WriteBuffer & ostr) const
    {
        serializeText(field, ostr);
    }

    void deserializeTextEscaped(Field & field, ReadBuffer & istr) const
    {
        deserializeText(field, istr);
    }

    /// The quoted form is the plain text form wrapped in single quotes; `compatible` is not used.
    void serializeTextQuoted(const Field & field, WriteBuffer & ostr, bool compatible = false) const
    {
        writeChar('\'', ostr);
        serializeText(field, ostr);
        writeChar('\'', ostr);
    }

    /// Expects a single quote, the plain text form, and a closing single quote; `compatible` is not used.
    void deserializeTextQuoted(Field & field, ReadBuffer & istr, bool compatible = false) const
    {
        assertString("'", istr);
        deserializeText(field, istr);
        assertString("'", istr);
    }
};
}

View File

@ -0,0 +1,52 @@
#pragma once
#include <ostream>
#include <Poco/SharedPtr.h>
#include <Poco/NumberFormatter.h>
#include <DB/DataTypes/IDataType.h>
namespace DB
{
using Poco::SharedPtr;
/** Data type "FixedString(n)", parameterised by the length n.
  * Only getName and clone are defined inline; the remaining methods are declared here.
  */
class DataTypeFixedString : public IDataType
{
private:
    /// Length parameter; it appears in the type name and is passed on by clone.
    size_t n;

public:
    DataTypeFixedString(size_t n_) : n(n_) {}

    /// Returns "FixedString(<n>)".
    std::string getName() const
    {
        std::string type_name("FixedString(");
        type_name += Poco::NumberFormatter::format(n);
        type_name += ")";
        return type_name;
    }

    /// Returns a new DataTypeFixedString with the same length.
    SharedPtr<IDataType> clone() const
    {
        return new DataTypeFixedString(n);
    }

    /// Binary form of a single value.
    void serializeBinary(const Field & field, WriteBuffer & ostr) const;
    void deserializeBinary(Field & field, ReadBuffer & istr) const;

    /// Binary form of a column.
    void serializeBinary(const IColumn & column, WriteBuffer & ostr) const;
    void deserializeBinary(IColumn & column, ReadBuffer & istr, size_t limit) const;

    /// Plain text form.
    void serializeText(const Field & field, WriteBuffer & ostr) const;
    void deserializeText(Field & field, ReadBuffer & istr) const;

    /// Escaped text form.
    void serializeTextEscaped(const Field & field, WriteBuffer & ostr) const;
    void deserializeTextEscaped(Field & field, ReadBuffer & istr) const;

    /// Quoted text form.
    void serializeTextQuoted(const Field & field, WriteBuffer & ostr, bool compatible = false) const;
    void deserializeTextQuoted(Field & field, ReadBuffer & istr, bool compatible = false) const;

    SharedPtr<IColumn> createColumn() const;
};
}

View File

@ -246,6 +246,30 @@ inline void readDateText(Yandex::DayNum_t & date, ReadBuffer & buf)
date = Yandex::DateLUTSingleton::instance().makeDayNum(year, month, day);
}
/// в формате YYYY-MM-DD HH:MM:SS, согласно текущему часовому поясу
inline void readDateTimeText(time_t & datetime, ReadBuffer & buf)
{
char s[19];
size_t size = buf.read(s, 19);
if (19 != size)
{
s[size] = 0;
throw Exception(std::string("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME);
}
UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
UInt8 hour = (s[11] - '0') * 10 + (s[12] - '0');
UInt8 minute = (s[14] - '0') * 10 + (s[15] - '0');
UInt8 second = (s[17] - '0') * 10 + (s[18] - '0');
datetime = Yandex::DateLUTSingleton::instance().makeDateTime(year, month, day, hour, minute, second);
}
}
#endif

View File

@ -127,6 +127,43 @@ inline void writeDateText(Yandex::DayNum_t date, WriteBuffer & buf)
}
/// в формате YYYY-MM-DD HH:MM:SS, согласно текущему часовому поясу
inline void writeDateTimeText(time_t datetime, WriteBuffer & buf)
{
char s[19];
Yandex::DateLUTSingleton & date_lut = Yandex::DateLUTSingleton::instance();
const Yandex::DateLUT::Values & values = date_lut.getValues(datetime);
s[0] = '0' + values.year / 1000;
s[1] = '0' + (values.year / 100) % 10;
s[2] = '0' + (values.year / 10) % 10;
s[3] = '0' + values.year % 10;
s[4] = '-';
s[5] = '0' + values.month / 10;
s[6] = '0' + values.month % 10;
s[7] = '-';
s[8] = '0' + values.day_of_month / 10;
s[9] = '0' + values.day_of_month % 10;
UInt8 hour = date_lut.toHourInaccurate(datetime);
UInt8 minute = date_lut.toMinute(datetime);
UInt8 second = date_lut.toSecond(datetime);
s[10] = ' ';
s[11] = '0' + hour / 10;
s[12] = '0' + hour % 10;
s[13] = ':';
s[14] = '0' + minute / 10;
s[15] = '0' + minute % 10;
s[16] = ':';
s[17] = '0' + second / 10;
s[18] = '0' + second % 10;
buf.write(s, 19);
}
}
#endif

View File

@ -0,0 +1,113 @@
#include <Poco/SharedPtr.h>
#include <DB/Columns/ColumnFixedArray.h>
#include <DB/Columns/ColumnFixedString.h>
#include <DB/Columns/ColumnsNumber.h>
#include <DB/DataTypes/DataTypeFixedString.h>
#include <DB/IO/ReadHelpers.h>
#include <DB/IO/WriteHelpers.h>
#include <DB/IO/VarInt.h>
namespace DB
{
using Poco::SharedPtr;
void DataTypeFixedString::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const String & s = boost::get<String>(field);
ostr.write(s.data(), std::min(s.size(), n));
if (s.size() < n)
for (size_t i = s.size(); i < n; ++i)
ostr.write(0);
}
/// Makes field a String of n bytes and fills it with exactly n bytes read from istr.
void DataTypeFixedString::deserializeBinary(Field & field, ReadBuffer & istr) const
{
    field = String();
    String & target = boost::get<String>(field);
    target.resize(n);

    /// Non-portable, but (really) faster: read straight into the string's own buffer.
    char * raw_bytes = const_cast<char*>(target.data());
    istr.readStrict(raw_bytes, n);
}
/** Writes all bytes of the column's nested UInt8 data to ostr in one call.
  * column must be a ColumnFixedArray over a ColumnUInt8; otherwise dynamic_cast throws.
  */
void DataTypeFixedString::serializeBinary(const IColumn & column, WriteBuffer & ostr) const
{
    const ColumnFixedArray & column_array = dynamic_cast<const ColumnFixedArray &>(column);
    const ColumnUInt8::Container_t & data = dynamic_cast<const ColumnUInt8 &>(column_array.getData()).getData();

    /// An empty column has nothing to write, and taking &data[0] of an empty container is not valid.
    if (data.size() == 0)
        return;

    ostr.write(reinterpret_cast<const char *>(&data[0]), data.size());
}
/** Reads up to limit values (limit * n bytes) from istr into the column's nested UInt8 data.
  * The container is resized to the requested size, filled from its beginning, and then
  * shrunk to the number of bytes actually read.
  * Throws CANNOT_READ_ALL_DATA if the number of bytes read is not a multiple of n.
  * column must be a ColumnFixedArray over a ColumnUInt8; otherwise dynamic_cast throws.
  */
void DataTypeFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr, size_t limit) const
{
    ColumnFixedArray & column_array = dynamic_cast<ColumnFixedArray &>(column);
    ColumnUInt8::Container_t & data = dynamic_cast<ColumnUInt8 &>(column_array.getData()).getData();

    size_t max_bytes = limit * n;
    data.resize(max_bytes);

    /// Nothing to read. Returning here avoids taking &data[0] of an empty container
    /// and avoids the division by zero in `read_bytes % n` when n is zero.
    if (max_bytes == 0)
        return;

    size_t read_bytes = istr.read(reinterpret_cast<char *>(&data[0]), max_bytes);

    if (read_bytes % n != 0)
        throw Exception("Cannot read all data of type FixedString",
            ErrorCodes::CANNOT_READ_ALL_DATA);

    data.resize(read_bytes);
}
void DataTypeFixedString::serializeText(const Field & field, WriteBuffer & ostr) const
{
writeString(boost::get<const String &>(field), ostr);
}
/// Reads a string from istr using readString and stores it in field; its length is not checked against n here.
void DataTypeFixedString::deserializeText(Field & field, ReadBuffer & istr) const
{
    String parsed;
    readString(parsed, istr);
    field = parsed;
}
void DataTypeFixedString::serializeTextEscaped(const Field & field, WriteBuffer & ostr) const
{
writeEscapedString(boost::get<const String &>(field), ostr);
}
/// Reads a string from istr using readEscapedString and stores it in field; its length is not checked against n here.
void DataTypeFixedString::deserializeTextEscaped(Field & field, ReadBuffer & istr) const
{
    String parsed;
    readEscapedString(parsed, istr);
    field = parsed;
}
void DataTypeFixedString::serializeTextQuoted(const Field & field, WriteBuffer & ostr, bool compatible) const
{
writeQuotedString(boost::get<const String &>(field), ostr);
}
/// Reads a string from istr using readQuotedString and stores it in field; `compatible` is not used.
void DataTypeFixedString::deserializeTextQuoted(Field & field, ReadBuffer & istr, bool compatible) const
{
    String parsed;
    readQuotedString(parsed, istr);
    field = parsed;
}
/// Creates a ColumnFixedString constructed with this type's length n.
SharedPtr<IColumn> DataTypeFixedString::createColumn() const
{
    SharedPtr<IColumn> column = new ColumnFixedString(n);
    return column;
}
}

View File

@ -29,7 +29,7 @@ void DataTypeString::deserializeBinary(Field & field, ReadBuffer & istr) const
{
UInt64 size;
readVarUInt(size, istr);
field = String("");
field = String();
String & s = boost::get<String>(field);
s.resize(size);
/// непереносимо, но (действительно) быстрее

View File

@ -11,6 +11,8 @@
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/DataTypes/DataTypeFixedString.h>
#include <DB/DataTypes/DataTypeDateTime.h>
#include <DB/DataStreams/TabSeparatedRowInputStream.h>
#include <DB/DataStreams/TabSeparatedRowOutputStream.h>
@ -33,18 +35,14 @@ int main(int argc, char ** argv)
boost::assign::push_back(names_and_types_list)
("WatchID", new DB::DataTypeUInt64)
("ChunkID", new DB::DataTypeUInt64)
("Random", new DB::DataTypeUInt32)
("JavaEnable", new DB::DataTypeUInt8)
("FrameEnable", new DB::DataTypeUInt8)
("Title", new DB::DataTypeString)
("GoodEvent", new DB::DataTypeUInt32)
// ("EventTime", new DB::DataTypeDateTime)
("EventTime", new DB::DataTypeDateTime)
("CounterID", new DB::DataTypeUInt32)
("ClientIP", new DB::DataTypeUInt32)
("RegionID", new DB::DataTypeUInt32)
("UniqID", new DB::DataTypeUInt64)
("SessID", new DB::DataTypeUInt32)
("CounterClass", new DB::DataTypeUInt8)
("OS", new DB::DataTypeUInt8)
("UserAgent", new DB::DataTypeUInt8)
@ -60,7 +58,7 @@ int main(int argc, char ** argv)
("NetMajor", new DB::DataTypeUInt8)
("NetMinor", new DB::DataTypeUInt8)
("UserAgentMajor", new DB::DataTypeUInt16)
// ("UserAgentMinor", new DB::DataTypeFixedString(2))
("UserAgentMinor", new DB::DataTypeFixedString(2))
("CookieEnable", new DB::DataTypeUInt8)
("JavascriptEnable", new DB::DataTypeUInt8)
("IsMobile", new DB::DataTypeUInt8)
@ -76,12 +74,24 @@ int main(int argc, char ** argv)
("WindowClientWidth", new DB::DataTypeUInt16)
("WindowClientHeight", new DB::DataTypeUInt16)
("ClientTimeZone", new DB::DataTypeInt16)
// ("ClientEventTime", new DB::DataTypeDateTime)
("ClientEventTime", new DB::DataTypeDateTime)
("SilverlightVersion1", new DB::DataTypeUInt8)
("SilverlightVersion2", new DB::DataTypeUInt8)
("SilverlightVersion3", new DB::DataTypeUInt32)
("SilverlightVersion4", new DB::DataTypeUInt16)
("PageCharset", new DB::DataTypeString)
("CodeVersion", new DB::DataTypeUInt32)
("IsLink", new DB::DataTypeUInt8)
("IsDownload", new DB::DataTypeUInt8)
("IsNotBounce", new DB::DataTypeUInt8)
("FUniqID", new DB::DataTypeUInt64)
("OriginalURL", new DB::DataTypeString)
("HID", new DB::DataTypeUInt32)
("IsOldCounter", new DB::DataTypeUInt8)
("IsEvent", new DB::DataTypeUInt8)
("IsParameter", new DB::DataTypeUInt8)
("DontCountHits", new DB::DataTypeUInt8)
("WithHash", new DB::DataTypeUInt8)
;
SharedPtr<DB::NamesAndTypes> names_and_types_map = new DB::NamesAndTypes;