ClickHouse/dbms/include/DB/IO/ReadHelpers.h

679 lines
18 KiB
C
Raw Normal View History

2011-10-30 05:19:41 +00:00
#pragma once
2010-05-28 19:13:55 +00:00
2010-06-01 13:35:09 +00:00
#include <cstring>
2010-06-01 13:41:51 +00:00
#include <limits>
2010-05-28 19:13:55 +00:00
#include <algorithm>
2014-01-08 16:33:28 +00:00
#include <type_traits>
#include <Yandex/Common.h>
2011-05-05 19:10:17 +00:00
#include <Yandex/DateLUT.h>
#include <mysqlxx/Date.h>
#include <mysqlxx/DateTime.h>
2010-05-28 19:13:55 +00:00
#include <DB/Core/Types.h>
#include <DB/Core/Exception.h>
#include <DB/Core/ErrorCodes.h>
2010-06-04 18:25:25 +00:00
#include <DB/IO/ReadBuffer.h>
2011-08-19 18:31:14 +00:00
#include <DB/IO/VarInt.h>
2014-04-02 13:45:39 +00:00
#include <city.h>
2011-08-19 18:31:14 +00:00
#define DEFAULT_MAX_STRING_SIZE 0x00FFFFFFULL
2010-05-28 19:13:55 +00:00
namespace DB
{
2010-06-01 13:35:09 +00:00
/// Функции-помошники для форматированного чтения
static inline char parseEscapeSequence(char c)
{
switch(c)
2010-05-28 19:13:55 +00:00
{
2010-06-01 13:35:09 +00:00
case 'b':
return '\b';
case 'f':
return '\f';
case 'n':
return '\n';
case 'r':
return '\r';
case 't':
return '\t';
case '0':
return '\0';
default:
return c;
2010-05-28 19:13:55 +00:00
}
2010-06-01 13:35:09 +00:00
}
2010-05-28 19:13:55 +00:00
/// Эти функции находятся в VarInt.h
/// inline void throwReadAfterEOF()
/// inline void readChar(char & x, ReadBuffer & buf)
2010-06-04 18:25:25 +00:00
/// Чтение POD-типа в native формате
2011-07-04 18:22:37 +00:00
template <typename T>
inline void readPODBinary(T & x, ReadBuffer & buf)
2011-07-04 18:22:37 +00:00
{
buf.readStrict(reinterpret_cast<char *>(&x), sizeof(x));
}
2011-08-09 17:24:17 +00:00
template <typename T>
inline void readIntBinary(T & x, ReadBuffer & buf)
{
readPODBinary(x, buf);
2011-08-09 17:24:17 +00:00
}
template <typename T>
inline void readFloatBinary(T & x, ReadBuffer & buf)
{
readPODBinary(x, buf);
2011-08-09 17:24:17 +00:00
}
2011-07-04 18:22:37 +00:00
2011-08-19 18:31:14 +00:00
2013-10-10 23:06:51 +00:00
inline void readStringBinary(std::string & s, ReadBuffer & buf, size_t MAX_STRING_SIZE = DEFAULT_MAX_STRING_SIZE)
2011-08-19 18:31:14 +00:00
{
size_t size = 0;
2013-10-10 23:06:51 +00:00
readVarUInt(size, buf);
2011-08-19 18:31:14 +00:00
if (size > MAX_STRING_SIZE)
throw Poco::Exception("Too large string size.");
s.resize(size);
buf.readStrict(&s[0], size);
2011-08-19 18:31:14 +00:00
}
template <typename T>
void readVectorBinary(std::vector<T> & v, ReadBuffer & buf, size_t MAX_VECTOR_SIZE = DEFAULT_MAX_STRING_SIZE)
{
size_t size = 0;
2013-10-10 23:06:51 +00:00
readVarUInt(size, buf);
if (size > MAX_VECTOR_SIZE)
throw Poco::Exception("Too large vector size.");
v.resize(size);
for (size_t i = 0; i < size; ++i)
readBinary(v[i], buf);
}
2010-06-01 14:12:28 +00:00
void assertString(const char * s, ReadBuffer & buf);
void assertEOF(ReadBuffer & buf);
void assertChar(char symbol, ReadBuffer & buf);
2010-06-01 13:35:09 +00:00
2014-03-24 12:10:47 +00:00
inline void assertString(const String & s, ReadBuffer & buf)
2014-03-22 14:44:44 +00:00
{
assertString(s.c_str(), buf);
}
inline void readBoolText(bool & x, ReadBuffer & buf)
{
char tmp = '0';
readChar(tmp, buf);
x = tmp != '0';
}
2010-06-01 13:35:09 +00:00
template <typename T>
void readIntText(T & x, ReadBuffer & buf)
{
bool negative = false;
x = 0;
2010-06-04 18:25:25 +00:00
if (buf.eof())
throwReadAfterEOF();
2010-06-01 13:35:09 +00:00
while (!buf.eof())
{
switch (*buf.position())
2010-05-28 19:13:55 +00:00
{
2010-06-01 13:35:09 +00:00
case '+':
break;
case '-':
2014-01-08 16:33:28 +00:00
if (std::is_signed<T>::value)
negative = true;
else
return;
2010-06-01 13:35:09 +00:00
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
x *= 10;
x += *buf.position() - '0';
break;
default:
if (negative)
2010-05-28 19:13:55 +00:00
x = -x;
2010-06-01 13:35:09 +00:00
return;
2010-05-28 19:13:55 +00:00
}
2010-06-01 13:35:09 +00:00
++buf.position();
2010-05-28 19:13:55 +00:00
}
2010-06-01 13:35:09 +00:00
if (negative)
x = -x;
}
/** Более оптимизированная версия (примерно в 1.5 раза на реальных данных).
* Отличается тем, что:
* - из чисел, начинающихся на ноль, парсится только ноль;
* - не поддерживается символ '+' перед числом;
* - символы :;<=>? парсятся, как некоторые цифры.
*/
template <typename T>
void readIntTextUnsafe(T & x, ReadBuffer & buf)
{
bool negative = false;
x = 0;
if (unlikely(buf.eof()))
throwReadAfterEOF();
2014-01-08 16:33:28 +00:00
if (std::is_signed<T>::value && *buf.position() == '-')
{
++buf.position();
negative = true;
if (unlikely(buf.eof()))
throwReadAfterEOF();
}
if (*buf.position() == '0') /// В реальных данных много нулей.
{
++buf.position();
return;
}
while (!buf.eof())
{
if ((*buf.position() & 0xF0) == 0x30) /// Имеет смысл, что это условие находится внутри цикла.
{
x *= 10;
x += *buf.position() & 0x0F;
++buf.position();
}
else
break;
}
2014-01-08 16:33:28 +00:00
if (std::is_signed<T>::value && negative)
x = -x;
}
2010-06-01 13:35:09 +00:00
/// грубо
template <typename T>
void readFloatText(T & x, ReadBuffer & buf)
{
bool negative = false;
x = 0;
bool after_point = false;
double power_of_ten = 1;
2010-05-28 19:13:55 +00:00
2010-06-04 18:25:25 +00:00
if (buf.eof())
throwReadAfterEOF();
2010-06-01 13:35:09 +00:00
while (!buf.eof())
2010-05-28 19:13:55 +00:00
{
2010-06-01 13:35:09 +00:00
switch (*buf.position())
2010-05-28 19:13:55 +00:00
{
2010-06-01 13:35:09 +00:00
case '+':
break;
case '-':
negative = true;
break;
case '.':
after_point = true;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
if (after_point)
{
power_of_ten /= 10;
x += (*buf.position() - '0') * power_of_ten;
}
else
{
x *= 10;
x += *buf.position() - '0';
}
break;
case 'e':
case 'E':
2010-05-28 19:13:55 +00:00
{
2010-06-01 13:35:09 +00:00
++buf.position();
Int32 exponent = 0;
readIntText(exponent, buf);
x *= exp10(exponent);
if (negative)
x = -x;
return;
2010-05-28 19:13:55 +00:00
}
2010-06-01 13:35:09 +00:00
case 'i':
++buf.position();
assertString("nf", buf);
x = std::numeric_limits<T>::infinity();
if (negative)
x = -x;
return;
case 'I':
++buf.position();
assertString("NF", buf);
x = std::numeric_limits<T>::infinity();
if (negative)
x = -x;
return;
case 'n':
++buf.position();
assertString("an", buf);
x = std::numeric_limits<T>::quiet_NaN();
return;
case 'N':
++buf.position();
assertString("AN", buf);
x = std::numeric_limits<T>::quiet_NaN();
return;
default:
if (negative)
x = -x;
2010-05-28 19:13:55 +00:00
return;
}
2010-06-01 13:35:09 +00:00
++buf.position();
2010-05-28 19:13:55 +00:00
}
2010-06-01 13:35:09 +00:00
if (negative)
x = -x;
}
2010-05-28 19:13:55 +00:00
2011-08-09 17:24:17 +00:00
2010-06-01 13:35:09 +00:00
/// грубо; всё до '\n' или '\t'
2010-06-01 14:12:28 +00:00
void readString(String & s, ReadBuffer & buf);
2010-05-28 19:13:55 +00:00
2010-06-01 14:12:28 +00:00
void readEscapedString(String & s, ReadBuffer & buf);
2010-05-28 19:13:55 +00:00
2010-06-01 14:12:28 +00:00
void readQuotedString(String & s, ReadBuffer & buf);
2010-05-28 19:13:55 +00:00
2011-06-15 18:54:18 +00:00
void readDoubleQuotedString(String & s, ReadBuffer & buf);
2011-11-01 17:57:37 +00:00
void readBackQuotedString(String & s, ReadBuffer & buf);
2010-05-28 19:13:55 +00:00
2011-05-05 19:10:17 +00:00
/// в формате YYYY-MM-DD
inline void readDateText(DayNum_t & date, ReadBuffer & buf)
2011-05-05 19:10:17 +00:00
{
char s[10];
2011-05-27 18:46:29 +00:00
size_t size = buf.read(s, 10);
if (10 != size)
{
s[size] = 0;
throw Exception(std::string("Cannot parse date ") + s, ErrorCodes::CANNOT_PARSE_DATE);
}
2011-05-05 19:10:17 +00:00
UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
2014-07-08 23:52:53 +00:00
date = DateLUT::instance().makeDayNum(year, month, day);
2011-05-05 19:10:17 +00:00
}
inline void readDateText(mysqlxx::Date & date, ReadBuffer & buf)
{
char s[10];
size_t size = buf.read(s, 10);
if (10 != size)
{
s[size] = 0;
throw Exception(std::string("Cannot parse date ") + s, ErrorCodes::CANNOT_PARSE_DATE);
}
date.year((s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'));
date.month((s[5] - '0') * 10 + (s[6] - '0'));
date.day((s[8] - '0') * 10 + (s[9] - '0'));
}
2011-08-07 02:08:22 +00:00
template <typename T>
inline T parse(const char * data, size_t size);
void readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf);
/** В формате YYYY-MM-DD hh:mm:ss, согласно текущему часовому поясу
* В качестве исключения, также поддерживается парсинг из десятичного числа - unix timestamp.
*/
2011-08-07 02:08:22 +00:00
inline void readDateTimeText(time_t & datetime, ReadBuffer & buf)
{
/** Считываем 10 символов, которые могут быть unix timestamp.
* При этом, поддерживается только unix timestamp из 10 символов - от 9 сентября 2001.
* Потом смотрим на пятый символ. Если это число - парсим unix timestamp.
* Если это не число - парсим YYYY-MM-DD hh:mm:ss.
*/
/// Оптимистичный вариант, когда всё значение точно лежит в буфере.
const char * s = buf.position();
if (s + 19 < buf.buffer().end())
{
if (s[4] < '0' || s[4] > '9')
{
UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0');
UInt8 month = (s[5] - '0') * 10 + (s[6] - '0');
UInt8 day = (s[8] - '0') * 10 + (s[9] - '0');
UInt8 hour = (s[11] - '0') * 10 + (s[12] - '0');
UInt8 minute = (s[14] - '0') * 10 + (s[15] - '0');
UInt8 second = (s[17] - '0') * 10 + (s[18] - '0');
2011-08-07 02:08:22 +00:00
if (unlikely(year == 0))
datetime = 0;
else
datetime = DateLUT::instance().makeDateTime(year, month, day, hour, minute, second);
2011-08-07 02:08:22 +00:00
buf.position() += 19;
}
else
readIntTextUnsafe(datetime, buf);
}
2012-06-07 20:02:41 +00:00
else
readDateTimeTextFallback(datetime, buf);
2011-08-07 02:08:22 +00:00
}
inline void readDateTimeText(mysqlxx::DateTime & datetime, ReadBuffer & buf)
{
char s[19];
size_t size = buf.read(s, 19);
if (19 != size)
{
s[size] = 0;
throw Exception(std::string("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME);
}
datetime.year((s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'));
datetime.month((s[5] - '0') * 10 + (s[6] - '0'));
datetime.day((s[8] - '0') * 10 + (s[9] - '0'));
datetime.hour((s[11] - '0') * 10 + (s[12] - '0'));
datetime.minute((s[14] - '0') * 10 + (s[15] - '0'));
datetime.second((s[17] - '0') * 10 + (s[18] - '0'));
}
/// Общие методы для чтения значения в бинарном формате.
inline void readBinary(UInt8 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(UInt16 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(UInt32 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(UInt64 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Int8 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Int16 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Int32 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Int64 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Float32 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(Float64 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(String & x, ReadBuffer & buf) { readStringBinary(x, buf); }
inline void readBinary(bool & x, ReadBuffer & buf) { readPODBinary(x, buf); }
2014-04-02 13:45:39 +00:00
inline void readBinary(uint128 & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(VisitID_t & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(mysqlxx::Date & x, ReadBuffer & buf) { readPODBinary(x, buf); }
inline void readBinary(mysqlxx::DateTime & x, ReadBuffer & buf) { readPODBinary(x, buf); }
/// Общие методы для чтения значения в текстовом виде из tab-separated формата.
inline void readText(UInt8 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readText(UInt16 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readText(UInt32 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readText(UInt64 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readText(Int8 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readText(Int16 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readText(Int32 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readText(Int64 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readText(Float32 & x, ReadBuffer & buf) { readFloatText(x, buf); }
inline void readText(Float64 & x, ReadBuffer & buf) { readFloatText(x, buf); }
inline void readText(String & x, ReadBuffer & buf) { readEscapedString(x, buf); }
inline void readText(bool & x, ReadBuffer & buf) { readBoolText(x, buf); }
inline void readText(VisitID_t & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readText(mysqlxx::Date & x, ReadBuffer & buf) { readDateText(x, buf); }
inline void readText(mysqlxx::DateTime & x, ReadBuffer & buf) { readDateTimeText(x, buf); }
2011-08-07 02:08:22 +00:00
/// Общие методы для чтения значения в текстовом виде, при необходимости, в кавычках.
inline void readQuoted(UInt8 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readQuoted(UInt16 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readQuoted(UInt32 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readQuoted(UInt64 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readQuoted(Int8 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readQuoted(Int16 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readQuoted(Int32 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readQuoted(Int64 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readQuoted(Float32 & x, ReadBuffer & buf) { readFloatText(x, buf); }
inline void readQuoted(Float64 & x, ReadBuffer & buf) { readFloatText(x, buf); }
inline void readQuoted(String & x, ReadBuffer & buf) { readQuotedString(x, buf); }
inline void readQuoted(bool & x, ReadBuffer & buf) { readBoolText(x, buf); }
inline void readQuoted(VisitID_t & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readQuoted(mysqlxx::Date & x, ReadBuffer & buf)
{
assertString("'", buf);
readDateText(x, buf);
assertString("'", buf);
}
inline void readQuoted(mysqlxx::DateTime & x, ReadBuffer & buf)
{
assertString("'", buf);
readDateTimeText(x, buf);
assertString("'", buf);
}
/// В двойных кавычках
inline void readDoubleQuoted(UInt8 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readDoubleQuoted(UInt16 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readDoubleQuoted(UInt32 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readDoubleQuoted(UInt64 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readDoubleQuoted(Int8 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readDoubleQuoted(Int16 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readDoubleQuoted(Int32 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readDoubleQuoted(Int64 & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readDoubleQuoted(Float32 & x, ReadBuffer & buf) { readFloatText(x, buf); }
inline void readDoubleQuoted(Float64 & x, ReadBuffer & buf) { readFloatText(x, buf); }
inline void readDoubleQuoted(String & x, ReadBuffer & buf) { readDoubleQuotedString(x, buf); }
inline void readDoubleQuoted(bool & x, ReadBuffer & buf) { readBoolText(x, buf); }
inline void readDoubleQuoted(VisitID_t & x, ReadBuffer & buf) { readIntText(x, buf); }
inline void readDoubleQuoted(mysqlxx::Date & x, ReadBuffer & buf)
{
assertString("\"", buf);
readDateText(x, buf);
assertString("\"", buf);
}
inline void readDoubleQuoted(mysqlxx::DateTime & x, ReadBuffer & buf)
{
assertString("\"", buf);
readDateTimeText(x, buf);
assertString("\"", buf);
}
template <typename T>
void readBinary(std::vector<T> & x, ReadBuffer & buf)
{
size_t size = 0;
readVarUInt(size, buf);
if (size > DEFAULT_MAX_STRING_SIZE)
throw Poco::Exception("Too large vector size.");
x.resize(size);
for (size_t i = 0; i < size; ++i)
readBinary(x[i], buf);
}
template <typename T>
void readQuoted(std::vector<T> & x, ReadBuffer & buf)
{
bool first = true;
assertString("[", buf);
while (!buf.eof() && *buf.position() != ']')
{
if (!first)
{
if (*buf.position() == ',')
++buf.position();
else
throw Exception("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT);
}
first = false;
x.push_back(T());
readQuoted(x.back(), buf);
}
assertString("]", buf);
}
template <typename T>
void readDoubleQuoted(std::vector<T> & x, ReadBuffer & buf)
{
bool first = true;
assertString("[", buf);
while (!buf.eof() && *buf.position() != ']')
{
if (!first)
{
if (*buf.position() == ',')
++buf.position();
else
throw Exception("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT);
}
first = false;
x.push_back(T());
readDoubleQuoted(x.back(), buf);
}
assertString("]", buf);
}
template <typename T>
void readText(std::vector<T> & x, ReadBuffer & buf)
{
readQuoted(x, buf);
}
2011-10-30 05:19:41 +00:00
/// Пропустить пробельные символы.
inline void skipWhitespaceIfAny(ReadBuffer & buf)
{
while (!buf.eof()
&& (*buf.position() == ' '
|| *buf.position() == '\t'
|| *buf.position() == '\n'
|| *buf.position() == '\r'
|| *buf.position() == '\f'))
++buf.position();
2010-05-28 19:13:55 +00:00
}
2012-05-08 05:42:05 +00:00
/** Прочитать сериализованный эксепшен.
* При сериализации/десериализации часть информации теряется
* (тип обрезается до базового, message заменяется на displayText, и stack trace дописывается в message)
* К нему может быть добавлено дополнительное сообщение (например, вы можете указать, откуда оно было прочитано).
*/
void readException(Exception & e, ReadBuffer & buf, const String & additional_message = "");
void readAndThrowException(ReadBuffer & buf, const String & additional_message = "");
/** Вспомогательная функция
*/
template <typename T>
static inline const char * tryReadIntText(T & x, const char * pos, const char * end)
{
bool negative = false;
x = 0;
if (pos >= end)
return pos;
while (pos < end)
{
switch (*pos)
{
case '+':
break;
case '-':
2014-01-08 16:33:28 +00:00
if (std::is_signed<T>::value)
negative = true;
else
return pos;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
x *= 10;
x += *pos - '0';
break;
default:
if (negative)
x = -x;
return pos;
}
++pos;
}
if (negative)
x = -x;
return pos;
}
2013-06-21 20:34:19 +00:00
/// Простые для использования методы чтения чего-либо из строки в текстовом виде.
template <typename T>
inline T parse(const char * data, size_t size)
{
T res;
2013-06-21 21:05:16 +00:00
ReadBuffer buf(const_cast<char *>(data), size, 0);
2013-06-21 20:34:19 +00:00
readText(res, buf);
return res;
}
2013-06-21 21:05:16 +00:00
template <typename T>
inline T parse(const char * data)
{
return parse<T>(data, strlen(data));
}
2013-06-21 20:34:19 +00:00
template <typename T>
inline T parse(const String & s)
{
return parse<T>(s.data(), s.size());
}
2011-10-30 05:19:41 +00:00
}