ClickHouse/dbms/include/DB/Functions/FunctionsConversion.h
2011-10-16 01:57:10 +00:00

294 lines
14 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#pragma once
#include <tr1/type_traits>
#include <Poco/NumberFormatter.h>
#include <Poco/UTF8Encoding.h>
#include <Poco/Unicode.h>
#include <DB/IO/WriteBufferFromVector.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/DataTypes/DataTypesNumberVariable.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/DataTypes/DataTypeFixedString.h>
#include <DB/DataTypes/DataTypeDate.h>
#include <DB/DataTypes/DataTypeDateTime.h>
#include <DB/Columns/ColumnString.h>
#include <DB/Columns/ColumnFixedString.h>
#include <DB/Columns/ColumnConst.h>
#include <DB/Functions/IFunction.h>
namespace DB
{
/** Функции преобразования типов.
*
* Бывают двух видов:
* - toType - преобразование "естественным образом";
* - reinterpretAsType - преобразования чисел и дат в строки, содержащие тот же набор байт в машинном представлении, и наоборот.
*/
/** Преобразование чисел друг в друга, дат/дат-с-временем в числа и наоборот: делается обычным присваиванием.
* (дата внутри хранится как количество дней с какого-то, дата-с-временем - как unix timestamp)
*/
template <typename FromDataType, typename ToDataType, typename Name>
struct ConvertImpl
{
typedef typename FromDataType::FieldType FromFieldType;
typedef typename ToDataType::FieldType ToFieldType;
static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
if (const ColumnVector<FromFieldType> * col_from = dynamic_cast<const ColumnVector<FromFieldType> *>(&*block.getByPosition(arguments[0]).column))
{
ColumnVector<ToFieldType> * col_to = new ColumnVector<ToFieldType>;
block.getByPosition(result).column = col_to;
const typename ColumnVector<FromFieldType>::Container_t & vec_from = col_from->getData();
typename ColumnVector<ToFieldType>::Container_t & vec_to = col_to->getData();
size_t size = vec_from.size();
vec_to.resize(size);
for (size_t i = 0; i < size; ++i)
vec_to[i] = vec_from[i];
}
else if (const ColumnConst<FromFieldType> * col_from = dynamic_cast<const ColumnConst<FromFieldType> *>(&*block.getByPosition(arguments[0]).column))
{
block.getByPosition(result).column = new ColumnConst<ToFieldType>(col_from->size(), col_from->getData());
}
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of first argument of function " + Name::get(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
/** Преобразование даты в дату-с-временем: добавление нулевого времени.
*/
template <typename Name>
struct ConvertImpl<DataTypeDate, DataTypeDateTime, Name>
{
typedef DataTypeDate::FieldType FromFieldType;
typedef DataTypeDateTime::FieldType ToFieldType;
static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
typedef DataTypeDate::FieldType FromFieldType;
Yandex::DateLUTSingleton & date_lut = Yandex::DateLUTSingleton::instance();
if (const ColumnVector<FromFieldType> * col_from = dynamic_cast<const ColumnVector<FromFieldType> *>(&*block.getByPosition(arguments[0]).column))
{
ColumnVector<ToFieldType> * col_to = new ColumnVector<ToFieldType>;
block.getByPosition(result).column = col_to;
const typename ColumnVector<FromFieldType>::Container_t & vec_from = col_from->getData();
typename ColumnVector<ToFieldType>::Container_t & vec_to = col_to->getData();
size_t size = vec_from.size();
vec_to.resize(size);
for (size_t i = 0; i < size; ++i)
vec_to[i] = date_lut.fromDayNum(Yandex::DayNum_t(vec_from[i]));
}
else if (const ColumnConst<FromFieldType> * col_from = dynamic_cast<const ColumnConst<FromFieldType> *>(&*block.getByPosition(arguments[0]).column))
{
block.getByPosition(result).column = new ColumnConst<ToFieldType>(col_from->size(), date_lut.fromDayNum(Yandex::DayNum_t(col_from->getData())));
}
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of first argument of function " + Name::get(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
/** Преобразование даты-с-временем в дату: отбрасывание времени.
*/
template <typename Name>
struct ConvertImpl<DataTypeDateTime, DataTypeDate, Name>
{
typedef DataTypeDateTime::FieldType FromFieldType;
typedef DataTypeDate::FieldType ToFieldType;
static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
Yandex::DateLUTSingleton & date_lut = Yandex::DateLUTSingleton::instance();
if (const ColumnVector<FromFieldType> * col_from = dynamic_cast<const ColumnVector<FromFieldType> *>(&*block.getByPosition(arguments[0]).column))
{
ColumnVector<ToFieldType> * col_to = new ColumnVector<ToFieldType>;
block.getByPosition(result).column = col_to;
const typename ColumnVector<FromFieldType>::Container_t & vec_from = col_from->getData();
typename ColumnVector<ToFieldType>::Container_t & vec_to = col_to->getData();
size_t size = vec_from.size();
vec_to.resize(size);
for (size_t i = 0; i < size; ++i)
vec_to[i] = date_lut.toDayNum(vec_from[i]);
}
else if (const ColumnConst<FromFieldType> * col_from = dynamic_cast<const ColumnConst<FromFieldType> *>(&*block.getByPosition(arguments[0]).column))
{
block.getByPosition(result).column = new ColumnConst<ToFieldType>(col_from->size(), date_lut.toDayNum(col_from->getData()));
}
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of first argument of function " + Name::get(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
/** Преобразование чисел в строки: через форматирование.
*/
template <typename FromDataType, typename Name>
struct ConvertImpl<FromDataType, DataTypeString, Name>
{
typedef typename FromDataType::FieldType FromFieldType;
static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
if (const ColumnVector<FromFieldType> * col_from = dynamic_cast<const ColumnVector<FromFieldType> *>(&*block.getByPosition(arguments[0]).column))
{
ColumnString * col_to = new ColumnString;
block.getByPosition(result).column = col_to;
const typename ColumnVector<FromFieldType>::Container_t & vec_from = col_from->getData();
ColumnUInt8::Container_t & data_to = dynamic_cast<ColumnUInt8 &>(col_to->getData()).getData();
ColumnString::Offsets_t & offsets_to = col_to->getOffsets();
size_t size = vec_from.size();
data_to.resize(size * 2);
offsets_to.resize(size);
WriteBufferFromVector<UInt8> write_buffer(data_to);
for (size_t i = 0; i < size; ++i)
{
writeText<FromFieldType>(vec_from[i], write_buffer);
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();
}
data_to.resize(write_buffer.count());
}
else if (const ColumnConst<FromFieldType> * col_from = dynamic_cast<const ColumnConst<FromFieldType> *>(&*block.getByPosition(arguments[0]).column))
{
std::vector<char> buf;
WriteBufferFromVector<char> write_buffer(buf);
writeText<FromFieldType>(col_from->getData(), write_buffer);
block.getByPosition(result).column = new ColumnConstString(col_from->size(), std::string(&buf[0], write_buffer.count()));
}
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of first argument of function " + Name::get(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
/** Преобразование строки в числа: через парсинг.
*/
template <typename ToDataType, typename Name>
struct ConvertImpl<DataTypeString, ToDataType, Name>
{
typedef typename ToDataType::FieldType ToFieldType;
static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
}
};
template <typename Name>
struct ConvertImpl<DataTypeString, DataTypeString, Name>
{
static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
}
};
template <typename ToDataType, typename Name>
class FunctionConvert : public IFunction
{
public:
/// Получить имя функции.
String getName() const
{
return Name::get();
}
/// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение.
DataTypePtr getReturnType(const DataTypes & arguments) const
{
if (arguments.size() != 1)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ Poco::NumberFormatter::format(arguments.size()) + ", should be 1.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
return new ToDataType;
}
/// Выполнить функцию над блоком.
void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
IDataType * from_type = &*block.getByPosition(arguments[0]).type;
if (dynamic_cast<const DataTypeUInt8 * >(from_type)) ConvertImpl<DataTypeUInt8, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeUInt16 * >(from_type)) ConvertImpl<DataTypeUInt16, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeUInt32 * >(from_type)) ConvertImpl<DataTypeUInt32, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeUInt64 * >(from_type)) ConvertImpl<DataTypeUInt64, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeInt8 * >(from_type)) ConvertImpl<DataTypeInt8, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeInt16 * >(from_type)) ConvertImpl<DataTypeInt16, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeInt32 * >(from_type)) ConvertImpl<DataTypeInt32, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeInt64 * >(from_type)) ConvertImpl<DataTypeInt64, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeFloat32 * >(from_type)) ConvertImpl<DataTypeFloat32, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeFloat64 * >(from_type)) ConvertImpl<DataTypeFloat64, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeVarUInt * >(from_type)) ConvertImpl<DataTypeVarUInt, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeVarInt * >(from_type)) ConvertImpl<DataTypeVarInt, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeDate * >(from_type)) ConvertImpl<DataTypeDate, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeDateTime * >(from_type)) ConvertImpl<DataTypeDateTime, ToDataType, Name>::execute(block, arguments, result);
else if (dynamic_cast<const DataTypeString * >(from_type)) ConvertImpl<DataTypeString, ToDataType, Name>::execute(block, arguments, result);
else
throw Exception("Illegal type " + block.getByPosition(arguments[0]).type->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
};
struct NameToUInt8 { static const char * get() { return "toUInt8"; } };
struct NameToUInt16 { static const char * get() { return "toUInt16"; } };
struct NameToUInt32 { static const char * get() { return "toUInt32"; } };
struct NameToUInt64 { static const char * get() { return "toUInt64"; } };
struct NameToInt8 { static const char * get() { return "toInt8"; } };
struct NameToInt16 { static const char * get() { return "toInt16"; } };
struct NameToInt32 { static const char * get() { return "toInt32"; } };
struct NameToInt64 { static const char * get() { return "toInt64"; } };
struct NameToFloat32 { static const char * get() { return "toFloat32"; } };
struct NameToFloat64 { static const char * get() { return "toFloat64"; } };
struct NameToVarUInt { static const char * get() { return "toVarUInt"; } };
struct NameToVatInt { static const char * get() { return "toVarInt"; } };
struct NameToDate { static const char * get() { return "toDate"; } };
struct NameToDateTime { static const char * get() { return "toDateTime"; } };
struct NameToString { static const char * get() { return "toString"; } };
typedef FunctionConvert<DataTypeUInt8, NameToUInt8> FunctionToUInt8;
typedef FunctionConvert<DataTypeUInt16, NameToUInt16> FunctionToUInt16;
typedef FunctionConvert<DataTypeUInt32, NameToUInt32> FunctionToUInt32;
typedef FunctionConvert<DataTypeUInt64, NameToUInt64> FunctionToUInt64;
typedef FunctionConvert<DataTypeInt8, NameToInt8> FunctionToInt8;
typedef FunctionConvert<DataTypeInt16, NameToInt16> FunctionToInt16;
typedef FunctionConvert<DataTypeInt32, NameToInt32> FunctionToInt32;
typedef FunctionConvert<DataTypeInt64, NameToInt64> FunctionToInt64;
typedef FunctionConvert<DataTypeFloat32, NameToFloat32> FunctionToFloat32;
typedef FunctionConvert<DataTypeFloat64, NameToFloat64> FunctionToFloat64;
typedef FunctionConvert<DataTypeVarUInt, NameToVarUInt> FunctionToVarUInt;
typedef FunctionConvert<DataTypeVarInt, NameToVatInt> FunctionToVarInt;
typedef FunctionConvert<DataTypeDate, NameToDate> FunctionToDate;
typedef FunctionConvert<DataTypeDateTime, NameToDateTime> FunctionToDateTime;
typedef FunctionConvert<DataTypeString, NameToString> FunctionToString;
}