#pragma once #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { /** Type conversion functions. * toType - conversion "in the natural way"; * NOTE(review): in this copy of the file the #include targets and the template parameter/argument lists are not visible; the code is left exactly as found. */ /** Conversion of numbers to one another, of dates/dates-with-time to numbers and vice versa: done by plain assignment. * (a date is stored internally as a number of days since some point, a date-with-time - as a unix timestamp) */ template struct ConvertImpl { typedef typename FromDataType::FieldType FromFieldType; typedef typename ToDataType::FieldType ToFieldType; static void execute(Block & block, const ColumnNumbers & arguments, size_t result) { /* Ordinary column: create the result column and fill it element by element using assignment. */ if (const ColumnVector * col_from = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) { ColumnVector * col_to = new ColumnVector; block.getByPosition(result).column = col_to; const typename ColumnVector::Container_t & vec_from = col_from->getData(); typename ColumnVector::Container_t & vec_to = col_to->getData(); size_t size = vec_from.size(); vec_to.resize(size); for (size_t i = 0; i < size; ++i) vec_to[i] = vec_from[i]; } /* Constant column: the result is a new constant column with the same size and the same value. */ else if (const ColumnConst * col_from = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) { block.getByPosition(result).column = new ColumnConst(col_from->size(), col_from->getData()); } /* Any other kind of column is rejected. */ else throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + Name::name, ErrorCodes::ILLEGAL_COLUMN); } }; /** Преобразование даты в дату-с-временем: добавление нулевого времени. 
*/ /** (English for the comment above) Conversion of a date to a date-with-time: a zero time is added. * Every source value is wrapped in DayNum_t and passed through DateLUT::fromDayNum. */ template struct ConvertImpl { typedef DataTypeDate::FieldType FromFieldType; typedef DataTypeDateTime::FieldType ToFieldType; static void execute(Block & block, const ColumnNumbers & arguments, size_t result) { /* NOTE(review): this repeats the struct-level FromFieldType typedef. */ typedef DataTypeDate::FieldType FromFieldType; DateLUT & date_lut = DateLUT::instance(); /* Ordinary column: convert each element into a new result column. */ if (const ColumnVector * col_from = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) { ColumnVector * col_to = new ColumnVector; block.getByPosition(result).column = col_to; const typename ColumnVector::Container_t & vec_from = col_from->getData(); typename ColumnVector::Container_t & vec_to = col_to->getData(); size_t size = vec_from.size(); vec_to.resize(size); for (size_t i = 0; i < size; ++i) { vec_to[i] = date_lut.fromDayNum(DayNum_t(vec_from[i])); } } /* Constant column: convert the single value and keep the same size. */ else if (const ColumnConst * col_from = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) { block.getByPosition(result).column = new ColumnConst(col_from->size(), date_lut.fromDayNum(DayNum_t(col_from->getData()))); } /* Any other kind of column is rejected. */ else throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + Name::name, ErrorCodes::ILLEGAL_COLUMN); } }; /** Преобразование даты-с-временем в дату: отбрасывание времени. 
*/ /** (English for the comment above) Conversion of a date-with-time to a date: the time is dropped. * Every source value is passed through DateLUT::toDayNum. */ template struct ConvertImpl { typedef DataTypeDateTime::FieldType FromFieldType; typedef DataTypeDate::FieldType ToFieldType; static void execute(Block & block, const ColumnNumbers & arguments, size_t result) { DateLUT & date_lut = DateLUT::instance(); /* Ordinary column: convert each element into a new result column. */ if (const ColumnVector * col_from = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) { ColumnVector * col_to = new ColumnVector; block.getByPosition(result).column = col_to; const typename ColumnVector::Container_t & vec_from = col_from->getData(); typename ColumnVector::Container_t & vec_to = col_to->getData(); size_t size = vec_from.size(); vec_to.resize(size); for (size_t i = 0; i < size; ++i) vec_to[i] = date_lut.toDayNum(vec_from[i]); } /* Constant column: convert the single value and keep the same size. */ else if (const ColumnConst * col_from = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) { block.getByPosition(result).column = new ColumnConst(col_from->size(), date_lut.toDayNum(col_from->getData())); } /* Any other kind of column is rejected. */ else throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + Name::name, ErrorCodes::ILLEGAL_COLUMN); } }; /** Special case for conversion of (U)Int32 or (U)Int64 to Date. * If the number is less than 65536, it is interpreted as a DayNum, and if greater or equal - as a unix timestamp. * It is somewhat illogical that we are, in effect, putting two different functions into one. * But this makes it possible to support the common case * where a user writes toDate(UInt32) expecting a conversion of a unix timestamp to a date * (otherwise such usage would be a common mistake). * NOTE(review): convert() below compares against 0xFFFF (65535), not 65536, so the value 65535 itself takes the toDayNum branch - confirm which bound is intended. */ template struct ConvertImpl32Or64ToDate { typedef typename FromDataType::FieldType FromFieldType; typedef DataTypeDate::FieldType ToFieldType; /* Small values are returned as they are; everything else goes through DateLUT::toDayNum. */ template static To convert(const From & from, const DateLUT & date_lut) { return from < 0xFFFF ? 
from : date_lut.toDayNum(from); } static void execute(Block & block, const ColumnNumbers & arguments, size_t result) { DateLUT & date_lut = DateLUT::instance(); /* Ordinary column: apply convert() to each element. */ if (const ColumnVector * col_from = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) { ColumnVector * col_to = new ColumnVector; block.getByPosition(result).column = col_to; const typename ColumnVector::Container_t & vec_from = col_from->getData(); typename ColumnVector::Container_t & vec_to = col_to->getData(); size_t size = vec_from.size(); vec_to.resize(size); for (size_t i = 0; i < size; ++i) vec_to[i] = convert(vec_from[i], date_lut); } /* Constant column: apply convert() to the single value and keep the same size. */ else if (const ColumnConst * col_from = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) { block.getByPosition(result).column = new ColumnConst(col_from->size(), convert(col_from->getData(), date_lut)); } /* Any other kind of column is rejected. */ else throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + Name::name, ErrorCodes::ILLEGAL_COLUMN); } }; /** Four ConvertImpl specializations that inherit the implementation above (their template arguments are not visible in this copy). */ template struct ConvertImpl : ConvertImpl32Or64ToDate {}; template struct ConvertImpl : ConvertImpl32Or64ToDate {}; template struct ConvertImpl : ConvertImpl32Or64ToDate {}; template struct ConvertImpl : ConvertImpl32Or64ToDate {}; /** Преобразование чисел, дат, дат-с-временем в строки: через форматирование. 
*/ /** (English for the comment above) Conversion of numbers, dates, dates-with-time to strings: via formatting. */ /* formatImpl: the generic version calls writeText; the two specializations below take DataTypeDate::FieldType and DataTypeDateTime::FieldType and call writeDateText / writeDateTimeText. */ template void formatImpl(typename DataType::FieldType x, WriteBuffer & wb) { writeText(x, wb); } template <> inline void formatImpl(DataTypeDate::FieldType x, WriteBuffer & wb) { writeDateText(DayNum_t(x), wb); } template <> inline void formatImpl(DataTypeDateTime::FieldType x, WriteBuffer & wb) { writeDateTimeText(x, wb); } template struct ConvertImpl { typedef typename FromDataType::FieldType FromFieldType; static void execute(Block & block, const ColumnNumbers & arguments, size_t result) { /* Ordinary column: format every value into the character array of a new ColumnString. */ if (const ColumnVector * col_from = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) { ColumnString * col_to = new ColumnString; block.getByPosition(result).column = col_to; const typename ColumnVector::Container_t & vec_from = col_from->getData(); ColumnString::Chars_t & data_to = col_to->getChars(); ColumnString::Offsets_t & offsets_to = col_to->getOffsets(); size_t size = vec_from.size(); /* Initial size only; the array is resized to write_buffer.count() after the loop. */ data_to.resize(size * 2); offsets_to.resize(size); WriteBufferFromVector write_buffer(data_to); for (size_t i = 0; i < size; ++i) { /* Each value is followed by a zero byte; the stored offset is the buffer's byte count after that byte. */ formatImpl(vec_from[i], write_buffer); writeChar(0, write_buffer); offsets_to[i] = write_buffer.count(); } data_to.resize(write_buffer.count()); } /* Constant column: format the single value into a temporary buffer and wrap the written bytes in a constant string column of the same size. */ else if (const ColumnConst * col_from = typeid_cast *>(&*block.getByPosition(arguments[0]).column)) { std::vector buf; WriteBufferFromVector > write_buffer(buf); formatImpl(col_from->getData(), write_buffer); block.getByPosition(result).column = new ColumnConstString(col_from->size(), std::string(&buf[0], write_buffer.count())); } /* Any other kind of column is rejected. */ else throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + Name::name, ErrorCodes::ILLEGAL_COLUMN); } }; /** Преобразование строк в числа, даты, даты-с-временем: через парсинг. 
*/ /** (English for the comment above) Conversion of strings to numbers, dates, dates-with-time: via parsing. */ /* parseImpl: the generic version calls readText; the two specializations below take DataTypeDate::FieldType and DataTypeDateTime::FieldType, read into a temporary with readDateText / readDateTimeText and assign it to x. */ template void parseImpl(typename DataType::FieldType & x, ReadBuffer & rb) { readText(x,rb); } template <> inline void parseImpl(DataTypeDate::FieldType & x, ReadBuffer & rb) { DayNum_t tmp(0); readDateText(tmp, rb); x = tmp; } template <> inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb) { time_t tmp = 0; readDateTimeText(tmp, rb); x = tmp; } template struct ConvertImpl { typedef typename ToDataType::FieldType ToFieldType; static void execute(Block & block, const ColumnNumbers & arguments, size_t result) { /* String column: a single read buffer is placed over the whole character array and the values are parsed from it one after another. */ if (const ColumnString * col_from = typeid_cast(&*block.getByPosition(arguments[0]).column)) { ColumnVector * col_to = new ColumnVector; block.getByPosition(result).column = col_to; const ColumnString::Chars_t & data_from = col_from->getChars(); typename ColumnVector::Container_t & vec_to = col_to->getData(); size_t size = col_from->size(); vec_to.resize(size); ReadBuffer read_buffer(const_cast(reinterpret_cast(&data_from[0])), data_from.size(), 0); char zero = 0; for (size_t i = 0; i < size; ++i) { /* The byte read right after each parsed value must be zero, otherwise an exception is thrown. */ parseImpl(vec_to[i], read_buffer); readChar(zero, read_buffer); if (zero != 0) throw Exception("Cannot parse from string.", ErrorCodes::CANNOT_PARSE_NUMBER); } } /* Constant string: parse the single value and build a constant column of the same size. NOTE(review): unlike the column branch, nothing here checks what follows the parsed value in the string - confirm whether trailing data should be rejected. */ else if (const ColumnConstString * col_from = typeid_cast(&*block.getByPosition(arguments[0]).column)) { const String & s = col_from->getData(); ReadBufferFromString read_buffer(s); ToFieldType x = 0; parseImpl(x, read_buffer); block.getByPosition(result).column = new ColumnConst(col_from->size(), x); } /* Any other kind of column is rejected. */ else throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + Name::name, ErrorCodes::ILLEGAL_COLUMN); } }; /** If the types are the same - just copy the reference to the column. */ template struct ConvertImpl { static void execute(Block & block, const ColumnNumbers & arguments, size_t result) { block.getByPosition(result).column = block.getByPosition(arguments[0]).column; } }; /** Преобразование из FixedString. 
*/ /** (English for the comment above) Conversion from FixedString. */ template struct ConvertImpl { typedef typename ToDataType::FieldType ToFieldType; static void execute(Block & block, const ColumnNumbers & arguments, size_t result) { /* FixedString column: every row is parsed from its own n-byte slice of the character array. */ if (const ColumnFixedString * col_from = typeid_cast(&*block.getByPosition(arguments[0]).column)) { ColumnVector * col_to = new ColumnVector; block.getByPosition(result).column = col_to; const ColumnFixedString::Chars_t & data_from = col_from->getChars(); size_t n = col_from->getN(); typename ColumnVector::Container_t & vec_to = col_to->getData(); size_t size = col_from->size(); vec_to.resize(size); for (size_t i = 0; i < size; ++i) { char * begin = const_cast(reinterpret_cast(&data_from[i * n])); char * end = begin + n; ReadBuffer read_buffer(begin, n, 0); parseImpl(vec_to[i], read_buffer); /* If the buffer is not at its end after parsing, only zero bytes may remain within the slice; anything else is an error. */ if (!read_buffer.eof()) { while (read_buffer.position() < end && *read_buffer.position() == 0) ++read_buffer.position(); if (read_buffer.position() < end) throw Exception("Cannot parse from fixed string.", ErrorCodes::CANNOT_PARSE_NUMBER); } } } /* Second recognised column kind: the work is handed to another ConvertImpl (the cast target and the template arguments are not visible in this copy). */ else if (typeid_cast(&*block.getByPosition(arguments[0]).column)) { ConvertImpl::execute(block, arguments, result); } /* Any other kind of column is rejected. */ else throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + Name::name, ErrorCodes::ILLEGAL_COLUMN); } }; /** Преобразование из FixedString в String. * При этом, вырезаются последовательности нулевых байт с конца строк. 
*/ /** (English for the comment above) Conversion from FixedString to String. * In doing so, runs of zero bytes are cut off from the end of the strings. */ template struct ConvertImpl { static void execute(Block & block, const ColumnNumbers & arguments, size_t result) { /* FixedString column: copy each n-byte row without its trailing zero bytes and append one zero byte after it. */ if (const ColumnFixedString * col_from = typeid_cast(&*block.getByPosition(arguments[0]).column)) { ColumnString * col_to = new ColumnString; block.getByPosition(result).column = col_to; const ColumnFixedString::Chars_t & data_from = col_from->getChars(); ColumnString::Chars_t & data_to = col_to->getChars(); ColumnString::Offsets_t & offsets_to = col_to->getOffsets(); size_t size = col_from->size(); size_t n = col_from->getN(); data_to.resize(size * (n + 1)); /// + 1 - the zero byte offsets_to.resize(size); size_t offset_from = 0; size_t offset_to = 0; for (size_t i = 0; i < size; ++i) { size_t bytes_to_copy = n; while (bytes_to_copy > 0 && data_from[offset_from + bytes_to_copy - 1] == 0) --bytes_to_copy; memcpy(&data_to[offset_to], &data_from[offset_from], bytes_to_copy); offset_from += n; offset_to += bytes_to_copy; data_to[offset_to] = 0; ++offset_to; offsets_to[i] = offset_to; } data_to.resize(offset_to); } else if (const ColumnConstString * col_from = typeid_cast(&*block.getByPosition(arguments[0]).column)) { const String & s = col_from->getData(); size_t bytes_to_copy = s.size(); while (bytes_to_copy > 0 && s[bytes_to_copy - 1] == 0) --bytes_to_copy; block.getByPosition(result).column = new ColumnConstString(col_from->size(), s.substr(0, bytes_to_copy)); } else throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + Name::name, ErrorCodes::ILLEGAL_COLUMN); } }; /** Function class built on ConvertImpl: its name is Name::name, getReturnType() accepts exactly one argument and returns a new ToDataType, and execute() selects a ConvertImpl according to the data type of the first argument. */ template class FunctionConvert : public IFunction { public: static constexpr auto name = Name::name; static IFunction * create(const Context & context) { return new FunctionConvert; } /// Get the function name. String getName() const { return name; } /// Get the result type from the argument types. If the function is not applicable to the given arguments - throw an exception. 
DataTypePtr getReturnType(const DataTypes & arguments) const { if (arguments.size() != 1) throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 1.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); return new ToDataType; } /// Execute the function on the block. void execute(Block & block, const ColumnNumbers & arguments, size_t result) { IDataType * from_type = &*block.getByPosition(arguments[0]).type; if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else if (typeid_cast(from_type)) ConvertImpl::execute(block, arguments, result); else throw Exception("Illegal type " + block.getByPosition(arguments[0]).type->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } }; /** Преобразование в строку фиксированной длины реализовано только из строк. 
*/ /** (English for the comment above) Conversion to a fixed-length string is implemented only from strings. * Takes two arguments: the value to convert and a constant length n. */ class FunctionToFixedString : public IFunction { public: static constexpr auto name = "toFixedString"; /* NOTE(review): there is a stray ';' after the body of create(). */ static IFunction * create(const Context & context) { return new FunctionToFixedString; }; /// Get the function name. String getName() const { return name; } /** Get the result type from the argument types and the values of constant arguments. * If the function is not applicable to the given arguments - throw an exception. * For non-constant columns arguments[i].column = nullptr. */ void getReturnTypeAndPrerequisites(const ColumnsWithNameAndType & arguments, DataTypePtr & out_return_type, std::vector & out_prerequisites) { if (arguments.size() != 2) throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 2.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); if (!arguments[1].column) throw Exception("Second argument for function " + getName() + " must be constant", ErrorCodes::ILLEGAL_COLUMN); if (!typeid_cast(arguments[0].type.get()) && !typeid_cast(arguments[0].type.get())) throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED); const size_t n = getSize(arguments[1]); out_return_type = new DataTypeFixedString(n); } /// Execute the function on the block. 
void execute(Block & block, const ColumnNumbers & arguments, size_t result) { ColumnPtr column = block.getByPosition(arguments[0]).column; size_t n = getSize(block.getByPosition(arguments[1])); /* Constant string: it must not be longer than n; a copy is resized to exactly n and wrapped in a constant column of type FixedString(n). */ if (const ColumnConstString * column_const = typeid_cast(&*column)) { if (column_const->getData().size() > n) throw Exception("String too long for type FixedString(" + toString(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE); auto resized_string = column_const->getData(); resized_string.resize(n); block.getByPosition(result).column = new ColumnConst(column_const->size(), std::move(resized_string), new DataTypeFixedString(n)); } /* String column: row i is copied to position i * n of the output; rows longer than n are an error. */ else if (const ColumnString * column_string = typeid_cast(&*column)) { ColumnFixedString * column_fixed = new ColumnFixedString(n); ColumnPtr result_ptr = column_fixed; ColumnFixedString::Chars_t & out_chars = column_fixed->getChars(); const ColumnString::Chars_t & in_chars = column_string->getChars(); const ColumnString::Offsets_t & in_offsets = column_string->getOffsets(); out_chars.resize_fill(in_offsets.size() * n); for (size_t i = 0; i < in_offsets.size(); ++i) { /* off is where row i starts: the previous row's offset, or 0 for the first row. */ size_t off = i ? 
in_offsets[i - 1] : 0; /* len leaves out one byte per row (presumably the trailing zero byte - confirm against ColumnString). */ size_t len = in_offsets[i] - off - 1; if (len > n) throw Exception("String too long for type FixedString(" + toString(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE); memcpy(&out_chars[i * n], &in_chars[off], len); } block.getByPosition(result).column = result_ptr; } /* FixedString column: rows of src_n bytes are copied into rows of n bytes; src_n must not exceed n. */ else if (const auto column_fixed_string = typeid_cast(column.get())) { const auto src_n = column_fixed_string->getN(); if (src_n > n) throw Exception{ "String too long for type FixedString(" + toString(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE }; const auto column_fixed = new ColumnFixedString{n}; block.getByPosition(result).column = column_fixed; auto & out_chars = column_fixed->getChars(); const auto & in_chars = column_fixed_string->getChars(); const auto size = column_fixed_string->size(); out_chars.resize_fill(size * n); for (const auto i : ext::range(0, size)) memcpy(&out_chars[i * n], &in_chars[i * src_n], src_n); } /* Any other kind of column is rejected. */ else throw Exception("Unexpected column: " + column->getName(), ErrorCodes::ILLEGAL_COLUMN); } private: /** Tries to read the FixedString length from the column for one candidate type T. * Returns false when the typeid_cast on the column's data type fails; throws when the column is not the expected constant column or when the value is not positive; otherwise stores the value in out_size and returns true. */ template bool getSizeTyped(const ColumnWithNameAndType & column, size_t & out_size) { if (!typeid_cast::Type *>(&*column.type)) return false; const ColumnConst * column_const = typeid_cast *>(&*column.column); if (!column_const) throw Exception("Unexpected type of column for FixedString length: " + column.column->getName(), ErrorCodes::ILLEGAL_COLUMN); T s = column_const->getData(); if (s <= 0) throw Exception("FixedString length must be positive (unlike " + toString(s) + ")", ErrorCodes::ILLEGAL_COLUMN); out_size = static_cast(s); return true; } /** Returns the FixedString length taken from the column by trying a series of getSizeTyped instantiations in turn; throws if none of them accepts the column's type. */ size_t getSize(const ColumnWithNameAndType & column) { size_t res; if (getSizeTyped(column, res) || getSizeTyped(column, res) || getSizeTyped(column, res) || getSizeTyped(column, res) || getSizeTyped< Int8 >(column, res) || getSizeTyped< Int16>(column, res) || getSizeTyped< Int32>(column, res) || getSizeTyped< Int64>(column, res)) return res; throw Exception("Length of FixedString must be integer; got " + 
column.type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } }; /** Name tags: each one carries a function name in its static member `name`. */ struct NameToUInt8 { static constexpr auto name = "toUInt8"; }; struct NameToUInt16 { static constexpr auto name = "toUInt16"; }; struct NameToUInt32 { static constexpr auto name = "toUInt32"; }; struct NameToUInt64 { static constexpr auto name = "toUInt64"; }; struct NameToInt8 { static constexpr auto name = "toInt8"; }; struct NameToInt16 { static constexpr auto name = "toInt16"; }; struct NameToInt32 { static constexpr auto name = "toInt32"; }; struct NameToInt64 { static constexpr auto name = "toInt64"; }; struct NameToFloat32 { static constexpr auto name = "toFloat32"; }; struct NameToFloat64 { static constexpr auto name = "toFloat64"; }; struct NameToDate { static constexpr auto name = "toDate"; }; struct NameToDateTime { static constexpr auto name = "toDateTime"; }; struct NameToString { static constexpr auto name = "toString"; }; /** The concrete conversion functions, declared as typedefs of FunctionConvert (its template arguments are not visible in this copy). */ typedef FunctionConvert FunctionToUInt8; typedef FunctionConvert FunctionToUInt16; typedef FunctionConvert FunctionToUInt32; typedef FunctionConvert FunctionToUInt64; typedef FunctionConvert FunctionToInt8; typedef FunctionConvert FunctionToInt16; typedef FunctionConvert FunctionToInt32; typedef FunctionConvert FunctionToInt64; typedef FunctionConvert FunctionToFloat32; typedef FunctionConvert FunctionToFloat64; typedef FunctionConvert FunctionToDate; typedef FunctionConvert FunctionToDateTime; typedef FunctionConvert FunctionToString; }