#include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int LOGICAL_ERROR; extern const int ILLEGAL_COLUMN; } /* * hex(x) - Returns hexadecimal representation; capital letters; there are no prefixes 0x or suffixes h. * For numbers, returns a variable-length string - hex in the "human" (big endian) format, with the leading zeros being cut, * but only by whole bytes. For dates and datetimes - the same as for numbers. * For example, hex(257) = '0101'. * * unhex(string) - Returns a string, hex of which is equal to `string` with regard of case and discarding one leading zero. * If such a string does not exist, could return arbitrary implementation specific value. * * bin(x) - Returns binary representation. * * unbin(x) - Returns a string, opposite to `bin`. * */ struct HexImpl { static constexpr auto name = "hex"; static constexpr size_t word_size = 2; template static void executeOneUIntOrInt(T x, char *& out, bool skip_leading_zero = true, bool auto_close = true) { bool was_nonzero = false; for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8) { UInt8 byte = x >> offset; /// Skip leading zeros if (byte == 0 && !was_nonzero && offset && skip_leading_zero) continue; was_nonzero = true; writeHexByteUppercase(byte, out); out += word_size; } if (auto_close) { *out = '\0'; ++out; } } static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out, bool reverse_order = false) { if (!reverse_order) { while (pos < end) { writeHexByteUppercase(*pos, out); ++pos; out += word_size; } } else { const auto * start_pos = pos; pos = end - 1; while (pos >= start_pos) { writeHexByteUppercase(*pos, out); --pos; out += word_size; } } *out = '\0'; ++out; } template static void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) { const size_t hex_length = type_size_in_bytes * word_size + 1; /// Including trailing zero byte. auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); size_t size = in_vec.size(); out_offsets.resize(size); out_vec.resize(size * hex_length); size_t pos = 0; char * out = reinterpret_cast(out_vec.data()); for (size_t i = 0; i < size; ++i) { const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); bool reverse_order = (std::endian::native == std::endian::big); executeOneString(in_pos, in_pos + type_size_in_bytes, out, reverse_order); pos += hex_length; out_offsets[i] = pos; } col_res = std::move(col_str); } }; struct UnhexImpl { static constexpr auto name = "unhex"; static constexpr size_t word_size = 2; static void decode(const char * pos, const char * end, char *& out) { hexStringDecode(pos, end, out, word_size); } }; struct BinImpl { static constexpr auto name = "bin"; static constexpr size_t word_size = 8; template static void executeOneUIntOrInt(T x, char *& out, bool skip_leading_zero = true, bool auto_close = true) { bool was_nonzero = false; for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8) { UInt8 byte = x >> offset; /// Skip leading zeros if (byte == 0 && !was_nonzero && offset && skip_leading_zero) continue; was_nonzero = true; writeBinByte(byte, out); out += word_size; } if (auto_close) { *out = '\0'; ++out; } } template static void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) { const size_t hex_length = type_size_in_bytes * word_size + 1; /// Including trailing zero byte. auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); size_t size = in_vec.size(); out_offsets.resize(size); out_vec.resize(size * hex_length); size_t pos = 0; char * out = reinterpret_cast(out_vec.data()); for (size_t i = 0; i < size; ++i) { const UInt8 * in_pos = reinterpret_cast(&in_vec[i]); bool reverse_order = (std::endian::native == std::endian::big); executeOneString(in_pos, in_pos + type_size_in_bytes, out, reverse_order); pos += hex_length; out_offsets[i] = pos; } col_res = std::move(col_str); } static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out, bool reverse_order = false) { if (!reverse_order) { while (pos < end) { writeBinByte(*pos, out); ++pos; out += word_size; } } else { const auto * start_pos = pos; pos = end - 1; while (pos >= start_pos) { writeBinByte(*pos, out); --pos; out += word_size; } } *out = '\0'; ++out; } }; struct UnbinImpl { static constexpr auto name = "unbin"; static constexpr size_t word_size = 8; static void decode(const char * pos, const char * end, char *& out) { binStringDecode(pos, end, out); } }; /// Encode number or string to string with binary or hexadecimal representation template class EncodeToBinaryRepresentation : public IFunction { public: static constexpr auto name = Impl::name; static constexpr size_t word_size = Impl::word_size; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } bool useDefaultImplementationForConstants() const override { return true; } bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { WhichDataType which(arguments[0]); if (!which.isStringOrFixedString() && !which.isDate() && !which.isDateTime() && !which.isDateTime64() && !which.isUInt() && !which.isInt() && !which.isFloat() && !which.isDecimal() && !which.isUUID() && !which.isIPv4() && !which.isIPv6() && !which.isAggregateFunction()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); return std::make_shared(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const IColumn * column = arguments[0].column.get(); ColumnPtr res_column; WhichDataType which(column->getDataType()); if (which.isAggregateFunction()) { const ColumnPtr to_string = castColumn(arguments[0], std::make_shared()); const auto * str_column = checkAndGetColumn(to_string.get()); tryExecuteString(str_column, res_column); return res_column; } if (tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteString(column, res_column) || tryExecuteFixedString(column, res_column) || tryExecuteFloat(column, res_column) || tryExecuteFloat(column, res_column) || tryExecuteDecimal(column, res_column) || tryExecuteDecimal(column, res_column) || tryExecuteDecimal(column, res_column) || tryExecuteDecimal(column, res_column) || tryExecuteUUID(column, res_column) || tryExecuteIPv4(column, res_column) || tryExecuteIPv6(column, res_column)) return res_column; throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); } template bool tryExecuteUIntOrInt(const IColumn * col, ColumnPtr & col_res) const { const ColumnVector * col_vec = checkAndGetColumn>(col); static constexpr size_t MAX_LENGTH = sizeof(T) * word_size + 1; /// Including trailing zero byte. if (col_vec) { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); const typename ColumnVector::Container & in_vec = col_vec->getData(); size_t size = in_vec.size(); out_offsets.resize(size); out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. size_t pos = 0; for (size_t i = 0; i < size; ++i) { /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). if (pos + MAX_LENGTH > out_vec.size()) out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); char * begin = reinterpret_cast(&out_vec[pos]); char * end = begin; Impl::executeOneUIntOrInt(in_vec[i], end); pos += end - begin; out_offsets[i] = pos; } out_vec.resize(pos); col_res = std::move(col_str); return true; } else { return false; } } bool tryExecuteString(const IColumn *col, ColumnPtr &col_res) const { const ColumnString * col_str_in = checkAndGetColumn(col); if (col_str_in) { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); const ColumnString::Chars & in_vec = col_str_in->getChars(); const ColumnString::Offsets & in_offsets = col_str_in->getOffsets(); size_t size = in_offsets.size(); out_offsets.resize(size); /// reserve `word_size` bytes for each non trailing zero byte from input + `size` bytes for trailing zeros out_vec.resize((in_vec.size() - size) * word_size + size); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) { size_t new_offset = in_offsets[i]; Impl::executeOneString(&in_vec[prev_offset], &in_vec[new_offset - 1], pos); out_offsets[i] = pos - begin; prev_offset = new_offset; } if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); col_res = std::move(col_str); return true; } else { return false; } } template bool tryExecuteDecimal(const IColumn * col, ColumnPtr & col_res) const { const ColumnDecimal * col_dec = checkAndGetColumn>(col); if (col_dec) { const typename ColumnDecimal::Container & in_vec = col_dec->getData(); Impl::executeFloatAndDecimal(in_vec, col_res, sizeof(T)); return true; } else { return false; } } static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res) { const ColumnFixedString * col_fstr_in = checkAndGetColumn(col); if (col_fstr_in) { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); const ColumnString::Chars & in_vec = col_fstr_in->getChars(); size_t size = col_fstr_in->size(); out_offsets.resize(size); out_vec.resize(in_vec.size() * word_size + size); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; size_t n = col_fstr_in->getN(); size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) { size_t new_offset = prev_offset + n; Impl::executeOneString(&in_vec[prev_offset], &in_vec[new_offset], pos); out_offsets[i] = pos - begin; prev_offset = new_offset; } if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); col_res = std::move(col_str); return true; } else { return false; } } template bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res) const { const ColumnVector * col_vec = checkAndGetColumn>(col); if (col_vec) { const typename ColumnVector::Container & in_vec = col_vec->getData(); Impl::executeFloatAndDecimal(in_vec, col_res, sizeof(T)); return true; } else { return false; } } bool tryExecuteUUID(const IColumn * col, ColumnPtr & col_res) const { const ColumnUUID * col_vec = checkAndGetColumn(col); static constexpr size_t MAX_LENGTH = sizeof(UUID) * word_size + 1; /// Including trailing zero byte. if (col_vec) { auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); const typename ColumnUUID::Container & in_vec = col_vec->getData(); const UUID* uuid = in_vec.data(); size_t size = in_vec.size(); out_offsets.resize(size); out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. size_t pos = 0; for (size_t i = 0; i < size; ++i) { /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). if (pos + MAX_LENGTH > out_vec.size()) out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); char * begin = reinterpret_cast(&out_vec[pos]); char * end = begin; // use executeOnUInt instead of using executeOneString // because the latter one outputs the string in the memory order Impl::executeOneUIntOrInt(UUIDHelpers::getHighBytes(uuid[i]), end, false, false); Impl::executeOneUIntOrInt(UUIDHelpers::getLowBytes(uuid[i]), end, false, true); pos += end - begin; out_offsets[i] = pos; } out_vec.resize(pos); col_res = std::move(col_str); return true; } else { return false; } } bool tryExecuteIPv6(const IColumn * col, ColumnPtr & col_res) const { const ColumnIPv6 * col_vec = checkAndGetColumn(col); static constexpr size_t MAX_LENGTH = sizeof(IPv6) * word_size + 1; /// Including trailing zero byte. if (!col_vec) return false; auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); const typename ColumnIPv6::Container & in_vec = col_vec->getData(); const IPv6* ip = in_vec.data(); size_t size = in_vec.size(); out_offsets.resize(size); out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. size_t pos = 0; for (size_t i = 0; i < size; ++i) { /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). if (pos + MAX_LENGTH > out_vec.size()) out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); char * begin = reinterpret_cast(&out_vec[pos]); char * end = begin; Impl::executeOneString(reinterpret_cast(&ip[i].toUnderType().items[0]), reinterpret_cast(&ip[i].toUnderType().items[2]), end); pos += end - begin; out_offsets[i] = pos; } out_vec.resize(pos); col_res = std::move(col_str); return true; } bool tryExecuteIPv4(const IColumn * col, ColumnPtr & col_res) const { const ColumnIPv4 * col_vec = checkAndGetColumn(col); static constexpr size_t MAX_LENGTH = sizeof(IPv4) * word_size + 1; /// Including trailing zero byte. if (!col_vec) return false; auto col_str = ColumnString::create(); ColumnString::Chars & out_vec = col_str->getChars(); ColumnString::Offsets & out_offsets = col_str->getOffsets(); const typename ColumnIPv4::Container & in_vec = col_vec->getData(); const IPv4* ip = in_vec.data(); size_t size = in_vec.size(); out_offsets.resize(size); out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. size_t pos = 0; for (size_t i = 0; i < size; ++i) { /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). if (pos + MAX_LENGTH > out_vec.size()) out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); char * begin = reinterpret_cast(&out_vec[pos]); char * end = begin; Impl::executeOneUIntOrInt(ip[i].toUnderType(), end); pos += end - begin; out_offsets[i] = pos; } out_vec.resize(pos); col_res = std::move(col_str); return true; } }; /// Decode number or string from string with binary or hexadecimal representation template class DecodeFromBinaryRepresentation : public IFunction { public: static constexpr auto name = Impl::name; static constexpr size_t word_size = Impl::word_size; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { WhichDataType which(arguments[0]); if (!which.isStringOrFixedString()) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); return std::make_shared(); } bool useDefaultImplementationForConstants() const override { return true; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { const ColumnPtr & column = arguments[0].column; if (const ColumnString * col = checkAndGetColumn(column.get())) { auto col_res = ColumnString::create(); ColumnString::Chars & out_vec = col_res->getChars(); ColumnString::Offsets & out_offsets = col_res->getOffsets(); const ColumnString::Chars & in_vec = col->getChars(); const ColumnString::Offsets & in_offsets = col->getOffsets(); size_t size = in_offsets.size(); out_offsets.resize(size); out_vec.resize(in_vec.size() / word_size + size); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) { size_t new_offset = in_offsets[i]; Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset - 1]), pos); out_offsets[i] = pos - begin; prev_offset = new_offset; } out_vec.resize(pos - begin); return col_res; } else if (const ColumnFixedString * col_fix_string = checkAndGetColumn(column.get())) { auto col_res = ColumnString::create(); ColumnString::Chars & out_vec = col_res->getChars(); ColumnString::Offsets & out_offsets = col_res->getOffsets(); const ColumnString::Chars & in_vec = col_fix_string->getChars(); size_t n = col_fix_string->getN(); size_t size = col_fix_string->size(); out_offsets.resize(size); out_vec.resize(in_vec.size() / word_size + size); char * begin = reinterpret_cast(out_vec.data()); char * pos = begin; size_t prev_offset = 0; for (size_t i = 0; i < size; ++i) { size_t new_offset = prev_offset + n; Impl::decode(reinterpret_cast(&in_vec[prev_offset]), reinterpret_cast(&in_vec[new_offset]), pos); out_offsets[i] = pos - begin; prev_offset = new_offset; } out_vec.resize(pos - begin); return col_res; } else { throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", arguments[0].column->getName(), getName()); } } }; REGISTER_FUNCTION(BinaryRepr) { factory.registerFunction>({}, FunctionFactory::CaseInsensitive); factory.registerFunction>({}, FunctionFactory::CaseInsensitive); factory.registerFunction>({}, FunctionFactory::CaseInsensitive); factory.registerFunction>({}, FunctionFactory::CaseInsensitive); } }