mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #25609 from zxc111/master
This commit is contained in:
commit
b46ac3dfd1
@ -56,3 +56,37 @@ const char * const hex_char_to_digit_table =
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
|
||||
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff";
|
||||
|
||||
/// Lookup table with the 8-character binary text of every byte value 0..255,
/// most significant bit first. The text for byte `b` occupies characters
/// [b * 8, b * 8 + 8); every source line below holds eight consecutive entries.
const char * const bin_byte_to_char_table =
    "0000000000000001000000100000001100000100000001010000011000000111"
    "0000100000001001000010100000101100001100000011010000111000001111"
    "0001000000010001000100100001001100010100000101010001011000010111"
    "0001100000011001000110100001101100011100000111010001111000011111"
    "0010000000100001001000100010001100100100001001010010011000100111"
    "0010100000101001001010100010101100101100001011010010111000101111"
    "0011000000110001001100100011001100110100001101010011011000110111"
    "0011100000111001001110100011101100111100001111010011111000111111"
    "0100000001000001010000100100001101000100010001010100011001000111"
    "0100100001001001010010100100101101001100010011010100111001001111"
    "0101000001010001010100100101001101010100010101010101011001010111"
    "0101100001011001010110100101101101011100010111010101111001011111"
    "0110000001100001011000100110001101100100011001010110011001100111"
    "0110100001101001011010100110101101101100011011010110111001101111"
    "0111000001110001011100100111001101110100011101010111011001110111"
    "0111100001111001011110100111101101111100011111010111111001111111"
    "1000000010000001100000101000001110000100100001011000011010000111"
    "1000100010001001100010101000101110001100100011011000111010001111"
    "1001000010010001100100101001001110010100100101011001011010010111"
    "1001100010011001100110101001101110011100100111011001111010011111"
    "1010000010100001101000101010001110100100101001011010011010100111"
    "1010100010101001101010101010101110101100101011011010111010101111"
    "1011000010110001101100101011001110110100101101011011011010110111"
    "1011100010111001101110101011101110111100101111011011111010111111"
    "1100000011000001110000101100001111000100110001011100011011000111"
    "1100100011001001110010101100101111001100110011011100111011001111"
    "1101000011010001110100101101001111010100110101011101011011010111"
    "1101100011011001110110101101101111011100110111011101111011011111"
    "1110000011100001111000101110001111100100111001011110011011100111"
    "1110100011101001111010101110101111101100111011011110111011101111"
    "1111000011110001111100101111001111110100111101011111011011110111"
    "1111100011111001111110101111101111111100111111011111111011111111";
|
||||
|
@ -39,6 +39,12 @@ inline void writeHexByteLowercase(UInt8 byte, void * out)
|
||||
memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
|
||||
}
|
||||
|
||||
extern const char * const bin_byte_to_char_table;
|
||||
|
||||
/// Writes the binary text of `byte` to `out`.
/// Exactly 8 characters are copied from `bin_byte_to_char_table`; no terminating zero is written,
/// so `out` must have room for at least 8 bytes.
inline void writeBinByte(UInt8 byte, void * out)
{
    const char * digits = bin_byte_to_char_table + static_cast<size_t>(byte) * 8;
    memcpy(out, digits, 8);
}
|
||||
|
||||
/// Produces hex representation of an unsigned int with leading zeros (for checksums)
|
||||
template <typename TUInt>
|
||||
|
@ -21,6 +21,8 @@ void registerFunctionsCoding(FunctionFactory & factory)
|
||||
factory.registerFunction<FunctionUUIDStringToNum>();
|
||||
factory.registerFunction<FunctionHex>(FunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction<FunctionUnhex>(FunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction<FunctionBin>(FunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction<FunctionUnbin>(FunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction<FunctionChar>(FunctionFactory::CaseInsensitive);
|
||||
factory.registerFunction<FunctionBitmaskToArray>();
|
||||
factory.registerFunction<FunctionBitPositionsToArray>();
|
||||
|
@ -65,7 +65,6 @@ namespace ErrorCodes
|
||||
constexpr size_t uuid_bytes_length = 16;
|
||||
constexpr size_t uuid_text_length = 36;
|
||||
|
||||
|
||||
class FunctionIPv6NumToString : public IFunction
|
||||
{
|
||||
public:
|
||||
@ -951,19 +950,22 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class FunctionHex : public IFunction
|
||||
/// Encode number or string to string with binary or hexadecimal representation
|
||||
template <typename Impl>
|
||||
class EncodeToBinaryRepr : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "hex";
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionHex>(); }
|
||||
static constexpr auto name = Impl::name;
|
||||
static constexpr size_t word_size = Impl::word_size;
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<EncodeToBinaryRepr>(); }
|
||||
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
@ -983,235 +985,6 @@ public:
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void executeOneUInt(T x, char *& out) const
|
||||
{
|
||||
bool was_nonzero = false;
|
||||
for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8)
|
||||
{
|
||||
UInt8 byte = x >> offset;
|
||||
|
||||
/// Leading zeros.
|
||||
if (byte == 0 && !was_nonzero && offset) // -V560
|
||||
continue;
|
||||
|
||||
was_nonzero = true;
|
||||
|
||||
writeHexByteUppercase(byte, out);
|
||||
out += 2;
|
||||
}
|
||||
*out = '\0';
|
||||
++out;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool tryExecuteUInt(const IColumn * col, ColumnPtr & col_res) const
|
||||
{
|
||||
const ColumnVector<T> * col_vec = checkAndGetColumn<ColumnVector<T>>(col);
|
||||
|
||||
static constexpr size_t MAX_UINT_HEX_LENGTH = sizeof(T) * 2 + 1; /// Including trailing zero byte.
|
||||
|
||||
if (col_vec)
|
||||
{
|
||||
auto col_str = ColumnString::create();
|
||||
ColumnString::Chars & out_vec = col_str->getChars();
|
||||
ColumnString::Offsets & out_offsets = col_str->getOffsets();
|
||||
|
||||
const typename ColumnVector<T>::Container & in_vec = col_vec->getData();
|
||||
|
||||
size_t size = in_vec.size();
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(size * 3 + MAX_UINT_HEX_LENGTH); /// 3 is length of one byte in hex plus zero byte.
|
||||
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
/// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it).
|
||||
if (pos + MAX_UINT_HEX_LENGTH > out_vec.size())
|
||||
out_vec.resize(out_vec.size() * 2 + MAX_UINT_HEX_LENGTH);
|
||||
|
||||
char * begin = reinterpret_cast<char *>(&out_vec[pos]);
|
||||
char * end = begin;
|
||||
executeOneUInt<T>(in_vec[i], end);
|
||||
|
||||
pos += end - begin;
|
||||
out_offsets[i] = pos;
|
||||
}
|
||||
|
||||
out_vec.resize(pos);
|
||||
|
||||
col_res = std::move(col_str);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes) const
|
||||
{
|
||||
const size_t hex_length = type_size_in_bytes * 2 + 1; /// Including trailing zero byte.
|
||||
auto col_str = ColumnString::create();
|
||||
|
||||
ColumnString::Chars & out_vec = col_str->getChars();
|
||||
ColumnString::Offsets & out_offsets = col_str->getOffsets();
|
||||
|
||||
size_t size = in_vec.size();
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(size * hex_length);
|
||||
|
||||
size_t pos = 0;
|
||||
char * out = reinterpret_cast<char *>(&out_vec[0]);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const UInt8 * in_pos = reinterpret_cast<const UInt8 *>(&in_vec[i]);
|
||||
executeOneString(in_pos, in_pos + type_size_in_bytes, out);
|
||||
|
||||
pos += hex_length;
|
||||
out_offsets[i] = pos;
|
||||
}
|
||||
col_res = std::move(col_str);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res) const
|
||||
{
|
||||
const ColumnVector<T> * col_vec = checkAndGetColumn<ColumnVector<T>>(col);
|
||||
if (col_vec)
|
||||
{
|
||||
const typename ColumnVector<T>::Container & in_vec = col_vec->getData();
|
||||
executeFloatAndDecimal<typename ColumnVector<T>::Container>(in_vec, col_res, sizeof(T));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool tryExecuteDecimal(const IColumn * col, ColumnPtr & col_res) const
|
||||
{
|
||||
const ColumnDecimal<T> * col_dec = checkAndGetColumn<ColumnDecimal<T>>(col);
|
||||
if (col_dec)
|
||||
{
|
||||
const typename ColumnDecimal<T>::Container & in_vec = col_dec->getData();
|
||||
executeFloatAndDecimal<typename ColumnDecimal<T>::Container>(in_vec, col_res, sizeof(T));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out)
|
||||
{
|
||||
while (pos < end)
|
||||
{
|
||||
writeHexByteUppercase(*pos, out);
|
||||
++pos;
|
||||
out += 2;
|
||||
}
|
||||
*out = '\0';
|
||||
++out;
|
||||
}
|
||||
|
||||
static bool tryExecuteString(const IColumn * col, ColumnPtr & col_res)
|
||||
{
|
||||
const ColumnString * col_str_in = checkAndGetColumn<ColumnString>(col);
|
||||
|
||||
if (col_str_in)
|
||||
{
|
||||
auto col_str = ColumnString::create();
|
||||
ColumnString::Chars & out_vec = col_str->getChars();
|
||||
ColumnString::Offsets & out_offsets = col_str->getOffsets();
|
||||
|
||||
const ColumnString::Chars & in_vec = col_str_in->getChars();
|
||||
const ColumnString::Offsets & in_offsets = col_str_in->getOffsets();
|
||||
|
||||
size_t size = in_offsets.size();
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(in_vec.size() * 2 - size);
|
||||
|
||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||
char * pos = begin;
|
||||
size_t prev_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t new_offset = in_offsets[i];
|
||||
|
||||
executeOneString(&in_vec[prev_offset], &in_vec[new_offset - 1], pos);
|
||||
|
||||
out_offsets[i] = pos - begin;
|
||||
|
||||
prev_offset = new_offset;
|
||||
}
|
||||
|
||||
if (!out_offsets.empty() && out_offsets.back() != out_vec.size())
|
||||
throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
col_res = std::move(col_str);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res)
|
||||
{
|
||||
const ColumnFixedString * col_fstr_in = checkAndGetColumn<ColumnFixedString>(col);
|
||||
|
||||
if (col_fstr_in)
|
||||
{
|
||||
auto col_str = ColumnString::create();
|
||||
ColumnString::Chars & out_vec = col_str->getChars();
|
||||
ColumnString::Offsets & out_offsets = col_str->getOffsets();
|
||||
|
||||
const ColumnString::Chars & in_vec = col_fstr_in->getChars();
|
||||
|
||||
size_t size = col_fstr_in->size();
|
||||
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(in_vec.size() * 2 + size);
|
||||
|
||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||
char * pos = begin;
|
||||
|
||||
size_t n = col_fstr_in->getN();
|
||||
|
||||
size_t prev_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t new_offset = prev_offset + n;
|
||||
|
||||
executeOneString(&in_vec[prev_offset], &in_vec[new_offset], pos);
|
||||
|
||||
out_offsets[i] = pos - begin;
|
||||
prev_offset = new_offset;
|
||||
}
|
||||
|
||||
if (!out_offsets.empty() && out_offsets.back() != out_vec.size())
|
||||
throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
col_res = std::move(col_str);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
{
|
||||
const IColumn * column = arguments[0].column.get();
|
||||
@ -1234,19 +1007,185 @@ public:
|
||||
+ " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool tryExecuteUInt(const IColumn * col, ColumnPtr & col_res) const
|
||||
{
|
||||
const ColumnVector<T> * col_vec = checkAndGetColumn<ColumnVector<T>>(col);
|
||||
|
||||
static constexpr size_t MAX_LENGTH = sizeof(T) * word_size + 1; /// Including trailing zero byte.
|
||||
|
||||
if (col_vec)
|
||||
{
|
||||
auto col_str = ColumnString::create();
|
||||
ColumnString::Chars & out_vec = col_str->getChars();
|
||||
ColumnString::Offsets & out_offsets = col_str->getOffsets();
|
||||
|
||||
const typename ColumnVector<T>::Container & in_vec = col_vec->getData();
|
||||
|
||||
size_t size = in_vec.size();
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte.
|
||||
|
||||
size_t pos = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
/// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it).
|
||||
if (pos + MAX_LENGTH > out_vec.size())
|
||||
out_vec.resize(out_vec.size() * word_size + MAX_LENGTH);
|
||||
|
||||
char * begin = reinterpret_cast<char *>(&out_vec[pos]);
|
||||
char * end = begin;
|
||||
Impl::executeOneUInt(in_vec[i], end);
|
||||
|
||||
pos += end - begin;
|
||||
out_offsets[i] = pos;
|
||||
}
|
||||
out_vec.resize(pos);
|
||||
|
||||
col_res = std::move(col_str);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool tryExecuteString(const IColumn *col, ColumnPtr &col_res) const
|
||||
{
|
||||
const ColumnString * col_str_in = checkAndGetColumn<ColumnString>(col);
|
||||
|
||||
if (col_str_in)
|
||||
{
|
||||
auto col_str = ColumnString::create();
|
||||
ColumnString::Chars & out_vec = col_str->getChars();
|
||||
ColumnString::Offsets & out_offsets = col_str->getOffsets();
|
||||
|
||||
const ColumnString::Chars & in_vec = col_str_in->getChars();
|
||||
const ColumnString::Offsets & in_offsets = col_str_in->getOffsets();
|
||||
|
||||
size_t size = in_offsets.size();
|
||||
|
||||
out_offsets.resize(size);
|
||||
/// reserve `word_size` bytes for each non trailing zero byte from input + `size` bytes for trailing zeros
|
||||
out_vec.resize((in_vec.size() - size) * word_size + size);
|
||||
|
||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||
char * pos = begin;
|
||||
size_t prev_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t new_offset = in_offsets[i];
|
||||
|
||||
Impl::executeOneString(&in_vec[prev_offset], &in_vec[new_offset - 1], pos);
|
||||
|
||||
out_offsets[i] = pos - begin;
|
||||
|
||||
prev_offset = new_offset;
|
||||
}
|
||||
if (!out_offsets.empty() && out_offsets.back() != out_vec.size())
|
||||
throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
col_res = std::move(col_str);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool tryExecuteDecimal(const IColumn * col, ColumnPtr & col_res) const
|
||||
{
|
||||
const ColumnDecimal<T> * col_dec = checkAndGetColumn<ColumnDecimal<T>>(col);
|
||||
if (col_dec)
|
||||
{
|
||||
const typename ColumnDecimal<T>::Container & in_vec = col_dec->getData();
|
||||
Impl::executeFloatAndDecimal(in_vec, col_res, sizeof(T));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool tryExecuteFixedString(const IColumn * col, ColumnPtr & col_res)
|
||||
{
|
||||
const ColumnFixedString * col_fstr_in = checkAndGetColumn<ColumnFixedString>(col);
|
||||
|
||||
if (col_fstr_in)
|
||||
{
|
||||
auto col_str = ColumnString::create();
|
||||
ColumnString::Chars & out_vec = col_str->getChars();
|
||||
ColumnString::Offsets & out_offsets = col_str->getOffsets();
|
||||
|
||||
const ColumnString::Chars & in_vec = col_fstr_in->getChars();
|
||||
|
||||
size_t size = col_fstr_in->size();
|
||||
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(in_vec.size() * word_size + size);
|
||||
|
||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||
char * pos = begin;
|
||||
|
||||
size_t n = col_fstr_in->getN();
|
||||
|
||||
size_t prev_offset = 0;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
size_t new_offset = prev_offset + n;
|
||||
|
||||
Impl::executeOneString(&in_vec[prev_offset], &in_vec[new_offset], pos);
|
||||
|
||||
out_offsets[i] = pos - begin;
|
||||
prev_offset = new_offset;
|
||||
}
|
||||
|
||||
if (!out_offsets.empty() && out_offsets.back() != out_vec.size())
|
||||
throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
col_res = std::move(col_str);
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res) const
|
||||
{
|
||||
const ColumnVector<T> * col_vec = checkAndGetColumn<ColumnVector<T>>(col);
|
||||
if (col_vec)
|
||||
{
|
||||
const typename ColumnVector<T>::Container & in_vec = col_vec->getData();
|
||||
Impl::executeFloatAndDecimal(in_vec, col_res, sizeof(T));
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class FunctionUnhex : public IFunction
|
||||
/// Decode number or string from string with binary or hexadecimal representation
|
||||
template <typename Impl>
|
||||
class DecodeFromBinaryRepr : public IFunction
|
||||
{
|
||||
public:
|
||||
static constexpr auto name = "unhex";
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionUnhex>(); }
|
||||
static constexpr auto name = Impl::name;
|
||||
static constexpr size_t word_size = Impl::word_size;
|
||||
static FunctionPtr create(ContextPtr) { return std::make_shared<DecodeFromBinaryRepr>(); }
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
String getName() const override { return name; }
|
||||
|
||||
size_t getNumberOfArguments() const override { return 1; }
|
||||
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
|
||||
@ -1255,29 +1194,11 @@ public:
|
||||
{
|
||||
if (!isString(arguments[0]))
|
||||
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
static void unhexOne(const char * pos, const char * end, char *& out)
|
||||
{
|
||||
if ((end - pos) & 1)
|
||||
{
|
||||
*out = unhex(*pos);
|
||||
++out;
|
||||
++pos;
|
||||
}
|
||||
while (pos < end)
|
||||
{
|
||||
*out = unhex2(pos);
|
||||
pos += 2;
|
||||
++out;
|
||||
}
|
||||
*out = '\0';
|
||||
++out;
|
||||
}
|
||||
|
||||
bool useDefaultImplementationForConstants() const override { return true; }
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
|
||||
@ -1296,7 +1217,7 @@ public:
|
||||
|
||||
size_t size = in_offsets.size();
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(in_vec.size() / 2 + size);
|
||||
out_vec.resize(in_vec.size() / word_size + size);
|
||||
|
||||
char * begin = reinterpret_cast<char *>(out_vec.data());
|
||||
char * pos = begin;
|
||||
@ -1306,7 +1227,7 @@ public:
|
||||
{
|
||||
size_t new_offset = in_offsets[i];
|
||||
|
||||
unhexOne(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset - 1]), pos);
|
||||
Impl::decode(reinterpret_cast<const char *>(&in_vec[prev_offset]), reinterpret_cast<const char *>(&in_vec[new_offset - 1]), pos);
|
||||
|
||||
out_offsets[i] = pos - begin;
|
||||
|
||||
@ -1326,6 +1247,219 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
struct HexImpl
|
||||
{
|
||||
static constexpr auto name = "hex";
|
||||
static constexpr size_t word_size = 2;
|
||||
|
||||
template <typename T>
|
||||
static void executeOneUInt(T x, char *& out)
|
||||
{
|
||||
bool was_nonzero = false;
|
||||
for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8)
|
||||
{
|
||||
UInt8 byte = x >> offset;
|
||||
|
||||
/// Skip leading zeros
|
||||
if (byte == 0 && !was_nonzero && offset)
|
||||
continue;
|
||||
|
||||
was_nonzero = true;
|
||||
writeHexByteUppercase(byte, out);
|
||||
out += word_size;
|
||||
}
|
||||
*out = '\0';
|
||||
++out;
|
||||
}
|
||||
|
||||
static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out)
|
||||
{
|
||||
while (pos < end)
|
||||
{
|
||||
writeHexByteUppercase(*pos, out);
|
||||
++pos;
|
||||
out += word_size;
|
||||
}
|
||||
*out = '\0';
|
||||
++out;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes)
|
||||
{
|
||||
const size_t hex_length = type_size_in_bytes * word_size + 1; /// Including trailing zero byte.
|
||||
auto col_str = ColumnString::create();
|
||||
|
||||
ColumnString::Chars & out_vec = col_str->getChars();
|
||||
ColumnString::Offsets & out_offsets = col_str->getOffsets();
|
||||
|
||||
size_t size = in_vec.size();
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(size * hex_length);
|
||||
|
||||
size_t pos = 0;
|
||||
char * out = reinterpret_cast<char *>(&out_vec[0]);
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const UInt8 * in_pos = reinterpret_cast<const UInt8 *>(&in_vec[i]);
|
||||
executeOneString(in_pos, in_pos + type_size_in_bytes, out);
|
||||
|
||||
pos += hex_length;
|
||||
out_offsets[i] = pos;
|
||||
}
|
||||
col_res = std::move(col_str);
|
||||
}
|
||||
};
|
||||
|
||||
struct UnhexImpl
|
||||
{
|
||||
static constexpr auto name = "unhex";
|
||||
static constexpr size_t word_size = 2;
|
||||
|
||||
static void decode(const char * pos, const char * end, char *& out)
|
||||
{
|
||||
if ((end - pos) & 1)
|
||||
{
|
||||
*out = unhex(*pos);
|
||||
++out;
|
||||
++pos;
|
||||
}
|
||||
while (pos < end)
|
||||
{
|
||||
*out = unhex2(pos);
|
||||
pos += word_size;
|
||||
++out;
|
||||
}
|
||||
*out = '\0';
|
||||
++out;
|
||||
}
|
||||
};
|
||||
|
||||
struct BinImpl
|
||||
{
|
||||
static constexpr auto name = "bin";
|
||||
static constexpr size_t word_size = 8;
|
||||
|
||||
template <typename T>
|
||||
static void executeOneUInt(T x, char *& out)
|
||||
{
|
||||
bool was_nonzero = false;
|
||||
for (int offset = (sizeof(T) - 1) * 8; offset >= 0; offset -= 8)
|
||||
{
|
||||
UInt8 byte = x >> offset;
|
||||
|
||||
/// Skip leading zeros
|
||||
if (byte == 0 && !was_nonzero && offset)
|
||||
continue;
|
||||
|
||||
was_nonzero = true;
|
||||
writeBinByte(byte, out);
|
||||
out += word_size;
|
||||
}
|
||||
*out = '\0';
|
||||
++out;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void executeFloatAndDecimal(const T & in_vec, ColumnPtr & col_res, const size_t type_size_in_bytes)
|
||||
{
|
||||
const size_t hex_length = type_size_in_bytes * word_size + 1; /// Including trailing zero byte.
|
||||
auto col_str = ColumnString::create();
|
||||
|
||||
ColumnString::Chars & out_vec = col_str->getChars();
|
||||
ColumnString::Offsets & out_offsets = col_str->getOffsets();
|
||||
|
||||
size_t size = in_vec.size();
|
||||
out_offsets.resize(size);
|
||||
out_vec.resize(size * hex_length);
|
||||
|
||||
size_t pos = 0;
|
||||
char * out = reinterpret_cast<char *>(out_vec.data());
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
const UInt8 * in_pos = reinterpret_cast<const UInt8 *>(&in_vec[i]);
|
||||
executeOneString(in_pos, in_pos + type_size_in_bytes, out);
|
||||
|
||||
pos += hex_length;
|
||||
out_offsets[i] = pos;
|
||||
}
|
||||
col_res = std::move(col_str);
|
||||
}
|
||||
|
||||
static void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out)
|
||||
{
|
||||
while (pos < end)
|
||||
{
|
||||
writeBinByte(*pos, out);
|
||||
++pos;
|
||||
out += word_size;
|
||||
}
|
||||
*out = '\0';
|
||||
++out;
|
||||
}
|
||||
};
|
||||
|
||||
/// Decoding policy for `unbin`: every `word_size` (8) binary digits become one output byte.
struct UnbinImpl
{
    static constexpr auto name = "unbin";
    static constexpr size_t word_size = 8;

    /// Decodes the binary digits in [pos, end) into bytes written to `out`, followed by a zero byte.
    /// Any character other than '0' is treated as a set bit.
    ///
    /// When the input length is not a multiple of 8, the first `length % 8` digits form a
    /// separate leading byte. Example: "101000001" (9 digits) is split into "1" and "01000001"
    /// and decodes to the bytes 0x01 0x41. A leading byte equal to zero is written only when
    /// no full 8-digit groups follow it.
    ///
    /// `out` is advanced past everything written, including the terminating zero.
    static void decode(const char * pos, const char * end, char *& out)
    {
        /// Empty input: only the terminating zero byte is produced.
        if (pos == end)
        {
            *out++ = '\0';
            return;
        }

        /// Consumes `count` digits starting at `pos` and packs them, most significant bit first.
        auto read_bits = [&pos](size_t count)
        {
            unsigned bits = 0;
            for (; count != 0; --count, ++pos)
                bits = (bits << 1) | (*pos != '0' ? 1u : 0u);
            return bits;
        };

        /// Digits that do not fill a whole byte are taken from the front of the input.
        const size_t leading_digits = (end - pos) & 7;
        const unsigned leading = read_bits(leading_digits);

        if (leading != 0 || pos == end)
            *out++ = static_cast<char>(leading);

        assert((end - pos) % 8 == 0);

        while (pos != end)
            *out++ = static_cast<char>(read_bits(8));

        *out++ = '\0';
    }
};
|
||||
|
||||
/// Concrete functions: each alias combines the generic encoder or decoder template
/// with the implementation struct that supplies its name and word size.
using FunctionHex = EncodeToBinaryRepr<HexImpl>;
|
||||
using FunctionUnhex = DecodeFromBinaryRepr<UnhexImpl>;
|
||||
using FunctionBin = EncodeToBinaryRepr<BinImpl>;
|
||||
using FunctionUnbin = DecodeFromBinaryRepr<UnbinImpl>;
|
||||
|
||||
class FunctionChar : public IFunction
|
||||
{
|
||||
public:
|
||||
|
35
tests/queries/0_stateless/01926_bin_unbin.reference
Normal file
35
tests/queries/0_stateless/01926_bin_unbin.reference
Normal file
@ -0,0 +1,35 @@
|
||||
|
||||
00000000
|
||||
00000001
|
||||
00001010
|
||||
01111111
|
||||
11111111
|
||||
0000000100000000
|
||||
0000000111111111
|
||||
0000001000000000
|
||||
00110000
|
||||
0011000100110000
|
||||
111001101011010110001011111010001010111110010101
|
||||
11100110101101011000101111101000101011111001010100000000000000000000000000000000
|
||||
10011010100110011001100100111111
|
||||
0011001100110011001100110011001100110011001100111111001100111111
|
||||
00000000000011100010011100000111
|
||||
0000000000000000000011000011110101011101010100111010101000000001
|
||||
0011000100110010001100110011001100110010001101000011001000110100
|
||||
0011000100110010001100110011001100110010001101000011001000110100
|
||||
0011000100110010001100110011001100110010001101000011001000110100
|
||||
0011000100110010001100110011001100110010001101000011001000110100
|
||||
|
||||
1
|
||||
0
|
||||
10
|
||||
测试
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
39
tests/queries/0_stateless/01926_bin_unbin.sql
Normal file
39
tests/queries/0_stateless/01926_bin_unbin.sql
Normal file
@ -0,0 +1,39 @@
|
||||
-- bin() of numbers: leading zero bytes are omitted, the lowest byte is always printed
SELECT bin('');
SELECT bin(0);
SELECT bin(1);
SELECT bin(10);
SELECT bin(127);
SELECT bin(255);
SELECT bin(256);
SELECT bin(511);
SELECT bin(512);

-- bin() of strings, fixed strings, floats and decimals
SELECT bin('0');
SELECT bin('10');
SELECT bin('测试');
SELECT bin(toFixedString('测试', 10));
SELECT bin(toFloat32(1.2));
SELECT bin(toFloat64(1.2));
SELECT bin(toDecimal32(1.2, 8));
SELECT bin(toDecimal64(1.2, 17));

-- bin() of constant, materialized, Nullable and LowCardinality arguments
SELECT bin('12332424');
SELECT bin(materialize('12332424'));
SELECT bin(toNullable(materialize('12332424')));
SELECT bin(toLowCardinality(materialize('12332424')));

-- unbin()
SELECT unbin('');
SELECT unbin('0') == '\0';
SELECT unbin('00110000'); -- 0
SELECT unbin('0011000100110000'); -- 10
SELECT unbin('111001101011010110001011111010001010111110010101'); -- 测试
SELECT unbin(materialize('00110000'));
SELECT unbin(toNullable(materialize('00110000')));
SELECT unbin(toLowCardinality(materialize('00110000')));

-- round trips
SELECT unbin(bin('')) == '';
SELECT bin(unbin('')) == '';
SELECT bin(unbin('0')) == '00000000';

-- hex and bin consistent for corner cases
SELECT hex('') == bin('');
SELECT unhex('') == unbin('');
SELECT unhex('0') == unbin('0');
|
Loading…
Reference in New Issue
Block a user