mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 17:41:59 +00:00
dbms: add MD5, SHA1, SHA224, SHA256, IPv6NumToString. [#METR-13151]
This commit is contained in:
parent
f1e559bbd5
commit
ee34ca31d8
@ -140,6 +140,9 @@ public:
|
||||
max = FieldType(data);
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to the `data_type` member, so the caller can replace it.
DataTypePtr & getDataType() { return data_type; }
|
||||
/// Read-only access to the `data_type` member.
const DataTypePtr & getDataType() const { return data_type; }
|
||||
|
||||
private:
|
||||
size_t s;
|
||||
T data;
|
||||
|
@ -158,7 +158,14 @@ public:
|
||||
#undef SIPROUND
|
||||
|
||||
|
||||
inline uint64_t sipHash64(const char * data, size_t size)
|
||||
inline void sipHash128(const char * data, const size_t size, char * out)
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(data, size);
|
||||
hash.get128(out);
|
||||
}
|
||||
|
||||
inline uint64_t sipHash64(const char * data, const size_t size)
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(data, size);
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <DB/Columns/ColumnArray.h>
|
||||
#include <DB/Columns/ColumnConst.h>
|
||||
#include <DB/Functions/IFunction.h>
|
||||
#include <arpa/inet.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -35,6 +36,91 @@ namespace DB
|
||||
/// Including the terminating null character.
|
||||
#define MAX_UINT_HEX_LENGTH 20
|
||||
|
||||
/// Required N of the FixedString(N) argument of IPv6NumToString; also the per-row stride in bytes over the input data.
const auto ipv6_fixed_string_length = 16;
|
||||
|
||||
class FunctionIPv6NumToString : public IFunction
|
||||
{
|
||||
public:
|
||||
String getName() const { return "IPv6NumToString"; }
|
||||
|
||||
DataTypePtr getReturnType(const DataTypes & arguments) const
|
||||
{
|
||||
if (arguments.size() != 1)
|
||||
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
|
||||
+ toString(arguments.size()) + ", should be 1.",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
const auto ptr = typeid_cast<const DataTypeFixedString *>(arguments[0].get());
|
||||
if (!ptr || ptr->getN() != ipv6_fixed_string_length)
|
||||
throw Exception("Illegal type " + arguments[0]->getName() +
|
||||
" of argument of function " + getName() +
|
||||
", expected FixedString(" + toString(ipv6_fixed_string_length) + ")",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return new DataTypeString;
|
||||
}
|
||||
|
||||
void execute(Block & block, const ColumnNumbers & arguments, const size_t result)
|
||||
{
|
||||
const auto & col_name_type = block.getByPosition(arguments[0]);
|
||||
const ColumnPtr & column = col_name_type.column;
|
||||
|
||||
if (const auto col_in = typeid_cast<const ColumnFixedString *>(column.get()))
|
||||
{
|
||||
if (col_in->getN() != ipv6_fixed_string_length)
|
||||
throw Exception("Illegal type " + col_name_type.type->getName() +
|
||||
" of column " + col_in->getName() +
|
||||
" argument of function " + getName() +
|
||||
", expected FixedString(" + toString(ipv6_fixed_string_length) + ")",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
const auto size = col_in->size();
|
||||
const auto & vec_in = col_in->getChars();
|
||||
|
||||
auto col_res = new ColumnString;
|
||||
block.getByPosition(result).column = col_res;
|
||||
|
||||
ColumnString::Chars_t & vec_res = col_res->getChars();
|
||||
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
|
||||
vec_res.resize(size * INET6_ADDRSTRLEN);
|
||||
offsets_res.resize(size);
|
||||
|
||||
auto begin = reinterpret_cast<char *>(&vec_res[0]);
|
||||
auto pos = begin;
|
||||
|
||||
for (size_t i = 0; i < vec_in.size(); i += ipv6_fixed_string_length)
|
||||
{
|
||||
inet_ntop(AF_INET6, &vec_in[i], pos, INET6_ADDRSTRLEN);
|
||||
pos = static_cast<char *>(memchr(pos, 0, INET6_ADDRSTRLEN)) + 1;
|
||||
offsets_res[i] = pos - begin;
|
||||
}
|
||||
|
||||
vec_res.resize(pos - begin);
|
||||
}
|
||||
else if (const auto col_in = typeid_cast<const ColumnConst<String> *>(column.get()))
|
||||
{
|
||||
const auto data_type_fixed_string = typeid_cast<const DataTypeFixedString *>(col_in->getDataType().get());
|
||||
if (!data_type_fixed_string || data_type_fixed_string->getN() != ipv6_fixed_string_length)
|
||||
throw Exception("Illegal type " + col_name_type.type->getName() +
|
||||
" of column " + col_in->getName() +
|
||||
" argument of function " + getName() +
|
||||
", expected FixedString(" + toString(ipv6_fixed_string_length) + ")",
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
const auto & data_in = col_in->getData();
|
||||
|
||||
char buf[INET6_ADDRSTRLEN];
|
||||
inet_ntop(AF_INET6, data_in.data(), buf, sizeof(buf));
|
||||
|
||||
block.getByPosition(result).column = new ColumnConstString{col_in->size(), buf};
|
||||
}
|
||||
else
|
||||
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
|
||||
+ " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionIPv4NumToString : public IFunction
|
||||
{
|
||||
public:
|
||||
@ -108,7 +194,7 @@ public:
|
||||
ColumnString::Chars_t & vec_res = col_res->getChars();
|
||||
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
|
||||
|
||||
vec_res.resize(vec_in.size() * 16); /// самое длинное значение: 255.255.255.255\0
|
||||
vec_res.resize(vec_in.size() * INET_ADDRSTRLEN); /// самое длинное значение: 255.255.255.255\0
|
||||
offsets_res.resize(vec_in.size());
|
||||
char * begin = reinterpret_cast<char *>(&vec_res[0]);
|
||||
char * pos = begin;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <openssl/md5.h>
|
||||
#include <openssl/sha.h>
|
||||
#include <city.h>
|
||||
|
||||
#include <Poco/ByteOrder.h>
|
||||
@ -64,6 +65,62 @@ struct HalfMD5Impl
|
||||
}
|
||||
};
|
||||
|
||||
struct MD5Impl
|
||||
{
|
||||
static constexpr auto name = "MD5";
|
||||
static constexpr auto length = 16;
|
||||
|
||||
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
|
||||
{
|
||||
MD5_CTX ctx;
|
||||
MD5_Init(&ctx);
|
||||
MD5_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
|
||||
MD5_Final(out_char_data, &ctx);
|
||||
}
|
||||
};
|
||||
|
||||
struct SHA1Impl
|
||||
{
|
||||
static constexpr auto name = "SHA1";
|
||||
static constexpr auto length = 20;
|
||||
|
||||
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
|
||||
{
|
||||
SHA_CTX ctx;
|
||||
SHA1_Init(&ctx);
|
||||
SHA1_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
|
||||
SHA1_Final(out_char_data, &ctx);
|
||||
}
|
||||
};
|
||||
|
||||
struct SHA224Impl
|
||||
{
|
||||
static constexpr auto name = "SHA224";
|
||||
static constexpr auto length = 28;
|
||||
|
||||
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
|
||||
{
|
||||
SHA256_CTX ctx;
|
||||
SHA224_Init(&ctx);
|
||||
SHA224_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
|
||||
SHA224_Final(out_char_data, &ctx);
|
||||
}
|
||||
};
|
||||
|
||||
struct SHA256Impl
|
||||
{
|
||||
static constexpr auto name = "SHA256";
|
||||
static constexpr auto length = 32;
|
||||
|
||||
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
|
||||
{
|
||||
SHA256_CTX ctx;
|
||||
SHA256_Init(&ctx);
|
||||
SHA256_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
|
||||
SHA256_Final(out_char_data, &ctx);
|
||||
}
|
||||
};
|
||||
|
||||
struct SipHash64Impl
|
||||
{
|
||||
static UInt64 apply(const char * begin, size_t size)
|
||||
@ -72,6 +129,17 @@ struct SipHash64Impl
|
||||
}
|
||||
};
|
||||
|
||||
struct SipHash128Impl
|
||||
{
|
||||
static constexpr auto name = "SipHash128";
|
||||
static constexpr auto length = 16;
|
||||
|
||||
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
|
||||
{
|
||||
sipHash128(begin, size, reinterpret_cast<char*>(out_char_data));
|
||||
}
|
||||
};
|
||||
|
||||
struct IntHash32Impl
|
||||
{
|
||||
typedef UInt32 ReturnType;
|
||||
@ -152,6 +220,72 @@ public:
|
||||
};
|
||||
|
||||
|
||||
template <typename Impl>
|
||||
class FunctionStringHashFixedString : public IFunction
|
||||
{
|
||||
public:
|
||||
/// Получить имя функции.
|
||||
String getName() const
|
||||
{
|
||||
return Impl::name;
|
||||
}
|
||||
|
||||
/// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение.
|
||||
DataTypePtr getReturnType(const DataTypes & arguments) const
|
||||
{
|
||||
if (arguments.size() != 1)
|
||||
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
|
||||
+ toString(arguments.size()) + ", should be 1.",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
|
||||
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
return new DataTypeFixedString{Impl::length};
|
||||
}
|
||||
|
||||
/// Выполнить функцию над блоком.
|
||||
void execute(Block & block, const ColumnNumbers & arguments, size_t result)
|
||||
{
|
||||
if (const ColumnString * col_from = typeid_cast<const ColumnString *>(&*block.getByPosition(arguments[0]).column))
|
||||
{
|
||||
auto col_to = new ColumnFixedString{Impl::length};
|
||||
block.getByPosition(result).column = col_to;
|
||||
|
||||
const typename ColumnString::Chars_t & data = col_from->getChars();
|
||||
const typename ColumnString::Offsets_t & offsets = col_from->getOffsets();
|
||||
auto & chars_to = col_to->getChars();
|
||||
const auto size = offsets.size();
|
||||
chars_to.resize(size * Impl::length);
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
Impl::apply(
|
||||
reinterpret_cast<const char *>(&data[i == 0 ? 0 : offsets[i - 1]]),
|
||||
i == 0 ? offsets[i] - 1 : (offsets[i] - 1 - offsets[i - 1]),
|
||||
&chars_to[i * Impl::length]);
|
||||
}
|
||||
else if (const ColumnConstString * col_from = typeid_cast<const ColumnConstString *>(&*block.getByPosition(arguments[0]).column))
|
||||
{
|
||||
const auto & data = col_from->getData();
|
||||
|
||||
String hash(Impl::length, 0);
|
||||
Impl::apply(data.data(), data.size(), reinterpret_cast<unsigned char *>(&hash[0]));
|
||||
|
||||
block.getByPosition(result).column = new ColumnConst<String>{
|
||||
col_from->size(),
|
||||
hash,
|
||||
new DataTypeFixedString{Impl::length}
|
||||
};
|
||||
}
|
||||
else
|
||||
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
|
||||
+ " of first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_COLUMN);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <typename Impl, typename Name>
|
||||
class FunctionIntHash : public IFunction
|
||||
{
|
||||
@ -465,6 +599,10 @@ typedef FunctionStringHash64<HalfMD5Impl, NameHalfMD5> FunctionHalfMD5;
|
||||
typedef FunctionStringHash64<SipHash64Impl, NameSipHash64> FunctionSipHash64;
|
||||
typedef FunctionIntHash<IntHash32Impl, NameIntHash32> FunctionIntHash32;
|
||||
typedef FunctionIntHash<IntHash64Impl, NameIntHash64> FunctionIntHash64;
|
||||
|
||||
/// Hash functions returning FixedString(Impl::length), one alias per implementation.
using FunctionMD5 = FunctionStringHashFixedString<MD5Impl>;
using FunctionSHA1 = FunctionStringHashFixedString<SHA1Impl>;
using FunctionSHA224 = FunctionStringHashFixedString<SHA224Impl>;
using FunctionSHA256 = FunctionStringHashFixedString<SHA256Impl>;
using FunctionSipHash128 = FunctionStringHashFixedString<SipHash128Impl>;
|
||||
|
||||
}
|
||||
|
@ -6,9 +6,10 @@ namespace DB
|
||||
|
||||
void registerFunctionsCoding(FunctionFactory & factory)
|
||||
{
|
||||
#define F [](const Context & context) -> IFunction*
|
||||
#define F [](const Context & context) -> IFunction *
|
||||
|
||||
factory.registerFunction("toStringCutToZero", F { return new FunctionToStringCutToZero; });
|
||||
factory.registerFunction("IPv6NumToString", F { return new FunctionIPv6NumToString; });
|
||||
factory.registerFunction("IPv4NumToString", F { return new FunctionIPv4NumToString; });
|
||||
factory.registerFunction("IPv4StringToNum", F { return new FunctionIPv4StringToNum; });
|
||||
factory.registerFunction("hex", F { return new FunctionHex; });
|
||||
|
@ -7,10 +7,15 @@ namespace DB
|
||||
|
||||
void registerFunctionsHashing(FunctionFactory & factory)
|
||||
{
|
||||
#define F [](const Context & context) -> IFunction*
|
||||
#define F [](const Context & context) -> IFunction *
|
||||
|
||||
factory.registerFunction("halfMD5", F { return new FunctionHalfMD5; });
|
||||
factory.registerFunction("MD5", F { return new FunctionMD5; });
|
||||
factory.registerFunction("SHA1", F { return new FunctionSHA1; });
|
||||
factory.registerFunction("SHA224", F { return new FunctionSHA224; });
|
||||
factory.registerFunction("SHA256", F { return new FunctionSHA256; });
|
||||
factory.registerFunction("sipHash64", F { return new FunctionSipHash64; });
|
||||
factory.registerFunction("sipHash128", F { return new FunctionSipHash128; });
|
||||
factory.registerFunction("cityHash64", F { return new FunctionCityHash64; });
|
||||
factory.registerFunction("intHash32", F { return new FunctionIntHash32; });
|
||||
factory.registerFunction("intHash64", F { return new FunctionIntHash64; });
|
||||
|
Loading…
Reference in New Issue
Block a user