dbms: add MD5, SHA1, SHA224, SHA256, IPv6NumToString. [#METR-13151]

This commit is contained in:
Andrey Mironov 2014-10-29 15:25:33 +03:00
parent f1e559bbd5
commit ee34ca31d8
6 changed files with 247 additions and 7 deletions

View File

@ -140,6 +140,9 @@ public:
max = FieldType(data);
}
DataTypePtr & getDataType() { return data_type; }
const DataTypePtr & getDataType() const { return data_type; }
private:
size_t s;
T data;

View File

@ -30,7 +30,7 @@ class SipHash
private:
typedef uint64_t u64;
typedef uint8_t u8;
/// Состояние.
u64 v0;
u64 v1;
@ -131,7 +131,7 @@ public:
}
/// Получить результат в некотором виде. Это можно сделать только один раз!
void get128(char * out)
{
finalize();
@ -158,7 +158,14 @@ public:
#undef SIPROUND
inline uint64_t sipHash64(const char * data, size_t size)
inline void sipHash128(const char * data, const size_t size, char * out)
{
SipHash hash;
hash.update(data, size);
hash.get128(out);
}
inline uint64_t sipHash64(const char * data, const size_t size)
{
SipHash hash;
hash.update(data, size);

View File

@ -12,6 +12,7 @@
#include <DB/Columns/ColumnArray.h>
#include <DB/Columns/ColumnConst.h>
#include <DB/Functions/IFunction.h>
#include <arpa/inet.h>
namespace DB
@ -35,6 +36,91 @@ namespace DB
/// Including the terminating zero character.
#define MAX_UINT_HEX_LENGTH 20
/// Length N of the FixedString(N) argument that IPv6NumToString accepts.
const auto ipv6_fixed_string_length = 16;
/** IPv6NumToString(FixedString(16)) -> String.
  * Converts each 16-byte value to text by calling inet_ntop(AF_INET6, ...).
  * Handles a full ColumnFixedString and a constant column whose data type is FixedString(16).
  */
class FunctionIPv6NumToString : public IFunction
{
public:
    /// Get the function name.
    String getName() const { return "IPv6NumToString"; }

    /// Get the result type from the argument types.
    /// Throws if there is not exactly one argument or if it is not FixedString(16).
    DataTypePtr getReturnType(const DataTypes & arguments) const
    {
        if (arguments.size() != 1)
            throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
                + toString(arguments.size()) + ", should be 1.",
                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        const auto ptr = typeid_cast<const DataTypeFixedString *>(arguments[0].get());
        if (!ptr || ptr->getN() != ipv6_fixed_string_length)
            throw Exception("Illegal type " + arguments[0]->getName() +
                " of argument of function " + getName() +
                ", expected FixedString(" + toString(ipv6_fixed_string_length) + ")",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        return new DataTypeString;
    }

    /// Execute the function over the block: reads the column at arguments[0],
    /// stores the resulting column at position `result`.
    /// Throws ILLEGAL_TYPE_OF_ARGUMENT if the fixed string length is not 16,
    /// ILLEGAL_COLUMN if the column is neither ColumnFixedString nor ColumnConst<String>.
    void execute(Block & block, const ColumnNumbers & arguments, const size_t result)
    {
        const auto & col_name_type = block.getByPosition(arguments[0]);
        const ColumnPtr & column = col_name_type.column;

        if (const auto col_in = typeid_cast<const ColumnFixedString *>(column.get()))
        {
            if (col_in->getN() != ipv6_fixed_string_length)
                throw Exception("Illegal type " + col_name_type.type->getName() +
                    " of column " + col_in->getName() +
                    " argument of function " + getName() +
                    ", expected FixedString(" + toString(ipv6_fixed_string_length) + ")",
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

            const auto size = col_in->size();
            const auto & vec_in = col_in->getChars();

            auto col_res = new ColumnString;
            block.getByPosition(result).column = col_res;

            ColumnString::Chars_t & vec_res = col_res->getChars();
            ColumnString::Offsets_t & offsets_res = col_res->getOffsets();

            /// Reserve the worst case (INET6_ADDRSTRLEN bytes per row, terminating zero included);
            /// the buffer is shrunk to the actually used size after the loop.
            vec_res.resize(size * INET6_ADDRSTRLEN);
            offsets_res.resize(size);

            auto begin = reinterpret_cast<char *>(&vec_res[0]);
            auto pos = begin;

            /// `offset` walks the input bytes in steps of 16, `row` is the index of the value.
            /// They must be kept separate: offsets_res has one entry per row, not per byte.
            for (size_t offset = 0, row = 0; offset < vec_in.size(); offset += ipv6_fixed_string_length, ++row)
            {
                inet_ntop(AF_INET6, &vec_in[offset], pos, INET6_ADDRSTRLEN);
                /// Advance past the terminating zero written by inet_ntop.
                pos = static_cast<char *>(memchr(pos, 0, INET6_ADDRSTRLEN)) + 1;
                offsets_res[row] = pos - begin;
            }

            vec_res.resize(pos - begin);
        }
        else if (const auto col_in = typeid_cast<const ColumnConst<String> *>(column.get()))
        {
            /// A constant FixedString is represented as ColumnConst<String> carrying its data type.
            const auto data_type_fixed_string = typeid_cast<const DataTypeFixedString *>(col_in->getDataType().get());
            if (!data_type_fixed_string || data_type_fixed_string->getN() != ipv6_fixed_string_length)
                throw Exception("Illegal type " + col_name_type.type->getName() +
                    " of column " + col_in->getName() +
                    " argument of function " + getName() +
                    ", expected FixedString(" + toString(ipv6_fixed_string_length) + ")",
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

            const auto & data_in = col_in->getData();

            char buf[INET6_ADDRSTRLEN];
            inet_ntop(AF_INET6, data_in.data(), buf, sizeof(buf));

            block.getByPosition(result).column = new ColumnConstString{col_in->size(), buf};
        }
        else
            throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
                + " of argument of function " + getName(),
                ErrorCodes::ILLEGAL_COLUMN);
    }
};
class FunctionIPv4NumToString : public IFunction
{
public:
@ -108,7 +194,7 @@ public:
ColumnString::Chars_t & vec_res = col_res->getChars();
ColumnString::Offsets_t & offsets_res = col_res->getOffsets();
vec_res.resize(vec_in.size() * 16); /// самое длинное значение: 255.255.255.255\0
vec_res.resize(vec_in.size() * INET_ADDRSTRLEN); /// самое длинное значение: 255.255.255.255\0
offsets_res.resize(vec_in.size());
char * begin = reinterpret_cast<char *>(&vec_res[0]);
char * pos = begin;

View File

@ -1,6 +1,7 @@
#pragma once
#include <openssl/md5.h>
#include <openssl/sha.h>
#include <city.h>
#include <Poco/ByteOrder.h>
@ -64,6 +65,62 @@ struct HalfMD5Impl
}
};
struct MD5Impl
{
static constexpr auto name = "MD5";
static constexpr auto length = 16;
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
MD5_CTX ctx;
MD5_Init(&ctx);
MD5_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
MD5_Final(out_char_data, &ctx);
}
};
struct SHA1Impl
{
static constexpr auto name = "SHA1";
static constexpr auto length = 20;
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA_CTX ctx;
SHA1_Init(&ctx);
SHA1_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA1_Final(out_char_data, &ctx);
}
};
struct SHA224Impl
{
static constexpr auto name = "SHA224";
static constexpr auto length = 28;
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA256_CTX ctx;
SHA224_Init(&ctx);
SHA224_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA224_Final(out_char_data, &ctx);
}
};
struct SHA256Impl
{
static constexpr auto name = "SHA256";
static constexpr auto length = 32;
static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
{
SHA256_CTX ctx;
SHA256_Init(&ctx);
SHA256_Update(&ctx, reinterpret_cast<const unsigned char *>(begin), size);
SHA256_Final(out_char_data, &ctx);
}
};
struct SipHash64Impl
{
static UInt64 apply(const char * begin, size_t size)
@ -72,6 +129,17 @@ struct SipHash64Impl
}
};
/// SipHash128 implementation for FunctionStringHashFixedString; delegates to sipHash128().
struct SipHash128Impl
{
    /// Name reported by the function's getName().
    static constexpr auto name = "SipHash128";
    /// Number of bytes FunctionStringHashFixedString reserves for each result.
    static constexpr auto length = 16;

    /// Hashes `size` bytes starting at `begin` and writes the result to `out_char_data`.
    static void apply(const char * begin, const size_t size, unsigned char * out_char_data)
    {
        /// sipHash128 takes a char * output buffer, so only the pointer type is adjusted.
        char * const out = reinterpret_cast<char*>(out_char_data);
        sipHash128(begin, size, out);
    }
};
struct IntHash32Impl
{
typedef UInt32 ReturnType;
@ -152,6 +220,72 @@ public:
};
template <typename Impl>
class FunctionStringHashFixedString : public IFunction
{
public:
/// Получить имя функции.
String getName() const
{
return Impl::name;
}
/// Получить тип результата по типам аргументов. Если функция неприменима для данных аргументов - кинуть исключение.
DataTypePtr getReturnType(const DataTypes & arguments) const
{
if (arguments.size() != 1)
throw Exception("Number of arguments for function " + getName() + " doesn't match: passed "
+ toString(arguments.size()) + ", should be 1.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!typeid_cast<const DataTypeString *>(&*arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return new DataTypeFixedString{Impl::length};
}
/// Выполнить функцию над блоком.
void execute(Block & block, const ColumnNumbers & arguments, size_t result)
{
if (const ColumnString * col_from = typeid_cast<const ColumnString *>(&*block.getByPosition(arguments[0]).column))
{
auto col_to = new ColumnFixedString{Impl::length};
block.getByPosition(result).column = col_to;
const typename ColumnString::Chars_t & data = col_from->getChars();
const typename ColumnString::Offsets_t & offsets = col_from->getOffsets();
auto & chars_to = col_to->getChars();
const auto size = offsets.size();
chars_to.resize(size * Impl::length);
for (size_t i = 0; i < size; ++i)
Impl::apply(
reinterpret_cast<const char *>(&data[i == 0 ? 0 : offsets[i - 1]]),
i == 0 ? offsets[i] - 1 : (offsets[i] - 1 - offsets[i - 1]),
&chars_to[i * Impl::length]);
}
else if (const ColumnConstString * col_from = typeid_cast<const ColumnConstString *>(&*block.getByPosition(arguments[0]).column))
{
const auto & data = col_from->getData();
String hash(Impl::length, 0);
Impl::apply(data.data(), data.size(), reinterpret_cast<unsigned char *>(&hash[0]));
block.getByPosition(result).column = new ColumnConst<String>{
col_from->size(),
hash,
new DataTypeFixedString{Impl::length}
};
}
else
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()
+ " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
template <typename Impl, typename Name>
class FunctionIntHash : public IFunction
{
@ -465,6 +599,10 @@ typedef FunctionStringHash64<HalfMD5Impl, NameHalfMD5> FunctionHalfMD5;
typedef FunctionStringHash64<SipHash64Impl, NameSipHash64> FunctionSipHash64;
typedef FunctionIntHash<IntHash32Impl, NameIntHash32> FunctionIntHash32;
typedef FunctionIntHash<IntHash64Impl, NameIntHash64> FunctionIntHash64;
/// String hashes returning FixedString(Impl::length); the function name is taken from Impl::name.
typedef FunctionStringHashFixedString<MD5Impl> FunctionMD5;
typedef FunctionStringHashFixedString<SHA1Impl> FunctionSHA1;
typedef FunctionStringHashFixedString<SHA224Impl> FunctionSHA224;
typedef FunctionStringHashFixedString<SHA256Impl> FunctionSHA256;
typedef FunctionStringHashFixedString<SipHash128Impl> FunctionSipHash128;
}

View File

@ -6,9 +6,10 @@ namespace DB
void registerFunctionsCoding(FunctionFactory & factory)
{
#define F [](const Context & context) -> IFunction*
#define F [](const Context & context) -> IFunction *
factory.registerFunction("toStringCutToZero", F { return new FunctionToStringCutToZero; });
factory.registerFunction("IPv6NumToString", F { return new FunctionIPv6NumToString; });
factory.registerFunction("IPv4NumToString", F { return new FunctionIPv4NumToString; });
factory.registerFunction("IPv4StringToNum", F { return new FunctionIPv4StringToNum; });
factory.registerFunction("hex", F { return new FunctionHex; });

View File

@ -7,10 +7,15 @@ namespace DB
void registerFunctionsHashing(FunctionFactory & factory)
{
#define F [](const Context & context) -> IFunction*
#define F [](const Context & context) -> IFunction *
factory.registerFunction("halfMD5", F { return new FunctionHalfMD5; });
factory.registerFunction("MD5", F { return new FunctionMD5; });
factory.registerFunction("SHA1", F { return new FunctionSHA1; });
factory.registerFunction("SHA224", F { return new FunctionSHA224; });
factory.registerFunction("SHA256", F { return new FunctionSHA256; });
factory.registerFunction("sipHash64", F { return new FunctionSipHash64; });
factory.registerFunction("sipHash128", F { return new FunctionSipHash128; });
factory.registerFunction("cityHash64", F { return new FunctionCityHash64; });
factory.registerFunction("intHash32", F { return new FunctionIntHash32; });
factory.registerFunction("intHash64", F { return new FunctionIntHash64; });