#include #include #include #include #include #include #include #include #include "config.h" #if USE_BLAKE3 # include #endif #if USE_SSL # include # include # include # if USE_BORINGSSL # include # else # include # endif #endif /// Instatiating only the functions that require FunctionStringHashFixedString in a separate file /// to better parallelize the build procedure and avoid MSan build failure /// due to excessive resource consumption. namespace DB { namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } #if USE_SSL struct MD4Impl { static constexpr auto name = "MD4"; enum { length = MD4_DIGEST_LENGTH }; static void apply(const char * begin, const size_t size, unsigned char * out_char_data) { MD4_CTX ctx; MD4_Init(&ctx); MD4_Update(&ctx, reinterpret_cast(begin), size); MD4_Final(out_char_data, &ctx); } }; struct MD5Impl { static constexpr auto name = "MD5"; enum { length = MD5_DIGEST_LENGTH }; static void apply(const char * begin, const size_t size, unsigned char * out_char_data) { MD5_CTX ctx; MD5_Init(&ctx); MD5_Update(&ctx, reinterpret_cast(begin), size); MD5_Final(out_char_data, &ctx); } }; struct SHA1Impl { static constexpr auto name = "SHA1"; enum { length = SHA_DIGEST_LENGTH }; static void apply(const char * begin, const size_t size, unsigned char * out_char_data) { SHA_CTX ctx; SHA1_Init(&ctx); SHA1_Update(&ctx, reinterpret_cast(begin), size); SHA1_Final(out_char_data, &ctx); } }; struct SHA224Impl { static constexpr auto name = "SHA224"; enum { length = SHA224_DIGEST_LENGTH }; static void apply(const char * begin, const size_t size, unsigned char * out_char_data) { SHA256_CTX ctx; SHA224_Init(&ctx); SHA224_Update(&ctx, reinterpret_cast(begin), size); SHA224_Final(out_char_data, &ctx); } }; struct SHA256Impl { static constexpr auto name = "SHA256"; enum { length = SHA256_DIGEST_LENGTH }; static void apply(const char * begin, const size_t size, unsigned char * out_char_data) { SHA256_CTX ctx; SHA256_Init(&ctx); SHA256_Update(&ctx, reinterpret_cast(begin), size); SHA256_Final(out_char_data, &ctx); } }; struct SHA384Impl { static constexpr auto name = "SHA384"; enum { length = SHA384_DIGEST_LENGTH }; static void apply(const char * begin, const size_t size, unsigned char * out_char_data) { SHA512_CTX ctx; SHA384_Init(&ctx); SHA384_Update(&ctx, reinterpret_cast(begin), size); SHA384_Final(out_char_data, &ctx); } }; struct SHA512Impl { static constexpr auto name = "SHA512"; enum { length = 64 }; static void apply(const char * begin, const size_t size, unsigned char * out_char_data) { SHA512_CTX ctx; SHA512_Init(&ctx); SHA512_Update(&ctx, reinterpret_cast(begin), size); SHA512_Final(out_char_data, &ctx); } }; struct SHA512Impl256 { static constexpr auto name = "SHA512_256"; enum { length = 32 }; static void apply(const char * begin, const size_t size, unsigned char * out_char_data) { /// Here, we use the EVP interface that is common to both BoringSSL and OpenSSL. Though BoringSSL is the default /// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init, /// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available /// in the current version of OpenSSL that we use which necessitates the use of the EVP interface. auto * md_ctx = EVP_MD_CTX_create(); EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/); EVP_DigestUpdate(md_ctx, begin, size); EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/); EVP_MD_CTX_destroy(md_ctx); } }; #endif #if USE_BLAKE3 struct ImplBLAKE3 { static constexpr auto name = "BLAKE3"; enum { length = 32 }; static void apply(const char * begin, const size_t size, unsigned char * out_char_data) { static_assert(LLVM_BLAKE3_OUT_LEN == ImplBLAKE3::length); auto & result = *reinterpret_cast *>(out_char_data); llvm::BLAKE3 hasher; if (size > 0) hasher.update(llvm::StringRef(begin, size)); hasher.final(result); } }; #endif template class FunctionStringHashFixedString : public IFunction { public: static constexpr auto name = Impl::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0])) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); return std::make_shared(Impl::length); } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { if (const ColumnString * col_from = checkAndGetColumn(arguments[0].column.get())) { auto col_to = ColumnFixedString::create(Impl::length); const typename ColumnString::Chars & data = col_from->getChars(); const typename ColumnString::Offsets & offsets = col_from->getOffsets(); auto & chars_to = col_to->getChars(); const auto size = offsets.size(); chars_to.resize(size * Impl::length); ColumnString::Offset current_offset = 0; for (size_t i = 0; i < size; ++i) { Impl::apply( reinterpret_cast(&data[current_offset]), offsets[i] - current_offset - 1, reinterpret_cast(&chars_to[i * Impl::length])); current_offset = offsets[i]; } return col_to; } else if (const ColumnFixedString * col_from_fix = checkAndGetColumn(arguments[0].column.get())) { auto col_to = ColumnFixedString::create(Impl::length); const typename ColumnFixedString::Chars & data = col_from_fix->getChars(); const auto size = col_from_fix->size(); auto & chars_to = col_to->getChars(); const auto length = col_from_fix->getN(); chars_to.resize(size * Impl::length); for (size_t i = 0; i < size; ++i) { Impl::apply( reinterpret_cast(&data[i * length]), length, reinterpret_cast(&chars_to[i * Impl::length])); } return col_to; } else if (const ColumnIPv6 * col_from_ip = checkAndGetColumn(arguments[0].column.get())) { auto col_to = ColumnFixedString::create(Impl::length); const typename ColumnIPv6::Container & data = col_from_ip->getData(); const auto size = col_from_ip->size(); auto & chars_to = col_to->getChars(); const auto length = IPV6_BINARY_LENGTH; chars_to.resize(size * Impl::length); for (size_t i = 0; i < size; ++i) { Impl::apply( reinterpret_cast(&data[i]), length, reinterpret_cast(&chars_to[i * Impl::length])); } return col_to; } else throw Exception( ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); } }; #if USE_SSL || USE_BLAKE3 REGISTER_FUNCTION(HashFixedStrings) { # if USE_SSL using FunctionMD4 = FunctionStringHashFixedString; using FunctionMD5 = FunctionStringHashFixedString; using FunctionSHA1 = FunctionStringHashFixedString; using FunctionSHA224 = FunctionStringHashFixedString; using FunctionSHA256 = FunctionStringHashFixedString; using FunctionSHA384 = FunctionStringHashFixedString; using FunctionSHA512 = FunctionStringHashFixedString; using FunctionSHA512_256 = FunctionStringHashFixedString; factory.registerFunction(FunctionDocumentation{ .description = R"(Calculates the MD4 hash of the given string.)", .syntax = "SELECT MD4(s);", .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, .returned_value = "The MD4 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).", .examples = {{"", "SELECT HEX(MD4('abc'));", R"( ┌─hex(MD4('abc'))──────────────────┐ │ A448017AAF21D8525FC10AE87AA6729D │ └──────────────────────────────────┘ )"}}}); factory.registerFunction(FunctionDocumentation{ .description = R"(Calculates the MD5 hash of the given string.)", .syntax = "SELECT MD5(s);", .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, .returned_value = "The MD5 hash of the given input string returned as a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).", .examples = {{"", "SELECT HEX(MD5('abc'));", R"( ┌─hex(MD5('abc'))──────────────────┐ │ 900150983CD24FB0D6963F7D28E17F72 │ └──────────────────────────────────┘ )"}}}); factory.registerFunction(FunctionDocumentation{ .description = R"(Calculates the SHA1 hash of the given string.)", .syntax = "SELECT SHA1(s);", .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, .returned_value = "The SHA1 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", .examples = {{"", "SELECT HEX(SHA1('abc'));", R"( ┌─hex(SHA1('abc'))─────────────────────────┐ │ A9993E364706816ABA3E25717850C26C9CD0D89D │ └──────────────────────────────────────────┘ )"}}}); factory.registerFunction(FunctionDocumentation{ .description = R"(Calculates the SHA224 hash of the given string.)", .syntax = "SELECT SHA224(s);", .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, .returned_value = "The SHA224 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", .examples = {{"", "SELECT HEX(SHA224('abc'));", R"( ┌─hex(SHA224('abc'))───────────────────────────────────────┐ │ 23097D223405D8228642A477BDA255B32AADBCE4BDA0B3F7E36C9DA7 │ └──────────────────────────────────────────────────────────┘ )"}}}); factory.registerFunction(FunctionDocumentation{ .description = R"(Calculates the SHA256 hash of the given string.)", .syntax = "SELECT SHA256(s);", .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, .returned_value = "The SHA256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", .examples = {{"", "SELECT HEX(SHA256('abc'));", R"( ┌─hex(SHA256('abc'))───────────────────────────────────────────────┐ │ BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD │ └──────────────────────────────────────────────────────────────────┘ )"}}}); factory.registerFunction(FunctionDocumentation{ .description = R"(Calculates the SHA384 hash of the given string.)", .syntax = "SELECT SHA384(s);", .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, .returned_value = "The SHA384 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", .examples = {{"", "SELECT HEX(SHA384('abc'));", R"( ┌─hex(SHA384('abc'))───────────────────────────────────────────────────────────────────────────────┐ │ CB00753F45A35E8BB5A03D699AC65007272C32AB0EDED1631A8B605A43FF5BED8086072BA1E7CC2358BAECA134C825A7 │ └──────────────────────────────────────────────────────────────────────────────────────────────────┘ )"}}}); factory.registerFunction(FunctionDocumentation{ .description = R"(Calculates the SHA512 hash of the given string.)", .syntax = "SELECT SHA512(s);", .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, .returned_value = "The SHA512 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", .examples = {{"", "SELECT HEX(SHA512('abc'));", R"( ┌─hex(SHA512('abc'))───────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ DDAF35A193617ABACC417349AE20413112E6FA4E89A97EA20A9EEEE64B55D39A2192992A274FC1A836BA3C23A3FEEBBD454D4423643CE80E2A9AC94FA54CA49F │ └──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ )"}}}); factory.registerFunction(FunctionDocumentation{ .description = R"(Calculates the SHA512_256 hash of the given string.)", .syntax = "SELECT SHA512_256(s);", .arguments = {{"s", "The input [String](../../sql-reference/data-types/string.md)."}}, .returned_value = "The SHA512_256 hash of the given input string returned as a [FixedString](../../sql-reference/data-types/fixedstring.md).", .examples = {{"", "SELECT HEX(SHA512_256('abc'));", R"( ┌─hex(SHA512_256('abc'))───────────────────────────────────────────┐ │ 53048E2681941EF99B2E29B76B4C7DABE4C2D0C634FC6D46E0E2F13107E7AF23 │ └──────────────────────────────────────────────────────────────────┘ )"}}}); # endif # if USE_BLAKE3 using FunctionBLAKE3 = FunctionStringHashFixedString; factory.registerFunction( FunctionDocumentation{ .description = R"( Calculates BLAKE3 hash string and returns the resulting set of bytes as FixedString. This cryptographic hash-function is integrated into ClickHouse with BLAKE3 Rust library. The function is rather fast and shows approximately two times faster performance compared to SHA-2, while generating hashes of the same length as SHA-256. It returns a BLAKE3 hash as a byte array with type FixedString(32). )", .examples{{"hash", "SELECT hex(BLAKE3('ABC'))", ""}}, .categories{"Hash"}}, FunctionFactory::CaseSensitive); # endif } #endif }