diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 06c33fb7e74..b0a271b21ac 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -124,7 +124,7 @@ if (USE_INTERNAL_SSL_LIBRARY) add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY}) endif () -if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY) +function(mysql_support) set(CLIENT_PLUGIN_CACHING_SHA2_PASSWORD STATIC) set(CLIENT_PLUGIN_SHA256_PASSWORD STATIC) set(CLIENT_PLUGIN_REMOTE_IO OFF) @@ -136,7 +136,15 @@ if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY) if (GLIBC_COMPATIBILITY) set(LIBM glibc-compatibility) endif() + if (USE_INTERNAL_ZLIB_LIBRARY) + set(ZLIB_FOUND ON) + set(ZLIB_LIBRARY zlibstatic) + set(WITH_EXTERNAL_ZLIB ON) + endif() add_subdirectory (mariadb-connector-c) +endfunction() +if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY) + mysql_support() endif () if (USE_INTERNAL_RDKAFKA_LIBRARY) diff --git a/dbms/src/Functions/CRC.cpp b/dbms/src/Functions/CRC.cpp new file mode 100644 index 00000000000..e506812d94c --- /dev/null +++ b/dbms/src/Functions/CRC.cpp @@ -0,0 +1,146 @@ +#include +#include +#include +#include + +namespace +{ + +template +struct CRCBase +{ + T tab[256]; + CRCBase(T polynomial) + { + for (size_t i = 0; i < 256; ++i) + { + T c = i; + for (size_t j = 0; j < 8; ++j) + { + c = c & 1 ? 
polynomial ^ (c >> 1) : c >> 1; + } + tab[i] = c; + } + } +}; + +template +struct CRCImpl +{ + using ReturnType = T; + + static T make_crc(const unsigned char *buf, size_t size) + { + static CRCBase base(polynomial); + + T i, crc; + + crc = 0; + for (i = 0; i < size; i++) + { + crc = base.tab[(crc ^ buf[i]) & 0xff] ^ (crc >> 8); + } + return crc; + } +}; + +static constexpr UInt64 CRC64_ECMA = 0xc96c5795d7870f42ULL; +struct CRC64ECMAImpl : public CRCImpl +{ + static constexpr auto name = "CRC64"; +}; + +static constexpr UInt32 CRC32_IEEE = 0xedb88320; +struct CRC32IEEEImpl : public CRCImpl +{ + static constexpr auto name = "CRC32IEEE"; +}; + +struct CRC32ZLIBImpl +{ + using ReturnType = UInt32; + static constexpr auto name = "CRC32"; + + static UInt32 make_crc(const unsigned char *buf, size_t size) + { return crc32_z(0L, buf, size); } +}; + +} // \anonymous + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +template +struct CRCFunctionWrapper +{ + static constexpr auto is_fixed_to_constant = true; + using ReturnType = typename Impl::ReturnType; + + static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + { + size_t size = offsets.size(); + + ColumnString::Offset prev_offset = 0; + for (size_t i = 0; i < size; ++i) + { + res[i] = do_crc(data, prev_offset, offsets[i] - prev_offset - 1); + prev_offset = offsets[i]; + } + } + + static void vector_fixed_to_constant(const ColumnString::Chars & data, size_t n, ReturnType & res) { res = do_crc(data, 0, n); } + + static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray & res) + { + size_t size = data.size() / n; + + for (size_t i = 0; i < size; ++i) + { + res[i] = do_crc(data, i * n, n); + } + } + + [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) + { + throw Exception("Cannot apply function " + std::string(Impl::name) + " to Array 
argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + +private: + static ReturnType do_crc(const ColumnString::Chars & buf, size_t offset, size_t size) + { + const unsigned char * p = reinterpret_cast(&buf[0]) + offset; + return Impl::make_crc(p, size); + } +}; + +template +using FunctionCRC = FunctionStringOrArrayToT, T, typename T::ReturnType>; +// The same as IEEE variant, but uses 0xffffffff as initial value +// This is the default +// +// (And zlib is used here, since it has optimized version) +using FunctionCRC32ZLIB = FunctionCRC; +// Uses CRC-32-IEEE 802.3 polynomial +using FunctionCRC32IEEE = FunctionCRC; +// Uses CRC-64-ECMA polynomial +using FunctionCRC64ECMA = FunctionCRC; + +template +void registerFunctionCRCImpl(FunctionFactory & factory) +{ + factory.registerFunction(T::name, FunctionFactory::CaseInsensitive); +} + +void registerFunctionCRC(FunctionFactory & factory) +{ + registerFunctionCRCImpl(factory); + registerFunctionCRCImpl(factory); + registerFunctionCRCImpl(factory); +} + +} diff --git a/dbms/src/Functions/CRC32.cpp b/dbms/src/Functions/CRC32.cpp deleted file mode 100644 index 80e0f163571..00000000000 --- a/dbms/src/Functions/CRC32.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int ILLEGAL_TYPE_OF_ARGUMENT; -} - -/** Calculates the CRC32 of a string - */ -struct CRC32Impl -{ - static constexpr auto is_fixed_to_constant = true; - - static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) - { - size_t size = offsets.size(); - - ColumnString::Offset prev_offset = 0; - for (size_t i = 0; i < size; ++i) - { - res[i] = do_crc32(data, prev_offset, offsets[i] - prev_offset - 1); - prev_offset = offsets[i]; - } - } - - static void vector_fixed_to_constant(const ColumnString::Chars & data, size_t n, UInt32 & res) { res = do_crc32(data, 0, n); } - - static void vector_fixed_to_vector(const 
ColumnString::Chars & data, size_t n, PaddedPODArray & res) - { - size_t size = data.size() / n; - - for (size_t i = 0; i < size; ++i) - { - res[i] = do_crc32(data, i * n, n); - } - } - - [[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray & /*res*/) - { - throw Exception("Cannot apply function CRC32 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - -private: - static uint32_t do_crc32(const ColumnString::Chars & buf, size_t offset, size_t size) - { - const unsigned char * p = reinterpret_cast(&buf[0]) + offset; - return crc32(0L, p, size); - } -}; - -struct NameCRC32 -{ - static constexpr auto name = "CRC32"; -}; -using FunctionCRC32 = FunctionStringOrArrayToT; - -void registerFunctionCRC32(FunctionFactory & factory) -{ - factory.registerFunction(NameCRC32::name, FunctionFactory::CaseInsensitive); -} - -} diff --git a/dbms/src/Functions/registerFunctionsString.cpp b/dbms/src/Functions/registerFunctionsString.cpp index 1f4219b18f2..df407750d35 100644 --- a/dbms/src/Functions/registerFunctionsString.cpp +++ b/dbms/src/Functions/registerFunctionsString.cpp @@ -20,7 +20,7 @@ void registerFunctionReverseUTF8(FunctionFactory &); void registerFunctionsConcat(FunctionFactory &); void registerFunctionFormat(FunctionFactory &); void registerFunctionSubstring(FunctionFactory &); -void registerFunctionCRC32(FunctionFactory &); +void registerFunctionCRC(FunctionFactory &); void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &); void registerFunctionStartsWith(FunctionFactory &); void registerFunctionEndsWith(FunctionFactory &); @@ -47,7 +47,7 @@ void registerFunctionsString(FunctionFactory & factory) registerFunctionLowerUTF8(factory); registerFunctionUpperUTF8(factory); registerFunctionReverse(factory); - registerFunctionCRC32(factory); + registerFunctionCRC(factory); registerFunctionReverseUTF8(factory); registerFunctionsConcat(factory); registerFunctionFormat(factory); diff --git 
a/dbms/tests/queries/0_stateless/00936_crc32_function.reference b/dbms/tests/queries/0_stateless/00936_crc_functions.reference similarity index 87% rename from dbms/tests/queries/0_stateless/00936_crc32_function.reference rename to dbms/tests/queries/0_stateless/00936_crc_functions.reference index 90c6a41551b..1431a2e654b 100644 --- a/dbms/tests/queries/0_stateless/00936_crc32_function.reference +++ b/dbms/tests/queries/0_stateless/00936_crc_functions.reference @@ -20,3 +20,7 @@ qwerty string 55151997 2663297705 qqq aaa 3142898280 4027020077 zxcqwer 3358319860 0 aasq xxz 3369829874 4069886758 +CRC32IEEE() +7332BC33 +CRC64() +72D5B9EA0B70CE1E diff --git a/dbms/tests/queries/0_stateless/00936_crc32_function.sql b/dbms/tests/queries/0_stateless/00936_crc_functions.sql similarity index 88% rename from dbms/tests/queries/0_stateless/00936_crc32_function.sql rename to dbms/tests/queries/0_stateless/00936_crc_functions.sql index 1bc9d9ec246..fd324ea23fa 100644 --- a/dbms/tests/queries/0_stateless/00936_crc32_function.sql +++ b/dbms/tests/queries/0_stateless/00936_crc_functions.sql @@ -18,3 +18,8 @@ select CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2); select str1, str2, CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2); DROP TABLE table1; + +SELECT 'CRC32IEEE()'; +SELECT hex(CRC32IEEE('foo')); +SELECT 'CRC64()'; +SELECT hex(CRC64('foo')); diff --git a/docs/en/query_language/functions/string_functions.md b/docs/en/query_language/functions/string_functions.md index 02a8e1d64aa..32186bfb74e 100644 --- a/docs/en/query_language/functions/string_functions.md +++ b/docs/en/query_language/functions/string_functions.md @@ -195,7 +195,20 @@ Returns a string that removes the whitespace characters on either side. ## CRC32(s) -Returns the CRC32 checksum of a string +Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial and initial value `0xffffffff` (zlib implementation). + The result type is UInt32. 
+## CRC32IEEE(s) + +Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial. + +The result type is UInt32. + +## CRC64(s) + +Returns the CRC64 checksum of a string, using CRC-64-ECMA polynomial. + +The result type is UInt64. + [Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) diff --git a/docs/ru/query_language/functions/string_functions.md b/docs/ru/query_language/functions/string_functions.md index 193da6f2753..e6753247ea9 100644 --- a/docs/ru/query_language/functions/string_functions.md +++ b/docs/ru/query_language/functions/string_functions.md @@ -155,7 +155,20 @@ SELECT startsWith('Hello, world!', 'He'); ## CRC32(s) -Возвращает чексумму CRC32 данной строки. +Возвращает чексумму CRC32 данной строки, используются многочлен CRC-32-IEEE 802.3 и начальное значение `0xffffffff` (т.к. используется реализация из zlib). + Тип результата - UInt32. +## CRC32IEEE(s) + +Возвращает чексумму CRC32 данной строки, используется многочлен CRC-32-IEEE 802.3. + +Тип результата - UInt32. + +## CRC64(s) + +Возвращает чексумму CRC64 данной строки, используется многочлен CRC-64-ECMA. + +Тип результата - UInt64. + [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/string_functions/)