mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 16:50:48 +00:00
Merge pull request #7480 from azat/crc-v3
Add CRC32IEEE()/CRC64() support
This commit is contained in:
commit
6e2af3db41
10
contrib/CMakeLists.txt
vendored
10
contrib/CMakeLists.txt
vendored
@ -124,7 +124,7 @@ if (USE_INTERNAL_SSL_LIBRARY)
|
||||
add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY})
|
||||
endif ()
|
||||
|
||||
if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
|
||||
function(mysql_support)
|
||||
set(CLIENT_PLUGIN_CACHING_SHA2_PASSWORD STATIC)
|
||||
set(CLIENT_PLUGIN_SHA256_PASSWORD STATIC)
|
||||
set(CLIENT_PLUGIN_REMOTE_IO OFF)
|
||||
@ -136,7 +136,15 @@ if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
|
||||
if (GLIBC_COMPATIBILITY)
|
||||
set(LIBM glibc-compatibility)
|
||||
endif()
|
||||
if (USE_INTERNAL_ZLIB_LIBRARY)
|
||||
set(ZLIB_FOUND ON)
|
||||
set(ZLIB_LIBRARY zlibstatic)
|
||||
set(WITH_EXTERNAL_ZLIB ON)
|
||||
endif()
|
||||
add_subdirectory (mariadb-connector-c)
|
||||
endfunction()
|
||||
if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
|
||||
mysql_support()
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_RDKAFKA_LIBRARY)
|
||||
|
146
dbms/src/Functions/CRC.cpp
Normal file
146
dbms/src/Functions/CRC.cpp
Normal file
@ -0,0 +1,146 @@
|
||||
#include <zlib.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringOrArrayToT.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// Lookup table for a byte-at-a-time CRC computed with right shifts (LSB-first).
///
/// tab[b] is the value obtained by pushing the single byte `b` through
/// eight shift/xor steps with the given polynomial.
template <class T>
struct CRCBase
{
    T tab[256];

    /// @param polynomial  generator polynomial in the bit-reversed form matching
    ///                    the right-shifting loop below.
    ///
    /// `explicit`, so that a plain integer is never silently converted into a whole table.
    explicit CRCBase(T polynomial)
    {
        for (size_t i = 0; i < 256; ++i)
        {
            /// i < 256, so the conversion is lossless for every integer type of 8 bits or more.
            T c = static_cast<T>(i);
            for (size_t j = 0; j < 8; ++j)
            {
                /// Drop the lowest bit; if it was set, fold the polynomial in.
                c = (c & 1) ? polynomial ^ (c >> 1) : c >> 1;
            }
            tab[i] = c;
        }
    }
};
|
||||
|
||||
template <class T, T polynomial>
|
||||
struct CRCImpl
|
||||
{
|
||||
using ReturnType = T;
|
||||
|
||||
static T make_crc(const unsigned char *buf, size_t size)
|
||||
{
|
||||
static CRCBase<ReturnType> base(polynomial);
|
||||
|
||||
T i, crc;
|
||||
|
||||
crc = 0;
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
crc = base.tab[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
|
||||
}
|
||||
return crc;
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr UInt64 CRC64_ECMA = 0xc96c5795d7870f42ULL;
|
||||
struct CRC64ECMAImpl : public CRCImpl<UInt64, CRC64_ECMA>
|
||||
{
|
||||
static constexpr auto name = "CRC64";
|
||||
};
|
||||
|
||||
static constexpr UInt32 CRC32_IEEE = 0xedb88320;
|
||||
struct CRC32IEEEImpl : public CRCImpl<UInt32, CRC32_IEEE>
|
||||
{
|
||||
static constexpr auto name = "CRC32IEEE";
|
||||
};
|
||||
|
||||
struct CRC32ZLIBImpl
|
||||
{
|
||||
using ReturnType = UInt32;
|
||||
static constexpr auto name = "CRC32";
|
||||
|
||||
static UInt32 make_crc(const unsigned char *buf, size_t size)
|
||||
{ return crc32_z(0L, buf, size); }
|
||||
};
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
struct CRCFunctionWrapper
|
||||
{
|
||||
static constexpr auto is_fixed_to_constant = true;
|
||||
using ReturnType = typename Impl::ReturnType;
|
||||
|
||||
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<ReturnType> & res)
|
||||
{
|
||||
size_t size = offsets.size();
|
||||
|
||||
ColumnString::Offset prev_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
res[i] = do_crc(data, prev_offset, offsets[i] - prev_offset - 1);
|
||||
prev_offset = offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_fixed_to_constant(const ColumnString::Chars & data, size_t n, ReturnType & res) { res = do_crc(data, 0, n); }
|
||||
|
||||
static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray<ReturnType> & res)
|
||||
{
|
||||
size_t size = data.size() / n;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
res[i] = do_crc(data, i * n, n);
|
||||
}
|
||||
}
|
||||
|
||||
[[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<ReturnType> & /*res*/)
|
||||
{
|
||||
throw Exception("Cannot apply function " + std::string(Impl::name) + " to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
private:
|
||||
static ReturnType do_crc(const ColumnString::Chars & buf, size_t offset, size_t size)
|
||||
{
|
||||
const unsigned char * p = reinterpret_cast<const unsigned char *>(&buf[0]) + offset;
|
||||
return Impl::make_crc(p, size);
|
||||
}
|
||||
};
|
||||
|
||||
template <class T>
|
||||
using FunctionCRC = FunctionStringOrArrayToT<CRCFunctionWrapper<T>, T, typename T::ReturnType>;
|
||||
// The same as IEEE variant, but uses 0xffffffff as initial value
|
||||
// This is the default
|
||||
//
|
||||
// (And zlib is used here, since it has optimized version)
|
||||
using FunctionCRC32ZLIB = FunctionCRC<CRC32ZLIBImpl>;
|
||||
// Uses CRC-32-IEEE 802.3 polynomial
|
||||
using FunctionCRC32IEEE = FunctionCRC<CRC32IEEEImpl>;
|
||||
// Uses CRC-64-ECMA polynomial
|
||||
using FunctionCRC64ECMA = FunctionCRC<CRC64ECMAImpl>;
|
||||
|
||||
template <class T>
|
||||
void registerFunctionCRCImpl(FunctionFactory & factory)
|
||||
{
|
||||
factory.registerFunction<T>(T::name, FunctionFactory::CaseInsensitive);
|
||||
}
|
||||
|
||||
/// Registers every CRC function defined in this file.
void registerFunctionCRC(FunctionFactory & factory)
{
    /// CRC32 (zlib)
    registerFunctionCRCImpl<FunctionCRC32ZLIB>(factory);

    /// CRC32IEEE
    registerFunctionCRCImpl<FunctionCRC32IEEE>(factory);

    /// CRC64
    registerFunctionCRCImpl<FunctionCRC64ECMA>(factory);
}
|
||||
|
||||
}
|
@ -1,68 +0,0 @@
|
||||
#include <zlib.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <Functions/FunctionFactory.h>
|
||||
#include <Functions/FunctionStringOrArrayToT.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
}
|
||||
|
||||
/** Calculates the CRC32 of a string
|
||||
*/
|
||||
struct CRC32Impl
|
||||
{
|
||||
static constexpr auto is_fixed_to_constant = true;
|
||||
|
||||
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt32> & res)
|
||||
{
|
||||
size_t size = offsets.size();
|
||||
|
||||
ColumnString::Offset prev_offset = 0;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
res[i] = do_crc32(data, prev_offset, offsets[i] - prev_offset - 1);
|
||||
prev_offset = offsets[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_fixed_to_constant(const ColumnString::Chars & data, size_t n, UInt32 & res) { res = do_crc32(data, 0, n); }
|
||||
|
||||
static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt32> & res)
|
||||
{
|
||||
size_t size = data.size() / n;
|
||||
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
{
|
||||
res[i] = do_crc32(data, i * n, n);
|
||||
}
|
||||
}
|
||||
|
||||
[[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<UInt32> & /*res*/)
|
||||
{
|
||||
throw Exception("Cannot apply function CRC32 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
}
|
||||
|
||||
private:
|
||||
static uint32_t do_crc32(const ColumnString::Chars & buf, size_t offset, size_t size)
|
||||
{
|
||||
const unsigned char * p = reinterpret_cast<const unsigned char *>(&buf[0]) + offset;
|
||||
return crc32(0L, p, size);
|
||||
}
|
||||
};
|
||||
|
||||
struct NameCRC32
|
||||
{
|
||||
static constexpr auto name = "CRC32";
|
||||
};
|
||||
using FunctionCRC32 = FunctionStringOrArrayToT<CRC32Impl, NameCRC32, UInt32>;
|
||||
|
||||
/// Registers FunctionCRC32 in `factory` under NameCRC32::name, with case-insensitive lookup.
void registerFunctionCRC32(FunctionFactory & factory)
{
    factory.registerFunction<FunctionCRC32>(
        NameCRC32::name,
        FunctionFactory::CaseInsensitive);
}
|
||||
|
||||
}
|
@ -20,7 +20,7 @@ void registerFunctionReverseUTF8(FunctionFactory &);
|
||||
void registerFunctionsConcat(FunctionFactory &);
|
||||
void registerFunctionFormat(FunctionFactory &);
|
||||
void registerFunctionSubstring(FunctionFactory &);
|
||||
void registerFunctionCRC32(FunctionFactory &);
|
||||
void registerFunctionCRC(FunctionFactory &);
|
||||
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
|
||||
void registerFunctionStartsWith(FunctionFactory &);
|
||||
void registerFunctionEndsWith(FunctionFactory &);
|
||||
@ -47,7 +47,7 @@ void registerFunctionsString(FunctionFactory & factory)
|
||||
registerFunctionLowerUTF8(factory);
|
||||
registerFunctionUpperUTF8(factory);
|
||||
registerFunctionReverse(factory);
|
||||
registerFunctionCRC32(factory);
|
||||
registerFunctionCRC(factory);
|
||||
registerFunctionReverseUTF8(factory);
|
||||
registerFunctionsConcat(factory);
|
||||
registerFunctionFormat(factory);
|
||||
|
@ -20,3 +20,7 @@ qwerty string 55151997 2663297705
|
||||
qqq aaa 3142898280 4027020077
|
||||
zxcqwer 3358319860 0
|
||||
aasq xxz 3369829874 4069886758
|
||||
CRC32IEEE()
|
||||
7332BC33
|
||||
CRC64()
|
||||
72D5B9EA0B70CE1E
|
@ -18,3 +18,8 @@ select CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2);
|
||||
select str1, str2, CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2);
|
||||
|
||||
DROP TABLE table1;
|
||||
|
||||
SELECT 'CRC32IEEE()';
|
||||
SELECT hex(CRC32IEEE('foo'));
|
||||
SELECT 'CRC64()';
|
||||
SELECT hex(CRC64('foo'));
|
@ -195,7 +195,20 @@ Returns a string that removes the whitespace characters on either side.
|
||||
|
||||
## CRC32(s)
|
||||
|
||||
Returns the CRC32 checksum of a string
|
||||
Returns the CRC32 checksum of a string, using the CRC-32-IEEE 802.3 polynomial and the initial value `0xffffffff` (zlib implementation).
|
||||
|
||||
The result type is UInt32.
|
||||
|
||||
## CRC32IEEE(s)
|
||||
|
||||
Returns the CRC32 checksum of a string, using the CRC-32-IEEE 802.3 polynomial and the initial value `0`.
|
||||
|
||||
The result type is UInt32.
|
||||
|
||||
## CRC64(s)
|
||||
|
||||
Returns the CRC64 checksum of a string, using the CRC-64-ECMA polynomial.
|
||||
|
||||
The result type is UInt64.
|
||||
|
||||
[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) <!--hide-->
|
||||
|
@ -155,7 +155,20 @@ SELECT startsWith('Hello, world!', 'He');
|
||||
|
||||
## CRC32(s)
|
||||
|
||||
Возвращает чексумму CRC32 данной строки.
|
||||
Возвращает чексумму CRC32 данной строки; используются многочлен CRC-32-IEEE 802.3 и начальное значение `0xffffffff` (т.к. используется реализация из zlib).
|
||||
|
||||
Тип результата - UInt32.
|
||||
|
||||
## CRC32IEEE(s)
|
||||
|
||||
Возвращает чексумму CRC32 данной строки; используется многочлен CRC-32-IEEE 802.3.
|
||||
|
||||
Тип результата - UInt32.
|
||||
|
||||
## CRC64(s)
|
||||
|
||||
Возвращает чексумму CRC64 данной строки; используется многочлен CRC-64-ECMA.
|
||||
|
||||
Тип результата - UInt64.
|
||||
|
||||
[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/string_functions/) <!--hide-->
|
||||
|
Loading…
Reference in New Issue
Block a user