Merge pull request #7480 from azat/crc-v3

Add CRC32IEEE()/CRC64() support
This commit is contained in:
Alexander Kuzmenkov 2019-10-28 16:16:37 +03:00 committed by GitHub
commit 6e2af3db41
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 194 additions and 73 deletions

View File

@ -124,7 +124,7 @@ if (USE_INTERNAL_SSL_LIBRARY)
add_library(OpenSSL::SSL ALIAS ${OPENSSL_SSL_LIBRARY})
endif ()
if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
function(mysql_support)
set(CLIENT_PLUGIN_CACHING_SHA2_PASSWORD STATIC)
set(CLIENT_PLUGIN_SHA256_PASSWORD STATIC)
set(CLIENT_PLUGIN_REMOTE_IO OFF)
@ -136,7 +136,15 @@ if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
if (GLIBC_COMPATIBILITY)
set(LIBM glibc-compatibility)
endif()
if (USE_INTERNAL_ZLIB_LIBRARY)
set(ZLIB_FOUND ON)
set(ZLIB_LIBRARY zlibstatic)
set(WITH_EXTERNAL_ZLIB ON)
endif()
add_subdirectory (mariadb-connector-c)
endfunction()
if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
mysql_support()
endif ()
if (USE_INTERNAL_RDKAFKA_LIBRARY)

146
dbms/src/Functions/CRC.cpp Normal file
View File

@ -0,0 +1,146 @@
#include <zlib.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringOrArrayToT.h>
namespace
{
template <class T>
struct CRCBase
{
T tab[256];
CRCBase(T polynomial)
{
for (size_t i = 0; i < 256; ++i)
{
T c = i;
for (size_t j = 0; j < 8; ++j)
{
c = c & 1 ? polynomial ^ (c >> 1) : c >> 1;
}
tab[i] = c;
}
}
};
template <class T, T polynomial>
struct CRCImpl
{
using ReturnType = T;
static T make_crc(const unsigned char *buf, size_t size)
{
static CRCBase<ReturnType> base(polynomial);
T i, crc;
crc = 0;
for (i = 0; i < size; i++)
{
crc = base.tab[(crc ^ buf[i]) & 0xff] ^ (crc >> 8);
}
return crc;
}
};
static constexpr UInt64 CRC64_ECMA = 0xc96c5795d7870f42ULL;
struct CRC64ECMAImpl : public CRCImpl<UInt64, CRC64_ECMA>
{
static constexpr auto name = "CRC64";
};
static constexpr UInt32 CRC32_IEEE = 0xedb88320;
struct CRC32IEEEImpl : public CRCImpl<UInt32, CRC32_IEEE>
{
static constexpr auto name = "CRC32IEEE";
};
struct CRC32ZLIBImpl
{
using ReturnType = UInt32;
static constexpr auto name = "CRC32";
static UInt32 make_crc(const unsigned char *buf, size_t size)
{ return crc32_z(0L, buf, size); }
};
} // \anonymous
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
template <class Impl>
struct CRCFunctionWrapper
{
static constexpr auto is_fixed_to_constant = true;
using ReturnType = typename Impl::ReturnType;
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<ReturnType> & res)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
{
res[i] = do_crc(data, prev_offset, offsets[i] - prev_offset - 1);
prev_offset = offsets[i];
}
}
static void vector_fixed_to_constant(const ColumnString::Chars & data, size_t n, ReturnType & res) { res = do_crc(data, 0, n); }
static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray<ReturnType> & res)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
{
res[i] = do_crc(data, i * n, n);
}
}
[[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<ReturnType> & /*res*/)
{
throw Exception("Cannot apply function " + std::string(Impl::name) + " to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
private:
static ReturnType do_crc(const ColumnString::Chars & buf, size_t offset, size_t size)
{
const unsigned char * p = reinterpret_cast<const unsigned char *>(&buf[0]) + offset;
return Impl::make_crc(p, size);
}
};
template <class T>
using FunctionCRC = FunctionStringOrArrayToT<CRCFunctionWrapper<T>, T, typename T::ReturnType>;
// The same as IEEE variant, but uses 0xffffffff as initial value
// This is the default
//
// (And zlib is used here, since it has optimized version)
using FunctionCRC32ZLIB = FunctionCRC<CRC32ZLIBImpl>;
// Uses CRC-32-IEEE 802.3 polynomial
using FunctionCRC32IEEE = FunctionCRC<CRC32IEEEImpl>;
// Uses CRC-64-ECMA polynomial
using FunctionCRC64ECMA = FunctionCRC<CRC64ECMAImpl>;
template <class T>
void registerFunctionCRCImpl(FunctionFactory & factory)
{
factory.registerFunction<T>(T::name, FunctionFactory::CaseInsensitive);
}
void registerFunctionCRC(FunctionFactory & factory)
{
registerFunctionCRCImpl<FunctionCRC32ZLIB>(factory);
registerFunctionCRCImpl<FunctionCRC32IEEE>(factory);
registerFunctionCRCImpl<FunctionCRC64ECMA>(factory);
}
}

View File

@ -1,68 +0,0 @@
#include <zlib.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringOrArrayToT.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** Calculates the CRC32 of a string
*/
struct CRC32Impl
{
static constexpr auto is_fixed_to_constant = true;
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt32> & res)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
{
res[i] = do_crc32(data, prev_offset, offsets[i] - prev_offset - 1);
prev_offset = offsets[i];
}
}
static void vector_fixed_to_constant(const ColumnString::Chars & data, size_t n, UInt32 & res) { res = do_crc32(data, 0, n); }
static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt32> & res)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
{
res[i] = do_crc32(data, i * n, n);
}
}
[[noreturn]] static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<UInt32> & /*res*/)
{
throw Exception("Cannot apply function CRC32 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
private:
static uint32_t do_crc32(const ColumnString::Chars & buf, size_t offset, size_t size)
{
const unsigned char * p = reinterpret_cast<const unsigned char *>(&buf[0]) + offset;
return crc32(0L, p, size);
}
};
struct NameCRC32
{
static constexpr auto name = "CRC32";
};
using FunctionCRC32 = FunctionStringOrArrayToT<CRC32Impl, NameCRC32, UInt32>;
void registerFunctionCRC32(FunctionFactory & factory)
{
factory.registerFunction<FunctionCRC32>(NameCRC32::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -20,7 +20,7 @@ void registerFunctionReverseUTF8(FunctionFactory &);
void registerFunctionsConcat(FunctionFactory &);
void registerFunctionFormat(FunctionFactory &);
void registerFunctionSubstring(FunctionFactory &);
void registerFunctionCRC32(FunctionFactory &);
void registerFunctionCRC(FunctionFactory &);
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
void registerFunctionStartsWith(FunctionFactory &);
void registerFunctionEndsWith(FunctionFactory &);
@ -47,7 +47,7 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionLowerUTF8(factory);
registerFunctionUpperUTF8(factory);
registerFunctionReverse(factory);
registerFunctionCRC32(factory);
registerFunctionCRC(factory);
registerFunctionReverseUTF8(factory);
registerFunctionsConcat(factory);
registerFunctionFormat(factory);

View File

@ -20,3 +20,7 @@ qwerty string 55151997 2663297705
qqq aaa 3142898280 4027020077
zxcqwer 3358319860 0
aasq xxz 3369829874 4069886758
CRC32IEEE()
7332BC33
CRC64()
72D5B9EA0B70CE1E

View File

@ -18,3 +18,8 @@ select CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2);
select str1, str2, CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2);
DROP TABLE table1;
SELECT 'CRC32IEEE()';
SELECT hex(CRC32IEEE('foo'));
SELECT 'CRC64()';
SELECT hex(CRC64('foo'));

View File

@ -195,7 +195,20 @@ Returns a string that removes the whitespace characters on either side.
## CRC32(s)
Returns the CRC32 checksum of a string
Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial and initial value `0xffffffff` (zlib implementation).
The result type is UInt32.
## CRC32IEEE(s)
Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial.
The result type is UInt32.
## CRC64(s)
Returns the CRC64 checksum of a string, using CRC-64-ECMA polynomial.
The result type is UInt64.
[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) <!--hide-->

View File

@ -155,7 +155,20 @@ SELECT startsWith('Hello, world!', 'He');
## CRC32(s)
Возвращает чексумму CRC32 данной строки.
Возвращает чексумму CRC32 данной строки, используется CRC-32-IEEE 802.3 многочлен и начальным значением `0xffffffff` (т.к. используется реализация из zlib).
Тип результата - UInt32.
## CRC32IEEE(s)
Возвращает чексумму CRC32 данной строки, используется CRC-32-IEEE 802.3 многочлен.
Тип результата - UInt32.
## CRC64(s)
Возвращает чексумму CRC64 данной строки, используется CRC-64-ECMA многочлен.
Тип результата - UInt64.
[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/string_functions/) <!--hide-->