Merge pull request #5661 from BHYCHIK/crc32-for-master

Support crc32 function
This commit is contained in:
alexey-milovidov 2019-06-20 01:32:00 +03:00 committed by GitHub
commit c899fce9fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 123 additions and 3 deletions

View File

@ -0,0 +1,68 @@
#include <zlib.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringOrArrayToT.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
/** Calculates the CRC32 of a string
*/
struct CRC32Impl
{
static constexpr auto is_fixed_to_constant = true;
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt32> & res)
{
size_t size = offsets.size();
ColumnString::Offset prev_offset = 0;
for (size_t i = 0; i < size; ++i)
{
res[i] = do_crc32(data, prev_offset, offsets[i] - prev_offset - 1);
prev_offset = offsets[i];
}
}
static void vector_fixed_to_constant(const ColumnString::Chars & data, size_t n, UInt32 & res) { res = do_crc32(data, 0, n); }
static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt32> & res)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
{
res[i] = do_crc32(data, i * n, n);
}
}
static void array(const ColumnString::Offsets & /*offsets*/, PaddedPODArray<UInt32> & /*res*/)
{
throw Exception("Cannot apply function CRC32 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
private:
static uint32_t do_crc32(const ColumnString::Chars & buf, size_t offset, size_t size)
{
const unsigned char * p = reinterpret_cast<const unsigned char *>(&buf[0]) + offset;
return crc32(0L, p, size);
}
};
/// Carries the name under which the CRC32 function is exposed.
struct NameCRC32
{
    static constexpr const char * name = "CRC32";
};
/// The CRC32 function: CRC32Impl supplies the computation, NameCRC32 the name, UInt32 the result type.
using FunctionCRC32 = FunctionStringOrArrayToT<CRC32Impl, NameCRC32, UInt32>;

/// Adds the CRC32 function to the given factory, registered as case-insensitive.
void registerFunctionCRC32(FunctionFactory & factory)
{
    const auto case_mode = FunctionFactory::CaseInsensitive;
    factory.registerFunction<FunctionCRC32>(NameCRC32::name, case_mode);
}
}

View File

@ -2,7 +2,6 @@
namespace DB
{
class FunctionFactory;
void registerFunctionEmpty(FunctionFactory &);
@ -20,6 +19,7 @@ void registerFunctionReverseUTF8(FunctionFactory &);
void registerFunctionsConcat(FunctionFactory &);
void registerFunctionFormat(FunctionFactory &);
void registerFunctionSubstring(FunctionFactory &);
void registerFunctionCRC32(FunctionFactory &);
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
void registerFunctionStartsWith(FunctionFactory &);
void registerFunctionEndsWith(FunctionFactory &);
@ -45,6 +45,7 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionLowerUTF8(factory);
registerFunctionUpperUTF8(factory);
registerFunctionReverse(factory);
registerFunctionCRC32(factory);
registerFunctionReverseUTF8(factory);
registerFunctionsConcat(factory);
registerFunctionFormat(factory);
@ -62,4 +63,3 @@ void registerFunctionsString(FunctionFactory & factory)
}
}

View File

@ -31,6 +31,7 @@
<value>hiveHash</value>
<value>xxHash32</value>
<value>xxHash64</value>
<value>CRC32</value>
</values>
</substitution>
<substitution>

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,22 @@
2663297705
2663297705 3632233996
0
55151997
3142898280
3358319860
3369829874
0
0
2663297705
4027020077
4069886758
0 0
55151997 2663297705
3142898280 4027020077
3358319860 0
3369829874 4069886758
0 0
qwerty string 55151997 2663297705
qqq aaa 3142898280 4027020077
zxcqwer 3358319860 0
aasq xxz 3369829874 4069886758

View File

@ -0,0 +1,20 @@
-- Functional test for the CRC32 function: constant arguments, case-insensitive
-- function name, and String columns that include empty values.
USE test;
DROP TABLE IF EXISTS table1;
CREATE TABLE table1 (str1 String, str2 String) ENGINE = Memory;
-- Fixture rows; the last two contain empty strings.
INSERT INTO table1 VALUES('qwerty', 'string');
INSERT INTO table1 VALUES('qqq', 'aaa');
INSERT INTO table1 VALUES('aasq', 'xxz');
INSERT INTO table1 VALUES('zxcqwer', '');
INSERT INTO table1 VALUES('', '');
-- Constant argument.
select CRC32('string');
select CrC32('string'), crc32('test'); -- The function name must be accepted in any letter case
-- Column arguments; every result is ordered by the checksum so that the output is deterministic.
select CRC32(str1) from table1 order by CRC32(str1);
select CRC32(str2) from table1 order by CRC32(str2);
select CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2);
select str1, str2, CRC32(str1), CRC32(str2) from table1 order by CRC32(str1), CRC32(str2);
DROP TABLE table1;

View File

@ -165,4 +165,9 @@ Returns a string that removes the whitespace characters on right side.
Returns a string that removes the whitespace characters on either side.
## CRC32(s)
Returns the CRC32 checksum of a string.
The result type is UInt32.
[Original article](https://clickhouse.yandex/docs/en/query_language/functions/string_functions/) <!--hide-->

View File

@ -117,4 +117,8 @@ SELECT format('{} {}', 'Hello', 'World')
## tryBase64Decode(s)
Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку.
## CRC32(s)
Возвращает контрольную сумму CRC32 данной строки.
Тип результата — UInt32.
[Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/string_functions/) <!--hide-->