Merge pull request #45043 from ClickHouse/fix-ip-function-hash

Fix hash functions for IPv4
This commit is contained in:
Yakov Olkhovskiy 2023-01-10 07:48:12 -05:00 committed by GitHub
commit 9e327306a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 191 additions and 22 deletions

View File

@ -160,9 +160,8 @@ class DataTypeDate32;
class DataTypeString;
class DataTypeFixedString;
class DataTypeUUID;
template <typename IPv> class DataTypeIP;
using DataTypeIPv4 = DataTypeIP<IPv4>;
using DataTypeIPv6 = DataTypeIP<IPv6>;
class DataTypeIPv4;
class DataTypeIPv6;
class DataTypeDateTime;
class DataTypeDateTime64;
template <typename T> class DataTypeEnum;

View File

@ -9,22 +9,60 @@
namespace DB
{
template <typename IPv>
class DataTypeIP : public IDataType
/// Data type describing IPv4 address values. It is non-parametric, keeps its values in
/// ColumnVector<IPv4>, and serializes them through SerializationIP<IPv4>.
/// NOTE(review): this span is taken from a rendered diff, so the pre-change template lines
/// (the ones still spelling `IPv`) appear directly before their IPv4 replacements.
class DataTypeIPv4 : public IDataType
{
public:
/// Compile-time descriptors of the type: value type, column type and type index.
static constexpr bool is_parametric = false;
using FieldType = IPv;
using ColumnType = ColumnVector<IPv>;
static constexpr auto type_id = TypeToTypeIndex<IPv>;
using FieldType = IPv4;
using ColumnType = ColumnVector<IPv4>;
static constexpr auto type_id = TypeToTypeIndex<IPv4>;
/// Family name is taken from TypeName<> for the value type.
const char * getFamilyName() const override { return TypeName<IPv>.data(); }
const char * getFamilyName() const override { return TypeName<IPv4>.data(); }
TypeIndex getTypeId() const override { return type_id; }
/// Default value is a value-initialized address.
Field getDefault() const override { return IPv{}; }
Field getDefault() const override { return IPv4{}; }
/// Creates an empty column of the matching ColumnVector specialization.
MutableColumnPtr createColumn() const override {return ColumnVector<IPv>::create();}
MutableColumnPtr createColumn() const override {return ColumnVector<IPv4>::create();}
/// Fixed-answer property queries for this type.
bool isParametric() const override { return false; }
bool haveSubtypes() const override { return false; }
/// Two data types are equal only when their dynamic types are exactly the same.
bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); }
bool canBeUsedInBitOperations() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool canBePromoted() const override { return false; }
bool shouldAlignRightInPrettyFormats() const override { return false; }
bool textCanContainOnlyValidUTF8() const override { return true; }
bool isComparable() const override { return true; }
/// Reported as a number / unsigned integer representation; the hash-function changes in
/// this commit dispatch on IPv4 next to the integer types (presumably relying on this).
bool isValueRepresentedByNumber() const override { return true; }
bool isValueRepresentedByUnsignedInteger() const override { return true; }
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; }
/// Values have a fixed in-memory size equal to sizeof(IPv4).
bool haveMaximumSizeOfValue() const override { return true; }
size_t getSizeOfValueInMemory() const override { return sizeof(IPv4); }
bool isCategorial() const override { return true; }
bool canBeInsideLowCardinality() const override { return true; }
/// Default serialization is the IP serialization specialized for IPv4.
SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv4>>(); }
};
class DataTypeIPv6 : public IDataType
{
public:
static constexpr bool is_parametric = false;
using FieldType = IPv6;
using ColumnType = ColumnVector<IPv6>;
static constexpr auto type_id = TypeToTypeIndex<IPv6>;
const char * getFamilyName() const override { return TypeName<IPv6>.data(); }
TypeIndex getTypeId() const override { return type_id; }
Field getDefault() const override { return IPv6{}; }
MutableColumnPtr createColumn() const override {return ColumnVector<IPv6>::create();}
bool isParametric() const override { return false; }
bool haveSubtypes() const override { return false; }
@ -40,14 +78,12 @@ public:
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; }
bool haveMaximumSizeOfValue() const override { return true; }
size_t getSizeOfValueInMemory() const override { return sizeof(IPv); }
size_t getSizeOfValueInMemory() const override { return sizeof(IPv6); }
bool isCategorial() const override { return true; }
bool canBeInsideLowCardinality() const override { return true; }
SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv>>(); }
SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv6>>(); }
};
using DataTypeIPv4 = DataTypeIP<IPv4>;
using DataTypeIPv6 = DataTypeIP<IPv6>;
}

View File

@ -6,6 +6,7 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Common/typeid_cast.h>
#include <base/IPv4andIPv6.h>
#include <Interpreters/Context_fwd.h>
@ -43,7 +44,7 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isInteger(arguments[0]))
if (!isInteger(arguments[0]) && !isIPv4(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of the first argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -132,6 +133,8 @@ private:
executeType<Int32>(hash_col, num_buckets, res_col.get());
else if (which.isInt64())
executeType<Int64>(hash_col, num_buckets, res_col.get());
else if (which.isIPv4())
executeType<IPv4>(hash_col, num_buckets, res_col.get());
else
throw Exception("Illegal type " + hash_type->getName() + " of the first argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -52,6 +52,7 @@
#include <Functions/FunctionHelpers.h>
#include <Functions/PerformanceAdaptors.h>
#include <Common/TargetSpecific.h>
#include <base/IPv4andIPv6.h>
#include <base/range.h>
#include <base/bit_cast.h>
@ -690,7 +691,7 @@ public:
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isStringOrFixedString(arguments[0]))
if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -742,6 +743,22 @@ public:
}
return col_to;
}
else if (
    const ColumnIPv6 * col_from_ip = checkAndGetColumn<ColumnIPv6>(arguments[0].column.get()))
{
    /// IPv6 argument: hash the in-memory bytes of every address (IPV6_BINARY_LENGTH bytes each)
    /// and store one Impl::length-byte digest per row in a FixedString column.
    auto col_to = ColumnFixedString::create(Impl::length);
    const typename ColumnIPv6::Container & data = col_from_ip->getData();
    const auto size = col_from_ip->size();
    auto & chars_to = col_to->getChars();
    const auto length = IPV6_BINARY_LENGTH;
    chars_to.resize(size * Impl::length);
    for (size_t i = 0; i < size; ++i)
    {
        /// `data` is indexed by whole IPv6 elements, not by bytes, so row i lives at data[i].
        /// Scaling the index by `length` (as for the byte-addressed FixedString input) would
        /// read element i * length: wrong for every row after the first and past the end of
        /// the column. Only the byte-addressed output buffer is scaled, by the digest size.
        /// NOTE(review): assumes ColumnIPv6 is ColumnVector<IPv6>, as DataTypeIPv6::ColumnType suggests.
        Impl::apply(
            reinterpret_cast<const char *>(&data[i]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
    }
    return col_to;
}
else
throw Exception("Illegal column " + arguments[0].column->getName()
+ " of first argument of function " + getName(),
@ -838,6 +855,8 @@ public:
return executeType<Decimal32>(arguments);
else if (which.isDecimal64())
return executeType<Decimal64>(arguments);
else if (which.isIPv4())
return executeType<IPv4>(arguments);
else
throw Exception("Illegal type " + arguments[0].type->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -243,17 +243,18 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
= DecimalUtils::decimalFromComponents<DateTime64>(applyVisitor(FieldVisitorConvertToNumber<Int64>(), src), 0, scale);
return Field(DecimalField<DateTime64>(decimal_value, scale));
}
if (which_type.isIPv4() && src.getType() == Field::Types::IPv4)
{
/// Already in needed type.
return src;
}
}
else if (which_type.isUUID() && src.getType() == Field::Types::UUID)
{
/// Already in needed type.
return src;
}
else if (which_type.isIPv4() && src.getType() == Field::Types::IPv4)
{
/// Already in needed type.
return src;
}
else if (which_type.isIPv6())
{
/// Already in needed type.

View File

@ -0,0 +1,54 @@
Row 1:
──────
ipv4: 1.2.3.4
halfMD5(toIPv4('1.2.3.4')): 14356538739656272800
farmFingerprint64(toIPv4('1.2.3.4')): 5715546585361069049
xxh3(toIPv4('1.2.3.4')): 14355428563589734825
wyHash64(toIPv4('1.2.3.4')): 13096729196120951355
xxHash32(toIPv4('1.2.3.4')): 2430391091
gccMurmurHash(toIPv4('1.2.3.4')): 5478801830569062645
murmurHash2_32(toIPv4('1.2.3.4')): 1658978282
javaHashUTF16LE(toIPv4('1.2.3.4')): 24190
intHash64(toIPv4('1.2.3.4')): 5715546585361069049
intHash32(toIPv4('1.2.3.4')): 3152671896
metroHash64(toIPv4('1.2.3.4')): 5715546585361069049
hex(murmurHash3_128(toIPv4('1.2.3.4'))): 549E9EF692591F6BB55874EF9A0DE88E
jumpConsistentHash(toIPv4('1.2.3.4'), 42): 37
sipHash64(toIPv4('1.2.3.4')): 10711397536826262068
hex(sipHash128(toIPv4('1.2.3.4'))): DBB6A76B92B59789EFB42185DC32311D
kostikConsistentHash(toIPv4('1.2.3.4'), 42): 0
xxHash64(toIPv4('1.2.3.4')): 14496144933713060978
murmurHash2_64(toIPv4('1.2.3.4')): 10829690723193326442
cityHash64(toIPv4('1.2.3.4')): 5715546585361069049
hiveHash(toIPv4('1.2.3.4')): 122110
murmurHash3_64(toIPv4('1.2.3.4')): 16570805747704317665
murmurHash3_32(toIPv4('1.2.3.4')): 1165084099
yandexConsistentHash(toIPv4('1.2.3.4'), 42): 0
Row 1:
──────
ipv6: fe80::62:5aff:fed1:daf0
halfMD5(toIPv6('fe80::62:5aff:fed1:daf0')): 9503062220758009199
hex(MD4(toIPv6('fe80::62:5aff:fed1:daf0'))): E35A1A4FB3A3953421AB348B2E1A4A1A
hex(MD5(toIPv6('fe80::62:5aff:fed1:daf0'))): 83E1A8BD8AB7456FC229208409F79798
hex(SHA1(toIPv6('fe80::62:5aff:fed1:daf0'))): A6D5DCE882AC44804382DE4639E6001612E1C8B5
hex(SHA224(toIPv6('fe80::62:5aff:fed1:daf0'))): F6995FD7BED2BCA21F68DAC6BBABE742DC1BA177BA8594CEF1715C52
hex(SHA256(toIPv6('fe80::62:5aff:fed1:daf0'))): F75497BAD6F7747BD6B150B6F69BA2DEE354F1C2A34B7BEA6183973B78640250
hex(SHA512(toIPv6('fe80::62:5aff:fed1:daf0'))): 0C2893CCBF44BC19CCF339AEED5B68CBFD5A2EF38263A48FE21C3379BA4438E7FF7A02F59D7542442C6E6ED538E6D13D65D3573DADB381651D3D8A5DEA232EAC
farmFingerprint64(toIPv6('fe80::62:5aff:fed1:daf0')): 6643158734288374888
javaHash(toIPv6('fe80::62:5aff:fed1:daf0')): 684606770
xxh3(toIPv6('fe80::62:5aff:fed1:daf0')): 4051340969481364358
wyHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 18071806066582739916
xxHash32(toIPv6('fe80::62:5aff:fed1:daf0')): 3353862080
gccMurmurHash(toIPv6('fe80::62:5aff:fed1:daf0')): 11049311547848936878
murmurHash2_32(toIPv6('fe80::62:5aff:fed1:daf0')): 1039121047
javaHashUTF16LE(toIPv6('fe80::62:5aff:fed1:daf0')): -666938696
metroHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 15333045864940909774
hex(sipHash128(toIPv6('fe80::62:5aff:fed1:daf0'))): 31D50562F877B1F92A99B05B646568B7
hex(murmurHash3_128(toIPv6('fe80::62:5aff:fed1:daf0'))): 6FFEF0C1DF8B5B472FE2EDF0C76C12B9
sipHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 5681592867096972315
xxHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 4533874364641685764
murmurHash2_64(toIPv6('fe80::62:5aff:fed1:daf0')): 11839090601505681839
cityHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 1599722731594796935
hiveHash(toIPv6('fe80::62:5aff:fed1:daf0')): 684606770
murmurHash3_64(toIPv6('fe80::62:5aff:fed1:daf0')): 18323430650022796352
murmurHash3_32(toIPv6('fe80::62:5aff:fed1:daf0')): 3971193740

View File

@ -0,0 +1,57 @@
-- Tags: no-fasttest
-- IPv4 coverage: feed one fixed address to every hash function listed below and print the
-- results with FORMAT Vertical (one `expression: value` line per column in the reference file).
-- The 128-bit hashes are wrapped in hex() so the reference shows them as hex text.
-- The *ConsistentHash functions take a second argument, 42 here
-- (presumably the bucket count; confirm against the function documentation).
SELECT
toIPv4('1.2.3.4') AS ipv4,
halfMD5(ipv4),
farmFingerprint64(ipv4),
xxh3(ipv4),
wyHash64(ipv4),
xxHash32(ipv4),
gccMurmurHash(ipv4),
murmurHash2_32(ipv4),
javaHashUTF16LE(ipv4),
intHash64(ipv4),
intHash32(ipv4),
metroHash64(ipv4),
hex(murmurHash3_128(ipv4)),
jumpConsistentHash(ipv4, 42),
sipHash64(ipv4),
hex(sipHash128(ipv4)),
kostikConsistentHash(ipv4, 42),
xxHash64(ipv4),
murmurHash2_64(ipv4),
cityHash64(ipv4),
hiveHash(ipv4),
murmurHash3_64(ipv4),
murmurHash3_32(ipv4),
yandexConsistentHash(ipv4,42)
FORMAT Vertical;
-- IPv6 coverage: same idea with one fixed IPv6 address. In addition to the generic hashes this
-- list exercises MD4/MD5/SHA1/SHA224/SHA256/SHA512; those and the 128-bit hashes are wrapped
-- in hex() so the reference shows them as hex text.
-- intHash32/intHash64 and the *ConsistentHash functions are not called here; in this commit
-- they gain IPv4 support only.
-- NOTE(review): a single-row input does not exercise per-row indexing inside the hash
-- implementations; a multi-row case would be needed for that.
SELECT
toIPv6('fe80::62:5aff:fed1:daf0') AS ipv6,
halfMD5(ipv6),
hex(MD4(ipv6)),
hex(MD5(ipv6)),
hex(SHA1(ipv6)),
hex(SHA224(ipv6)),
hex(SHA256(ipv6)),
hex(SHA512(ipv6)),
farmFingerprint64(ipv6),
javaHash(ipv6),
xxh3(ipv6),
wyHash64(ipv6),
xxHash32(ipv6),
gccMurmurHash(ipv6),
murmurHash2_32(ipv6),
javaHashUTF16LE(ipv6),
metroHash64(ipv6),
hex(sipHash128(ipv6)),
hex(murmurHash3_128(ipv6)),
sipHash64(ipv6),
xxHash64(ipv6),
murmurHash2_64(ipv6),
cityHash64(ipv6),
hiveHash(ipv6),
murmurHash3_64(ipv6),
murmurHash3_32(ipv6)
FORMAT Vertical;