mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Merge pull request #45043 from ClickHouse/fix-ip-function-hash
Fix hash functions for IPv4
This commit is contained in:
commit
9e327306a3
@ -160,9 +160,8 @@ class DataTypeDate32;
|
||||
class DataTypeString;
|
||||
class DataTypeFixedString;
|
||||
class DataTypeUUID;
|
||||
template <typename IPv> class DataTypeIP;
|
||||
using DataTypeIPv4 = DataTypeIP<IPv4>;
|
||||
using DataTypeIPv6 = DataTypeIP<IPv6>;
|
||||
class DataTypeIPv4;
|
||||
class DataTypeIPv6;
|
||||
class DataTypeDateTime;
|
||||
class DataTypeDateTime64;
|
||||
template <typename T> class DataTypeEnum;
|
||||
|
@ -9,22 +9,60 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
template <typename IPv>
|
||||
class DataTypeIP : public IDataType
|
||||
class DataTypeIPv4 : public IDataType
|
||||
{
|
||||
public:
|
||||
static constexpr bool is_parametric = false;
|
||||
|
||||
using FieldType = IPv;
|
||||
using ColumnType = ColumnVector<IPv>;
|
||||
static constexpr auto type_id = TypeToTypeIndex<IPv>;
|
||||
using FieldType = IPv4;
|
||||
using ColumnType = ColumnVector<IPv4>;
|
||||
static constexpr auto type_id = TypeToTypeIndex<IPv4>;
|
||||
|
||||
const char * getFamilyName() const override { return TypeName<IPv>.data(); }
|
||||
const char * getFamilyName() const override { return TypeName<IPv4>.data(); }
|
||||
TypeIndex getTypeId() const override { return type_id; }
|
||||
|
||||
Field getDefault() const override { return IPv{}; }
|
||||
Field getDefault() const override { return IPv4{}; }
|
||||
|
||||
MutableColumnPtr createColumn() const override {return ColumnVector<IPv>::create();}
|
||||
MutableColumnPtr createColumn() const override {return ColumnVector<IPv4>::create();}
|
||||
|
||||
bool isParametric() const override { return false; }
|
||||
bool haveSubtypes() const override { return false; }
|
||||
|
||||
bool equals(const IDataType & rhs) const override { return typeid(rhs) == typeid(*this); }
|
||||
|
||||
bool canBeUsedInBitOperations() const override { return true; }
|
||||
bool canBeInsideNullable() const override { return true; }
|
||||
bool canBePromoted() const override { return false; }
|
||||
bool shouldAlignRightInPrettyFormats() const override { return false; }
|
||||
bool textCanContainOnlyValidUTF8() const override { return true; }
|
||||
bool isComparable() const override { return true; }
|
||||
bool isValueRepresentedByNumber() const override { return true; }
|
||||
bool isValueRepresentedByUnsignedInteger() const override { return true; }
|
||||
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
|
||||
bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; }
|
||||
bool haveMaximumSizeOfValue() const override { return true; }
|
||||
size_t getSizeOfValueInMemory() const override { return sizeof(IPv4); }
|
||||
bool isCategorial() const override { return true; }
|
||||
bool canBeInsideLowCardinality() const override { return true; }
|
||||
|
||||
SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv4>>(); }
|
||||
};
|
||||
|
||||
class DataTypeIPv6 : public IDataType
|
||||
{
|
||||
public:
|
||||
static constexpr bool is_parametric = false;
|
||||
|
||||
using FieldType = IPv6;
|
||||
using ColumnType = ColumnVector<IPv6>;
|
||||
static constexpr auto type_id = TypeToTypeIndex<IPv6>;
|
||||
|
||||
const char * getFamilyName() const override { return TypeName<IPv6>.data(); }
|
||||
TypeIndex getTypeId() const override { return type_id; }
|
||||
|
||||
Field getDefault() const override { return IPv6{}; }
|
||||
|
||||
MutableColumnPtr createColumn() const override {return ColumnVector<IPv6>::create();}
|
||||
|
||||
bool isParametric() const override { return false; }
|
||||
bool haveSubtypes() const override { return false; }
|
||||
@ -40,14 +78,12 @@ public:
|
||||
bool isValueUnambiguouslyRepresentedInContiguousMemoryRegion() const override { return true; }
|
||||
bool isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion() const override { return true; }
|
||||
bool haveMaximumSizeOfValue() const override { return true; }
|
||||
size_t getSizeOfValueInMemory() const override { return sizeof(IPv); }
|
||||
size_t getSizeOfValueInMemory() const override { return sizeof(IPv6); }
|
||||
bool isCategorial() const override { return true; }
|
||||
bool canBeInsideLowCardinality() const override { return true; }
|
||||
|
||||
SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv>>(); }
|
||||
SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationIP<IPv6>>(); }
|
||||
};
|
||||
|
||||
using DataTypeIPv4 = DataTypeIP<IPv4>;
|
||||
using DataTypeIPv6 = DataTypeIP<IPv6>;
|
||||
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/IFunction.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <base/IPv4andIPv6.h>
|
||||
#include <Interpreters/Context_fwd.h>
|
||||
|
||||
|
||||
@ -43,7 +44,7 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (!isInteger(arguments[0]))
|
||||
if (!isInteger(arguments[0]) && !isIPv4(arguments[0]))
|
||||
throw Exception("Illegal type " + arguments[0]->getName() + " of the first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
@ -132,6 +133,8 @@ private:
|
||||
executeType<Int32>(hash_col, num_buckets, res_col.get());
|
||||
else if (which.isInt64())
|
||||
executeType<Int64>(hash_col, num_buckets, res_col.get());
|
||||
else if (which.isIPv4())
|
||||
executeType<IPv4>(hash_col, num_buckets, res_col.get());
|
||||
else
|
||||
throw Exception("Illegal type " + hash_type->getName() + " of the first argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
@ -52,6 +52,7 @@
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
#include <Functions/PerformanceAdaptors.h>
|
||||
#include <Common/TargetSpecific.h>
|
||||
#include <base/IPv4andIPv6.h>
|
||||
#include <base/range.h>
|
||||
#include <base/bit_cast.h>
|
||||
|
||||
@ -690,7 +691,7 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
|
||||
{
|
||||
if (!isStringOrFixedString(arguments[0]))
|
||||
if (!isStringOrFixedString(arguments[0]) && !isIPv6(arguments[0]))
|
||||
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
||||
@ -742,6 +743,22 @@ public:
|
||||
}
|
||||
return col_to;
|
||||
}
|
||||
else if (
    const ColumnIPv6 * col_from_ip = checkAndGetColumn<ColumnIPv6>(arguments[0].column.get()))
{
    /// IPv6 input: hash the IPV6_BINARY_LENGTH-byte binary form of every address and
    /// write each digest as one Impl::length-byte row of the resulting FixedString column.
    auto col_to = ColumnFixedString::create(Impl::length);
    const typename ColumnIPv6::Container & data = col_from_ip->getData();
    const auto size = col_from_ip->size();
    auto & chars_to = col_to->getChars();
    const auto length = IPV6_BINARY_LENGTH;
    chars_to.resize(size * Impl::length);
    for (size_t i = 0; i < size; ++i)
    {
        /// `data` is indexed per row (one IPv6 value per element), unlike `chars_to`, which is a flat
        /// byte buffer. The input therefore must be addressed as data[i]; scaling the index by the
        /// byte length would pick the wrong element and run past the end of the column for i >= 1.
        Impl::apply(
            reinterpret_cast<const char *>(&data[i]), length, reinterpret_cast<uint8_t *>(&chars_to[i * Impl::length]));
    }
    return col_to;
}
|
||||
else
|
||||
throw Exception("Illegal column " + arguments[0].column->getName()
|
||||
+ " of first argument of function " + getName(),
|
||||
@ -838,6 +855,8 @@ public:
|
||||
return executeType<Decimal32>(arguments);
|
||||
else if (which.isDecimal64())
|
||||
return executeType<Decimal64>(arguments);
|
||||
else if (which.isIPv4())
|
||||
return executeType<IPv4>(arguments);
|
||||
else
|
||||
throw Exception("Illegal type " + arguments[0].type->getName() + " of argument of function " + getName(),
|
||||
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
|
||||
|
@ -243,17 +243,18 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
|
||||
= DecimalUtils::decimalFromComponents<DateTime64>(applyVisitor(FieldVisitorConvertToNumber<Int64>(), src), 0, scale);
|
||||
return Field(DecimalField<DateTime64>(decimal_value, scale));
|
||||
}
|
||||
|
||||
if (which_type.isIPv4() && src.getType() == Field::Types::IPv4)
|
||||
{
|
||||
/// Already in needed type.
|
||||
return src;
|
||||
}
|
||||
}
|
||||
else if (which_type.isUUID() && src.getType() == Field::Types::UUID)
|
||||
{
|
||||
/// Already in needed type.
|
||||
return src;
|
||||
}
|
||||
else if (which_type.isIPv4() && src.getType() == Field::Types::IPv4)
|
||||
{
|
||||
/// Already in needed type.
|
||||
return src;
|
||||
}
|
||||
else if (which_type.isIPv6())
|
||||
{
|
||||
/// Already in needed type.
|
||||
|
@ -0,0 +1,54 @@
|
||||
Row 1:
|
||||
──────
|
||||
ipv4: 1.2.3.4
|
||||
halfMD5(toIPv4('1.2.3.4')): 14356538739656272800
|
||||
farmFingerprint64(toIPv4('1.2.3.4')): 5715546585361069049
|
||||
xxh3(toIPv4('1.2.3.4')): 14355428563589734825
|
||||
wyHash64(toIPv4('1.2.3.4')): 13096729196120951355
|
||||
xxHash32(toIPv4('1.2.3.4')): 2430391091
|
||||
gccMurmurHash(toIPv4('1.2.3.4')): 5478801830569062645
|
||||
murmurHash2_32(toIPv4('1.2.3.4')): 1658978282
|
||||
javaHashUTF16LE(toIPv4('1.2.3.4')): 24190
|
||||
intHash64(toIPv4('1.2.3.4')): 5715546585361069049
|
||||
intHash32(toIPv4('1.2.3.4')): 3152671896
|
||||
metroHash64(toIPv4('1.2.3.4')): 5715546585361069049
|
||||
hex(murmurHash3_128(toIPv4('1.2.3.4'))): 549E9EF692591F6BB55874EF9A0DE88E
|
||||
jumpConsistentHash(toIPv4('1.2.3.4'), 42): 37
|
||||
sipHash64(toIPv4('1.2.3.4')): 10711397536826262068
|
||||
hex(sipHash128(toIPv4('1.2.3.4'))): DBB6A76B92B59789EFB42185DC32311D
|
||||
kostikConsistentHash(toIPv4('1.2.3.4'), 42): 0
|
||||
xxHash64(toIPv4('1.2.3.4')): 14496144933713060978
|
||||
murmurHash2_64(toIPv4('1.2.3.4')): 10829690723193326442
|
||||
cityHash64(toIPv4('1.2.3.4')): 5715546585361069049
|
||||
hiveHash(toIPv4('1.2.3.4')): 122110
|
||||
murmurHash3_64(toIPv4('1.2.3.4')): 16570805747704317665
|
||||
murmurHash3_32(toIPv4('1.2.3.4')): 1165084099
|
||||
yandexConsistentHash(toIPv4('1.2.3.4'), 42): 0
|
||||
Row 1:
|
||||
──────
|
||||
ipv6: fe80::62:5aff:fed1:daf0
|
||||
halfMD5(toIPv6('fe80::62:5aff:fed1:daf0')): 9503062220758009199
|
||||
hex(MD4(toIPv6('fe80::62:5aff:fed1:daf0'))): E35A1A4FB3A3953421AB348B2E1A4A1A
|
||||
hex(MD5(toIPv6('fe80::62:5aff:fed1:daf0'))): 83E1A8BD8AB7456FC229208409F79798
|
||||
hex(SHA1(toIPv6('fe80::62:5aff:fed1:daf0'))): A6D5DCE882AC44804382DE4639E6001612E1C8B5
|
||||
hex(SHA224(toIPv6('fe80::62:5aff:fed1:daf0'))): F6995FD7BED2BCA21F68DAC6BBABE742DC1BA177BA8594CEF1715C52
|
||||
hex(SHA256(toIPv6('fe80::62:5aff:fed1:daf0'))): F75497BAD6F7747BD6B150B6F69BA2DEE354F1C2A34B7BEA6183973B78640250
|
||||
hex(SHA512(toIPv6('fe80::62:5aff:fed1:daf0'))): 0C2893CCBF44BC19CCF339AEED5B68CBFD5A2EF38263A48FE21C3379BA4438E7FF7A02F59D7542442C6E6ED538E6D13D65D3573DADB381651D3D8A5DEA232EAC
|
||||
farmFingerprint64(toIPv6('fe80::62:5aff:fed1:daf0')): 6643158734288374888
|
||||
javaHash(toIPv6('fe80::62:5aff:fed1:daf0')): 684606770
|
||||
xxh3(toIPv6('fe80::62:5aff:fed1:daf0')): 4051340969481364358
|
||||
wyHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 18071806066582739916
|
||||
xxHash32(toIPv6('fe80::62:5aff:fed1:daf0')): 3353862080
|
||||
gccMurmurHash(toIPv6('fe80::62:5aff:fed1:daf0')): 11049311547848936878
|
||||
murmurHash2_32(toIPv6('fe80::62:5aff:fed1:daf0')): 1039121047
|
||||
javaHashUTF16LE(toIPv6('fe80::62:5aff:fed1:daf0')): -666938696
|
||||
metroHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 15333045864940909774
|
||||
hex(sipHash128(toIPv6('fe80::62:5aff:fed1:daf0'))): 31D50562F877B1F92A99B05B646568B7
|
||||
hex(murmurHash3_128(toIPv6('fe80::62:5aff:fed1:daf0'))): 6FFEF0C1DF8B5B472FE2EDF0C76C12B9
|
||||
sipHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 5681592867096972315
|
||||
xxHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 4533874364641685764
|
||||
murmurHash2_64(toIPv6('fe80::62:5aff:fed1:daf0')): 11839090601505681839
|
||||
cityHash64(toIPv6('fe80::62:5aff:fed1:daf0')): 1599722731594796935
|
||||
hiveHash(toIPv6('fe80::62:5aff:fed1:daf0')): 684606770
|
||||
murmurHash3_64(toIPv6('fe80::62:5aff:fed1:daf0')): 18323430650022796352
|
||||
murmurHash3_32(toIPv6('fe80::62:5aff:fed1:daf0')): 3971193740
|
@ -0,0 +1,57 @@
|
||||
-- Tags: no-fasttest
|
||||
|
||||
SELECT
|
||||
toIPv4('1.2.3.4') AS ipv4,
|
||||
halfMD5(ipv4),
|
||||
farmFingerprint64(ipv4),
|
||||
xxh3(ipv4),
|
||||
wyHash64(ipv4),
|
||||
xxHash32(ipv4),
|
||||
gccMurmurHash(ipv4),
|
||||
murmurHash2_32(ipv4),
|
||||
javaHashUTF16LE(ipv4),
|
||||
intHash64(ipv4),
|
||||
intHash32(ipv4),
|
||||
metroHash64(ipv4),
|
||||
hex(murmurHash3_128(ipv4)),
|
||||
jumpConsistentHash(ipv4, 42),
|
||||
sipHash64(ipv4),
|
||||
hex(sipHash128(ipv4)),
|
||||
kostikConsistentHash(ipv4, 42),
|
||||
xxHash64(ipv4),
|
||||
murmurHash2_64(ipv4),
|
||||
cityHash64(ipv4),
|
||||
hiveHash(ipv4),
|
||||
murmurHash3_64(ipv4),
|
||||
murmurHash3_32(ipv4),
|
||||
yandexConsistentHash(ipv4,42)
|
||||
FORMAT Vertical;
|
||||
|
||||
SELECT
|
||||
toIPv6('fe80::62:5aff:fed1:daf0') AS ipv6,
|
||||
halfMD5(ipv6),
|
||||
hex(MD4(ipv6)),
|
||||
hex(MD5(ipv6)),
|
||||
hex(SHA1(ipv6)),
|
||||
hex(SHA224(ipv6)),
|
||||
hex(SHA256(ipv6)),
|
||||
hex(SHA512(ipv6)),
|
||||
farmFingerprint64(ipv6),
|
||||
javaHash(ipv6),
|
||||
xxh3(ipv6),
|
||||
wyHash64(ipv6),
|
||||
xxHash32(ipv6),
|
||||
gccMurmurHash(ipv6),
|
||||
murmurHash2_32(ipv6),
|
||||
javaHashUTF16LE(ipv6),
|
||||
metroHash64(ipv6),
|
||||
hex(sipHash128(ipv6)),
|
||||
hex(murmurHash3_128(ipv6)),
|
||||
sipHash64(ipv6),
|
||||
xxHash64(ipv6),
|
||||
murmurHash2_64(ipv6),
|
||||
cityHash64(ipv6),
|
||||
hiveHash(ipv6),
|
||||
murmurHash3_64(ipv6),
|
||||
murmurHash3_32(ipv6)
|
||||
FORMAT Vertical;
|
Loading…
Reference in New Issue
Block a user