mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Slightly improve ip_dict loading performance, handle v4 to v6 masks in preprocessing, add more tests
This commit is contained in:
parent
8b91e0984c
commit
5e0e22301b
@ -28,11 +28,33 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
/// Intermediate structure used for loading data
|
||||
struct IPRecord
|
||||
{
|
||||
Poco::Net::IPAddress addr;
|
||||
UInt8 prefix;
|
||||
size_t row;
|
||||
bool isv6;
|
||||
|
||||
IPRecord(const Poco::Net::IPAddress & addr_, UInt8 prefix_, size_t row_)
|
||||
: addr(addr_)
|
||||
, prefix(prefix_)
|
||||
, row(row_)
|
||||
, isv6(addr.family() == Poco::Net::IPAddress::IPv6)
|
||||
{
|
||||
}
|
||||
|
||||
const uint8_t * asIPv6Binary(uint8_t * buf) const
|
||||
{
|
||||
if (isv6)
|
||||
return reinterpret_cast<const uint8_t *>(addr.addr());
|
||||
memset(buf, 0, 10);
|
||||
buf[10] = '\xFF';
|
||||
buf[11] = '\xFF';
|
||||
memcpy(&buf[12], addr.addr(), 4);
|
||||
|
||||
return buf;
|
||||
}
|
||||
};
|
||||
|
||||
struct IPv4Subnet
|
||||
@ -70,6 +92,11 @@ static inline bool compPrefixes(UInt8 a, UInt8 b)
|
||||
return a < b;
|
||||
}
|
||||
|
||||
/// Reads the 4 bytes at `addr` as a network-byte-order value and converts it
/// with Poco::ByteOrder::fromNetwork.
/// `addr` must point to at least 4 readable bytes; no alignment is required.
inline static UInt32 IPv4AsUInt32(const void * addr)
{
    /// Copy into a local instead of dereferencing a casted pointer: callers pass
    /// addresses inside byte buffers (e.g. `&addr[12]`), which need not be aligned
    /// for UInt32, and reading bytes through a UInt32 lvalue breaks strict aliasing.
    UInt32 network_order_value;
    memcpy(&network_order_value, addr, sizeof(network_order_value));
    return Poco::ByteOrder::fromNetwork(network_order_value);
}
|
||||
|
||||
/// Convert mapped IPv6 to IPv4 if possible
|
||||
inline static UInt32 mappedIPv4ToBinary(const uint8_t * addr, bool & success)
|
||||
{
|
||||
@ -81,7 +108,7 @@ inline static UInt32 mappedIPv4ToBinary(const uint8_t * addr, bool & success)
|
||||
addr[10] == 0xff && addr[11] == 0xff;
|
||||
if (!success)
|
||||
return 0;
|
||||
return Poco::ByteOrder::fromNetwork(*reinterpret_cast<const UInt32 *>(&addr[12]));
|
||||
return IPv4AsUInt32(&addr[12]);
|
||||
}
|
||||
|
||||
/// Convert IPv4 to IPv6-mapped and save results to buf
|
||||
@ -114,10 +141,8 @@ static bool matchIPv6Subnet(const uint8_t * target, const uint8_t * addr, UInt8
|
||||
{
|
||||
auto offset = __builtin_ctz(mask);
|
||||
|
||||
if (offset < prefix / 8)
|
||||
return false;
|
||||
if (offset >= prefix / 8 + 1)
|
||||
return true;
|
||||
if (prefix / 8 != offset)
|
||||
return prefix / 8 < offset;
|
||||
|
||||
auto cmpmask = ~(0xff >> (prefix % 8));
|
||||
return (target[offset] & cmpmask) == addr[offset];
|
||||
@ -454,14 +479,14 @@ void TrieDictionary::loadData()
|
||||
UInt8 prefix = std::stoi(addr_str.substr(pos + 1), nullptr, 10);
|
||||
|
||||
addr = addr & IPAddress(prefix, addr.family());
|
||||
ip_records.emplace_back(IPRecord{addr, prefix, row_number});
|
||||
ip_records.emplace_back(addr, prefix, row_number);
|
||||
}
|
||||
else
|
||||
{
|
||||
IPAddress addr(addr_str);
|
||||
has_ipv6 = has_ipv6 || (addr.family() == Poco::Net::IPAddress::IPv6);
|
||||
UInt8 prefix = addr.length() * 8;
|
||||
ip_records.emplace_back(IPRecord{addr, prefix, row_number});
|
||||
ip_records.emplace_back(addr, prefix, row_number);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -473,10 +498,10 @@ void TrieDictionary::loadData()
|
||||
std::sort(ip_records.begin(), ip_records.end(),
|
||||
[](const auto & record_a, const auto & record_b)
|
||||
{
|
||||
auto a = IPv6ToBinary(record_a.addr);
|
||||
auto b = IPv6ToBinary(record_b.addr);
|
||||
auto cmpres = memcmp16(reinterpret_cast<const uint8_t *>(a.data()),
|
||||
reinterpret_cast<const uint8_t *>(b.data()));
|
||||
uint8_t a_buf[IPV6_BINARY_LENGTH];
|
||||
uint8_t b_buf[IPV6_BINARY_LENGTH];
|
||||
|
||||
auto cmpres = memcmp16(record_a.asIPv6Binary(a_buf), record_b.asIPv6Binary(b_buf));
|
||||
|
||||
if (cmpres == 0)
|
||||
return compPrefixes(record_a.prefix, record_b.prefix);
|
||||
@ -520,7 +545,7 @@ void TrieDictionary::loadData()
|
||||
ipv4_col.reserve(ip_records.size());
|
||||
for (const auto & record : ip_records)
|
||||
{
|
||||
auto addr = Poco::ByteOrder::fromNetwork(*reinterpret_cast<const UInt32 *>(record.addr.addr()));
|
||||
auto addr = IPv4AsUInt32(record.addr.addr());
|
||||
ipv4_col.push_back(addr);
|
||||
mask_column.push_back(record.prefix);
|
||||
row_idx.push_back(record.row);
|
||||
@ -532,17 +557,34 @@ void TrieDictionary::loadData()
|
||||
for (const auto i : ext::range(0, ip_records.size()))
|
||||
{
|
||||
parent_subnet[i] = i;
|
||||
|
||||
const auto & cur_address = ip_records[i].addr;
|
||||
while (!subnets_stack.empty())
|
||||
{
|
||||
size_t subnet_idx = subnets_stack.top();
|
||||
const auto cur_subnet = ip_records[subnet_idx];
|
||||
auto cur_addr_masked = cur_address & IPAddress(cur_subnet.prefix, cur_address.family());
|
||||
if (cur_subnet.addr == cur_addr_masked)
|
||||
size_t pi = subnets_stack.top();
|
||||
if (has_ipv6)
|
||||
{
|
||||
parent_subnet[i] = subnet_idx;
|
||||
break;
|
||||
uint8_t a_buf[IPV6_BINARY_LENGTH];
|
||||
uint8_t b_buf[IPV6_BINARY_LENGTH];
|
||||
const auto * cur_address = ip_records[i].asIPv6Binary(a_buf);
|
||||
const auto * cur_subnet = ip_records[pi].asIPv6Binary(b_buf);
|
||||
|
||||
bool is_mask_smaller = ip_records[pi].prefix < ip_records[i].prefix;
|
||||
if (is_mask_smaller && matchIPv6Subnet(cur_address, cur_subnet, ip_records[pi].prefix))
|
||||
{
|
||||
parent_subnet[i] = pi;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
UInt32 cur_address = IPv4AsUInt32(ip_records[i].addr.addr());
|
||||
UInt32 cur_subnet = IPv4AsUInt32(ip_records[pi].addr.addr());
|
||||
|
||||
bool is_mask_smaller = ip_records[pi].prefix < ip_records[i].prefix;
|
||||
if (is_mask_smaller && matchIPv4Subnet(cur_address, cur_subnet, ip_records[pi].prefix))
|
||||
{
|
||||
parent_subnet[i] = pi;
|
||||
break;
|
||||
}
|
||||
}
|
||||
subnets_stack.pop();
|
||||
}
|
||||
@ -550,6 +592,7 @@ void TrieDictionary::loadData()
|
||||
}
|
||||
|
||||
LOG_TRACE(logger, "{} ip records are read", ip_records.size());
|
||||
|
||||
if (require_nonempty && 0 == element_count)
|
||||
throw Exception{full_name + ": dictionary source is empty and 'require_nonempty' property is set.", ErrorCodes::DICTIONARY_IS_EMPTY};
|
||||
}
|
||||
|
@ -3,6 +3,7 @@
|
||||
CREATE TABLE table_ip_trie
|
||||
(
|
||||
ip String,
|
||||
ver UInt8,
|
||||
val Float32
|
||||
) ENGINE = TinyLog
|
||||
</create_query>
|
||||
@ -10,16 +11,18 @@
|
||||
<create_query>
|
||||
INSERT INTO table_ip_trie
|
||||
SELECT
|
||||
IPv4NumToString(ipv4) || '/' || toString(rand() % 25 + 8) as ip,
|
||||
IPv4NumToString(ipv4) || '/' || toString(rand() % 17 + 16) as ip,
|
||||
4 as ver,
|
||||
val
|
||||
FROM generateRandom('ipv4 UInt32, val Float32', 0, 30, 30)
|
||||
LIMIT 1000000
|
||||
LIMIT 500000
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
INSERT INTO table_ip_trie
|
||||
SELECT
|
||||
IPv6NumToString(ipv6) || '/' || toString(rand() % 113 + 16) as ip,
|
||||
IPv6NumToString(ipv6) || '/' || toString(rand() % 65 + 64) as ip,
|
||||
6 as ver,
|
||||
val
|
||||
FROM generateRandom('ipv6 FixedString(16), val Float32', 0, 30, 30)
|
||||
LIMIT 2500000
|
||||
@ -29,6 +32,7 @@
|
||||
CREATE DICTIONARY dict_ip_trie
|
||||
(
|
||||
ip String,
|
||||
ver UInt8,
|
||||
val Float32
|
||||
)
|
||||
PRIMARY KEY ip
|
||||
@ -41,10 +45,19 @@
|
||||
CREATE TABLE dict_ip_trie_table
|
||||
(
|
||||
`ip` String,
|
||||
`ver` UInt8,
|
||||
`val` Float32
|
||||
) ENGINE = Dictionary(default.dict_ip_trie)
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
CREATE TABLE table_ip_from_dict (`ip` String, `ver` UInt8) ENGINE = TinyLog
|
||||
</create_query>
|
||||
<create_query>
|
||||
INSERT INTO table_ip_from_dict
|
||||
SELECT ip, ver FROM dict_ip_trie_table
|
||||
</create_query>
|
||||
|
||||
<query>
|
||||
SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(rand32()))
|
||||
FROM numbers(500000)
|
||||
@ -55,7 +68,22 @@
|
||||
FROM numbers(500000)
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(IPv4StringToNum(ip)))
|
||||
FROM table_ip_from_dict
|
||||
WHERE ver == 4
|
||||
LIMIT 500000
|
||||
</query>
|
||||
|
||||
<query>
|
||||
SELECT dictGetFloat32('default.dict_ip_trie', 'val', tuple(IPv6StringToNum(ip)))
|
||||
FROM table_ip_from_dict
|
||||
WHERE ver == 6
|
||||
LIMIT 500000
|
||||
</query>
|
||||
|
||||
<drop_query>DROP DICTIONARY IF EXISTS default.dict_ip_trie</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS table_ip_trie</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS dict_ip_trie_table</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS table_ip_from_dict</drop_query>
|
||||
</test>
|
||||
|
@ -396,3 +396,29 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
|
@ -266,13 +266,13 @@ SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('200
|
||||
SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff:ffff::')));
|
||||
SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('2001:db8:ffff:1::')));
|
||||
|
||||
SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::')));
|
||||
SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('654f:3716::')));
|
||||
SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('654f:3716:ffff::')));
|
||||
SELECT '0' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::')));
|
||||
|
||||
SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:654f:3716')));
|
||||
SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:101.79.55.22')));
|
||||
SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('101.79.55.22')));
|
||||
SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv4StringToNum('127.0.0.1')));
|
||||
SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('::ffff:127.0.0.1')));
|
||||
|
||||
SELECT '0' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0')));
|
||||
SELECT '1' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::')));
|
||||
@ -439,6 +439,14 @@ SELECT
|
||||
toString(number) AS val
|
||||
FROM VALUES ('number UInt32', 5, 13, 24, 48, 49, 99, 127);
|
||||
|
||||
INSERT INTO database_for_dict.table_ip_trie VALUES ('101.79.55.22', 'JA');
|
||||
|
||||
INSERT INTO database_for_dict.table_ipv4_trie
|
||||
SELECT
|
||||
'255.255.255.255/' || toString(number) AS prefix,
|
||||
toString(number) AS val
|
||||
FROM VALUES ('number UInt32', 5, 13, 24, 30);
|
||||
|
||||
CREATE DICTIONARY database_for_dict.dict_ip_trie
|
||||
(
|
||||
prefix String,
|
||||
@ -451,6 +459,14 @@ LIFETIME(MIN 10 MAX 100);
|
||||
|
||||
SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('::ffff:1:1')));
|
||||
|
||||
SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('654f:3716::')));
|
||||
SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('654f:3716::')));
|
||||
SELECT 0 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('654f:3716:ffff::')));
|
||||
|
||||
SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:654f:3716')));
|
||||
SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::ffff:101.79.55.22')));
|
||||
SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4StringToNum('101.79.55.22')));
|
||||
|
||||
SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0')));
|
||||
SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::')));
|
||||
SELECT '' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::')));
|
||||
@ -587,4 +603,26 @@ SELECT '127' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv
|
||||
SELECT '127' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff')));
|
||||
|
||||
|
||||
SELECT '3' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.0')));
|
||||
SELECT '4' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.1')));
|
||||
SELECT '3' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.127')));
|
||||
SELECT '2' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.255.127')));
|
||||
SELECT '15' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.127.127')));
|
||||
SELECT '16' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.9')));
|
||||
SELECT '16' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.127')));
|
||||
SELECT '18' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.10')));
|
||||
SELECT '19' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.128.255')));
|
||||
SELECT '20' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.255.255.128')));
|
||||
|
||||
SELECT '3' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:0')));
|
||||
SELECT '4' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:1')));
|
||||
SELECT '3' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:7f')));
|
||||
SELECT '2' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7f00:ff7f')));
|
||||
SELECT '15' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:7f7f')));
|
||||
SELECT '16' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:8009')));
|
||||
SELECT '16' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:807f')));
|
||||
SELECT '18' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:800a')));
|
||||
SELECT '19' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:80ff')));
|
||||
SELECT '20' == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv6StringToNum('::ffff:7fff:ff80')));
|
||||
|
||||
DROP DATABASE IF EXISTS database_for_dict;
|
||||
|
Loading…
Reference in New Issue
Block a user