mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Add option access_to_key_from_attributes to ip dictionary
This commit is contained in:
parent
30e3900235
commit
1e3bd37380
@ -107,6 +107,18 @@ static std::pair<Poco::Net::IPAddress, UInt8> parseIPFromString(const std::strin
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes the textual form of an IP subnet, "<address>/<prefix_len>", into dst and returns the number of bytes written.
/// src        - raw address bytes; passed to formatIPv4 when isv4 is true, otherwise to formatIPv6.
/// prefix_len - mask length, appended after the '/' separator via itoa.
/// dst        - output buffer; loadData passes a 48-byte buffer and builds a String from (dst, returned length).
/// NOTE(review): no bounds checking happens here - dst presumably must be large enough for the longest
/// address text plus "/NNN"; confirm against the formatters' maximum output.
static size_t formatIPWithPrefix(const unsigned char * src, UInt8 prefix_len, bool isv4, char * dst)
|
||||
{
|
||||
char * ptr = dst;
|
||||
if (isv4)
|
||||
formatIPv4(src, ptr);
|
||||
else
|
||||
formatIPv6(src, ptr);
|
||||
/// Put the separator into the byte just before ptr.
/// NOTE(review): presumably formatIPv4/formatIPv6 advance ptr past a trailing '\0', so this replaces
/// that terminator - confirm against their definitions.
*(ptr - 1) = '/';
|
||||
/// itoa's return value becomes the new end position, so the result length is ptr - dst.
ptr = itoa(prefix_len, ptr);
|
||||
return ptr - dst;
|
||||
}
|
||||
|
||||
static void validateKeyTypes(const DataTypes & key_types)
|
||||
{
|
||||
if (key_types.empty() || key_types.size() > 2)
|
||||
@ -231,14 +243,21 @@ IPAddressDictionary::IPAddressDictionary(
|
||||
const DictionaryStructure & dict_struct_,
|
||||
DictionarySourcePtr source_ptr_,
|
||||
const DictionaryLifetime dict_lifetime_,
|
||||
bool require_nonempty_)
|
||||
bool require_nonempty_,
|
||||
bool access_to_key_from_attributes_)
|
||||
: IDictionaryBase(dict_id_)
|
||||
, dict_struct(dict_struct_)
|
||||
, source_ptr{std::move(source_ptr_)}
|
||||
, dict_lifetime(dict_lifetime_)
|
||||
, require_nonempty(require_nonempty_)
|
||||
, access_to_key_from_attributes(access_to_key_from_attributes_)
|
||||
, logger(&Poco::Logger::get("IPAddressDictionary"))
|
||||
{
|
||||
if (access_to_key_from_attributes)
|
||||
{
|
||||
dict_struct.attributes.emplace_back(dict_struct.key->front());
|
||||
}
|
||||
|
||||
createAttributes();
|
||||
|
||||
loadData();
|
||||
@ -453,8 +472,6 @@ void IPAddressDictionary::loadData()
|
||||
auto stream = source_ptr->loadAll();
|
||||
stream->readPrefix();
|
||||
|
||||
const auto attributes_size = attributes.size();
|
||||
|
||||
std::vector<IPRecord> ip_records;
|
||||
|
||||
bool has_ipv6 = false;
|
||||
@ -465,14 +482,19 @@ void IPAddressDictionary::loadData()
|
||||
element_count += rows;
|
||||
|
||||
const ColumnPtr key_column_ptr = block.safeGetByPosition(0).column;
|
||||
const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
|
||||
|
||||
size_t attributes_size = dict_struct.attributes.size();
|
||||
if (access_to_key_from_attributes)
|
||||
{
|
||||
return block.safeGetByPosition(attribute_idx + 1).column;
|
||||
});
|
||||
/// last attribute contains key and will be filled in code below
|
||||
attributes_size--;
|
||||
}
|
||||
const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size),
|
||||
[&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx + 1).column; });
|
||||
|
||||
for (const auto row : ext::range(0, rows))
|
||||
{
|
||||
for (const auto attribute_idx : ext::range(0, attributes_size))
|
||||
for (const auto attribute_idx : ext::range(0, attribute_column_ptrs.size()))
|
||||
{
|
||||
const auto & attribute_column = *attribute_column_ptrs[attribute_idx];
|
||||
auto & attribute = attributes[attribute_idx];
|
||||
@ -490,6 +512,33 @@ void IPAddressDictionary::loadData()
|
||||
|
||||
stream->readSuffix();
|
||||
|
||||
if (access_to_key_from_attributes)
|
||||
{
|
||||
/// We format key attribute values here instead of filling with data from key_column
|
||||
/// because string representation can be normalized if bits beyond mask are set.
|
||||
/// Also all IPv4 will be displayed as mapped IPv6 if there are any IPv6.
|
||||
/// It's consistent with representation in table created with `ENGINE = Dictionary` from this dictionary.
|
||||
char str_buffer[48];
|
||||
if (has_ipv6)
|
||||
{
|
||||
uint8_t ip_buffer[IPV6_BINARY_LENGTH];
|
||||
for (const auto & record : ip_records)
|
||||
{
|
||||
size_t str_len = formatIPWithPrefix(record.asIPv6Binary(ip_buffer), record.prefixIPv6(), false, str_buffer);
|
||||
setAttributeValue(attributes.back(), String(str_buffer, str_len));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto & record : ip_records)
|
||||
{
|
||||
UInt32 addr = IPv4AsUInt32(record.addr.addr());
|
||||
size_t str_len = formatIPWithPrefix(reinterpret_cast<const unsigned char *>(&addr), record.prefix, true, str_buffer);
|
||||
setAttributeValue(attributes.back(), String(str_buffer, str_len));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
row_idx.reserve(ip_records.size());
|
||||
mask_column.reserve(ip_records.size());
|
||||
|
||||
@ -679,7 +728,7 @@ void IPAddressDictionary::calculateBytesAllocated()
|
||||
template <typename T>
|
||||
void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||
{
|
||||
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
|
||||
attribute.null_values = null_value.isNull() ? T{} : T(null_value.get<NearestFieldType<T>>());
|
||||
attribute.maps.emplace<ContainerType<T>>();
|
||||
}
|
||||
|
||||
@ -735,7 +784,8 @@ IPAddressDictionary::Attribute IPAddressDictionary::createAttributeWithType(cons
|
||||
|
||||
case AttributeUnderlyingType::utString:
|
||||
{
|
||||
attr.null_values = null_value.get<String>();
|
||||
|
||||
attr.null_values = null_value.isNull() ? String() : null_value.get<String>();
|
||||
attr.maps.emplace<ContainerType<StringRef>>();
|
||||
attr.string_arena = std::make_unique<Arena>();
|
||||
break;
|
||||
@ -979,14 +1029,12 @@ static auto keyViewGetter()
|
||||
for (size_t row : ext::range(0, key_ip_column.size()))
|
||||
{
|
||||
UInt8 mask = key_mask_column.getElement(row);
|
||||
char * ptr = buffer;
|
||||
size_t str_len;
|
||||
if constexpr (IsIPv4)
|
||||
formatIPv4(reinterpret_cast<const unsigned char *>(&key_ip_column.getElement(row)), ptr);
|
||||
str_len = formatIPWithPrefix(reinterpret_cast<const unsigned char *>(&key_ip_column.getElement(row)), mask, true, buffer);
|
||||
else
|
||||
formatIPv6(reinterpret_cast<const unsigned char *>(key_ip_column.getDataAt(row).data), ptr);
|
||||
*(ptr - 1) = '/';
|
||||
ptr = itoa(mask, ptr);
|
||||
column->insertData(buffer, ptr - buffer);
|
||||
str_len = formatIPWithPrefix(reinterpret_cast<const unsigned char *>(key_ip_column.getDataAt(row).data), mask, false, buffer);
|
||||
column->insertData(buffer, str_len);
|
||||
}
|
||||
return ColumnsWithTypeAndName{
|
||||
ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), dict_attributes.front().name)};
|
||||
@ -1120,8 +1168,12 @@ void registerDictionaryTrie(DictionaryFactory & factory)
|
||||
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
|
||||
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
||||
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
||||
|
||||
const auto & layout_prefix = config_prefix + ".layout.ip_trie";
|
||||
const bool access_to_key_from_attributes = config.getBool(layout_prefix + ".access_to_key_from_attributes", false);
|
||||
// This is specialised dictionary for storing IPv4 and IPv6 prefixes.
|
||||
return std::make_unique<IPAddressDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
|
||||
return std::make_unique<IPAddressDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime,
|
||||
require_nonempty, access_to_key_from_attributes);
|
||||
};
|
||||
factory.registerLayout("ip_trie", create_layout, true);
|
||||
}
|
||||
|
@ -27,7 +27,8 @@ public:
|
||||
const DictionaryStructure & dict_struct_,
|
||||
DictionarySourcePtr source_ptr_,
|
||||
const DictionaryLifetime dict_lifetime_,
|
||||
bool require_nonempty_);
|
||||
bool require_nonempty_,
|
||||
bool access_to_key_from_attributes_);
|
||||
|
||||
std::string getKeyDescription() const { return key_description; }
|
||||
|
||||
@ -45,7 +46,8 @@ public:
|
||||
|
||||
std::shared_ptr<const IExternalLoadable> clone() const override
|
||||
{
|
||||
return std::make_shared<IPAddressDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);
|
||||
return std::make_shared<IPAddressDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime,
|
||||
require_nonempty, access_to_key_from_attributes);
|
||||
}
|
||||
|
||||
const IDictionarySource * getSource() const override { return source_ptr.get(); }
|
||||
@ -238,10 +240,11 @@ private:
|
||||
|
||||
static const uint8_t * getIPv6FromOffset(const IPv6Container & ipv6_col, size_t i);
|
||||
|
||||
const DictionaryStructure dict_struct;
|
||||
DictionaryStructure dict_struct;
|
||||
const DictionarySourcePtr source_ptr;
|
||||
const DictionaryLifetime dict_lifetime;
|
||||
const bool require_nonempty;
|
||||
const bool access_to_key_from_attributes;
|
||||
const std::string key_description{dict_struct.getKeyDescription()};
|
||||
|
||||
/// Contains sorted IP subnetworks. If some addresses equals, subnet with lower mask is placed first.
|
||||
|
@ -123,6 +123,16 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
***ipv6 trie dict***
|
||||
1
|
||||
1
|
||||
@ -273,6 +283,11 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
***ipv6 trie dict mask***
|
||||
1
|
||||
1
|
||||
|
@ -207,9 +207,20 @@ INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.255.255/32', 21);
|
||||
CREATE DICTIONARY database_for_dict.dict_ipv4_trie ( prefix String, val UInt32 )
|
||||
PRIMARY KEY prefix
|
||||
SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ipv4_trie'))
|
||||
LAYOUT(IP_TRIE())
|
||||
LAYOUT(IP_TRIE(ACCESS_TO_KEY_FROM_ATTRIBUTES 1))
|
||||
LIFETIME(MIN 10 MAX 100);
|
||||
|
||||
SELECT '127.0.0.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.0')));
|
||||
SELECT '127.0.0.1/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.1')));
|
||||
SELECT '127.0.0.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.127')));
|
||||
SELECT '127.0.0.0/16' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.255.127')));
|
||||
SELECT '127.255.0.0/16' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.127.127')));
|
||||
SELECT '127.255.128.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.9')));
|
||||
SELECT '127.255.128.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.127')));
|
||||
SELECT '127.255.128.10/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.10')));
|
||||
SELECT '127.255.128.128/25' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.255')));
|
||||
SELECT '127.255.255.128/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.255.128')));
|
||||
|
||||
SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.0')));
|
||||
SELECT 4 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.1')));
|
||||
SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.127')));
|
||||
@ -274,7 +285,7 @@ CREATE DICTIONARY database_for_dict.dict_ip_trie
|
||||
)
|
||||
PRIMARY KEY prefix
|
||||
SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ip_trie'))
|
||||
LAYOUT(IP_TRIE())
|
||||
LAYOUT(IP_TRIE(ACCESS_TO_KEY_FROM_ATTRIBUTES 1))
|
||||
LIFETIME(MIN 10 MAX 100);
|
||||
|
||||
SELECT 'US' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('2620:0:870::')));
|
||||
@ -294,6 +305,12 @@ SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4
|
||||
SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv4StringToNum('127.0.0.1')));
|
||||
SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('::ffff:127.0.0.1')));
|
||||
|
||||
SELECT '2620:0:870::/48' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2620:0:870::')));
|
||||
SELECT '2a02:6b8:1::/48' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2a02:6b8:1::1')));
|
||||
SELECT '2001:db8::/32' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2001:db8::1')));
|
||||
SELECT '::ffff:101.79.55.22/128' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:654f:3716')));
|
||||
SELECT '::ffff:101.79.55.22/128' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:101.79.55.22')));
|
||||
|
||||
SELECT '0' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0')));
|
||||
SELECT '1' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::')));
|
||||
SELECT '2' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::')));
|
||||
|
Loading…
Reference in New Issue
Block a user