Add option access_to_key_from_attributes to ip dictionary

This commit is contained in:
vdimir 2020-12-24 07:31:09 +00:00
parent 30e3900235
commit 1e3bd37380
4 changed files with 108 additions and 21 deletions

View File

@ -107,6 +107,18 @@ static std::pair<Poco::Net::IPAddress, UInt8> parseIPFromString(const std::strin
}
}
/// Writes the textual subnet "<address>/<prefix_len>" into dst and returns the number of characters written.
/// src points to the binary address; isv4 chooses formatIPv4 over formatIPv6 for the address part.
/// The result is reported by length, so callers should use the returned size.
/// NOTE(review): dst must be large enough for the longest formatted address plus "/<prefix_len>";
/// no bounds are checked here.
static size_t formatIPWithPrefix(const unsigned char * src, UInt8 prefix_len, bool isv4, char * dst)
{
    char * cursor = dst;

    if (!isv4)
        formatIPv6(src, cursor);
    else
        formatIPv4(src, cursor);

    /// NOTE(review): this relies on the formatter leaving `cursor` one past a trailing byte
    /// (presumably the terminating zero), which is replaced with the separator — confirm against formatIPv4/formatIPv6.
    cursor[-1] = '/';

    /// itoa's return value is taken as the new end of the written text.
    cursor = itoa(prefix_len, cursor);

    return cursor - dst;
}
static void validateKeyTypes(const DataTypes & key_types)
{
if (key_types.empty() || key_types.size() > 2)
@ -231,14 +243,21 @@ IPAddressDictionary::IPAddressDictionary(
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
const DictionaryLifetime dict_lifetime_,
bool require_nonempty_)
bool require_nonempty_,
bool access_to_key_from_attributes_)
: IDictionaryBase(dict_id_)
, dict_struct(dict_struct_)
, source_ptr{std::move(source_ptr_)}
, dict_lifetime(dict_lifetime_)
, require_nonempty(require_nonempty_)
, access_to_key_from_attributes(access_to_key_from_attributes_)
, logger(&Poco::Logger::get("IPAddressDictionary"))
{
if (access_to_key_from_attributes)
{
dict_struct.attributes.emplace_back(dict_struct.key->front());
}
createAttributes();
loadData();
@ -453,8 +472,6 @@ void IPAddressDictionary::loadData()
auto stream = source_ptr->loadAll();
stream->readPrefix();
const auto attributes_size = attributes.size();
std::vector<IPRecord> ip_records;
bool has_ipv6 = false;
@ -465,14 +482,19 @@ void IPAddressDictionary::loadData()
element_count += rows;
const ColumnPtr key_column_ptr = block.safeGetByPosition(0).column;
const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
size_t attributes_size = dict_struct.attributes.size();
if (access_to_key_from_attributes)
{
return block.safeGetByPosition(attribute_idx + 1).column;
});
/// last attribute contains key and will be filled in code below
attributes_size--;
}
const auto attribute_column_ptrs = ext::map<Columns>(ext::range(0, attributes_size),
[&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx + 1).column; });
for (const auto row : ext::range(0, rows))
{
for (const auto attribute_idx : ext::range(0, attributes_size))
for (const auto attribute_idx : ext::range(0, attribute_column_ptrs.size()))
{
const auto & attribute_column = *attribute_column_ptrs[attribute_idx];
auto & attribute = attributes[attribute_idx];
@ -490,6 +512,33 @@ void IPAddressDictionary::loadData()
stream->readSuffix();
if (access_to_key_from_attributes)
{
/// We format key attribute values here instead of filling with data from key_column
/// because string representation can be normalized if bits beyond mask are set.
/// Also all IPv4 will be displayed as mapped IPv6 if there are any IPv6.
/// It's consistent with representation in table created with `ENGINE = Dictionary` from this dictionary.
char str_buffer[48];
if (has_ipv6)
{
uint8_t ip_buffer[IPV6_BINARY_LENGTH];
for (const auto & record : ip_records)
{
size_t str_len = formatIPWithPrefix(record.asIPv6Binary(ip_buffer), record.prefixIPv6(), false, str_buffer);
setAttributeValue(attributes.back(), String(str_buffer, str_len));
}
}
else
{
for (const auto & record : ip_records)
{
UInt32 addr = IPv4AsUInt32(record.addr.addr());
size_t str_len = formatIPWithPrefix(reinterpret_cast<const unsigned char *>(&addr), record.prefix, true, str_buffer);
setAttributeValue(attributes.back(), String(str_buffer, str_len));
}
}
}
row_idx.reserve(ip_records.size());
mask_column.reserve(ip_records.size());
@ -679,7 +728,7 @@ void IPAddressDictionary::calculateBytesAllocated()
template <typename T>
void IPAddressDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
attribute.null_values = null_value.isNull() ? T{} : T(null_value.get<NearestFieldType<T>>());
attribute.maps.emplace<ContainerType<T>>();
}
@ -735,7 +784,8 @@ IPAddressDictionary::Attribute IPAddressDictionary::createAttributeWithType(cons
case AttributeUnderlyingType::utString:
{
attr.null_values = null_value.get<String>();
attr.null_values = null_value.isNull() ? String() : null_value.get<String>();
attr.maps.emplace<ContainerType<StringRef>>();
attr.string_arena = std::make_unique<Arena>();
break;
@ -979,14 +1029,12 @@ static auto keyViewGetter()
for (size_t row : ext::range(0, key_ip_column.size()))
{
UInt8 mask = key_mask_column.getElement(row);
char * ptr = buffer;
size_t str_len;
if constexpr (IsIPv4)
formatIPv4(reinterpret_cast<const unsigned char *>(&key_ip_column.getElement(row)), ptr);
str_len = formatIPWithPrefix(reinterpret_cast<const unsigned char *>(&key_ip_column.getElement(row)), mask, true, buffer);
else
formatIPv6(reinterpret_cast<const unsigned char *>(key_ip_column.getDataAt(row).data), ptr);
*(ptr - 1) = '/';
ptr = itoa(mask, ptr);
column->insertData(buffer, ptr - buffer);
str_len = formatIPWithPrefix(reinterpret_cast<const unsigned char *>(key_ip_column.getDataAt(row).data), mask, false, buffer);
column->insertData(buffer, str_len);
}
return ColumnsWithTypeAndName{
ColumnWithTypeAndName(std::move(column), std::make_shared<DataTypeString>(), dict_attributes.front().name)};
@ -1120,8 +1168,12 @@ void registerDictionaryTrie(DictionaryFactory & factory)
const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix);
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
const auto & layout_prefix = config_prefix + ".layout.ip_trie";
const bool access_to_key_from_attributes = config.getBool(layout_prefix + ".access_to_key_from_attributes", false);
// This is specialised dictionary for storing IPv4 and IPv6 prefixes.
return std::make_unique<IPAddressDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
return std::make_unique<IPAddressDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime,
require_nonempty, access_to_key_from_attributes);
};
factory.registerLayout("ip_trie", create_layout, true);
}

View File

@ -27,7 +27,8 @@ public:
const DictionaryStructure & dict_struct_,
DictionarySourcePtr source_ptr_,
const DictionaryLifetime dict_lifetime_,
bool require_nonempty_);
bool require_nonempty_,
bool access_to_key_from_attributes_);
std::string getKeyDescription() const { return key_description; }
@ -45,7 +46,8 @@ public:
std::shared_ptr<const IExternalLoadable> clone() const override
{
return std::make_shared<IPAddressDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty);
return std::make_shared<IPAddressDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime,
require_nonempty, access_to_key_from_attributes);
}
const IDictionarySource * getSource() const override { return source_ptr.get(); }
@ -238,10 +240,11 @@ private:
static const uint8_t * getIPv6FromOffset(const IPv6Container & ipv6_col, size_t i);
const DictionaryStructure dict_struct;
DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
const DictionaryLifetime dict_lifetime;
const bool require_nonempty;
const bool access_to_key_from_attributes;
const std::string key_description{dict_struct.getKeyDescription()};
/// Contains sorted IP subnetworks. If some addresses equals, subnet with lower mask is placed first.

View File

@ -123,6 +123,16 @@
1
1
1
1
1
1
1
1
1
1
1
1
1
***ipv6 trie dict***
1
1
@ -273,6 +283,11 @@
1
1
1
1
1
1
1
1
***ipv6 trie dict mask***
1
1

View File

@ -207,9 +207,20 @@ INSERT INTO database_for_dict.table_ipv4_trie VALUES ('127.255.255.255/32', 21);
CREATE DICTIONARY database_for_dict.dict_ipv4_trie ( prefix String, val UInt32 )
PRIMARY KEY prefix
SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ipv4_trie'))
LAYOUT(IP_TRIE())
LAYOUT(IP_TRIE(ACCESS_TO_KEY_FROM_ATTRIBUTES 1))
LIFETIME(MIN 10 MAX 100);
SELECT '127.0.0.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.0')));
SELECT '127.0.0.1/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.1')));
SELECT '127.0.0.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.0.127')));
SELECT '127.0.0.0/16' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.0.255.127')));
SELECT '127.255.0.0/16' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.127.127')));
SELECT '127.255.128.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.9')));
SELECT '127.255.128.0/24' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.127')));
SELECT '127.255.128.10/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.10')));
SELECT '127.255.128.128/25' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.128.255')));
SELECT '127.255.255.128/32' == dictGetString('database_for_dict.dict_ipv4_trie', 'prefix', tuple(IPv4StringToNum('127.255.255.128')));
SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.0')));
SELECT 4 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.1')));
SELECT 3 == dictGetUInt32('database_for_dict.dict_ipv4_trie', 'val', tuple(IPv4StringToNum('127.0.0.127')));
@ -274,7 +285,7 @@ CREATE DICTIONARY database_for_dict.dict_ip_trie
)
PRIMARY KEY prefix
SOURCE(CLICKHOUSE(host 'localhost' port 9000 user 'default' db 'database_for_dict' table 'table_ip_trie'))
LAYOUT(IP_TRIE())
LAYOUT(IP_TRIE(ACCESS_TO_KEY_FROM_ATTRIBUTES 1))
LIFETIME(MIN 10 MAX 100);
SELECT 'US' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('2620:0:870::')));
@ -294,6 +305,12 @@ SELECT 'JA' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv4
SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv4StringToNum('127.0.0.1')));
SELECT 1 == dictHas('database_for_dict.dict_ip_trie', tuple(IPv6StringToNum('::ffff:127.0.0.1')));
SELECT '2620:0:870::/48' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2620:0:870::')));
SELECT '2a02:6b8:1::/48' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2a02:6b8:1::1')));
SELECT '2001:db8::/32' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('2001:db8::1')));
SELECT '::ffff:101.79.55.22/128' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:654f:3716')));
SELECT '::ffff:101.79.55.22/128' == dictGetString('database_for_dict.dict_ip_trie', 'prefix', tuple(IPv6StringToNum('::ffff:101.79.55.22')));
SELECT '0' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('::0')));
SELECT '1' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('8000::')));
SELECT '2' == dictGetString('database_for_dict.dict_ip_trie', 'val', tuple(IPv6StringToNum('c000::')));