Dictionaries/TrieDictionary: IP prefix dictionary

This commit implements a basic IP prefix dictionary
that allows storing IPv4/IPv6 prefixes and
matching them against a single IP address on query
time. This allows for doing IP -> ASN matching and
other similar things on query time.

The implementation is basic for start, using a
simple bitwise trie and reusing interface for
complex key dictionaries (so using tuple instead
of UInt32/FixedString(16) as the key). A faster
bitwise trie implementation (like poptrie) is
desired to improve lookup performance and
memory consumption with large prefix tables.
This commit is contained in:
Marek Vavruša 2017-05-12 14:23:12 -07:00 committed by alexey-milovidov
parent e7b7f6f73d
commit 9520234365
12 changed files with 1578 additions and 4 deletions

View File

@ -31,6 +31,7 @@ endif ()
add_subdirectory (libcityhash)
add_subdirectory (libfarmhash)
add_subdirectory (libmetrohash)
add_subdirectory (libbtrie)
if (USE_INTERNAL_ZLIB_LIBRARY)
add_subdirectory (libzlib-ng)

View File

@ -0,0 +1,6 @@
include_directories (BEFORE include)
add_library (btrie
src/btrie.c
include/btrie.h
)

23
contrib/libbtrie/LICENSE Normal file
View File

@ -0,0 +1,23 @@
Copyright (c) 2013, CobbLiu
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this
list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,155 @@
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
#include <stdlib.h>
#include <stdint.h>
/**
* In btrie, each leaf means one bit in ip tree.
* Left means 0, and right means 1.
*/
#define BTRIE_NULL (uintptr_t) -1
#define MAX_PAGES 1024 * 16
typedef struct btrie_node_s btrie_node_t;
struct btrie_node_s {
btrie_node_t *right;
btrie_node_t *left;
btrie_node_t *parent;
uintptr_t value;
};
typedef struct btrie_s {
btrie_node_t *root;
btrie_node_t *free; /* free list of btrie */
char *start;
size_t size;
/*
* memory pool.
* memory management(esp free) will be so easy by using this facility.
*/
char *pools[MAX_PAGES];
size_t len;
} btrie_t;
/**
* Create an empty btrie
*
* @Return:
* An ip radix_tree created.
* NULL if creation failed.
*/
btrie_t *btrie_create();
/**
* Destroy the ip radix_tree
*
* @Return:
* OK if deletion succeed.
* ERROR if error occurs while deleting.
*/
int btrie_destroy(btrie_t *tree);
/**
* Count the nodes in the radix tree.
*/
size_t btrie_count(btrie_t *tree);
/**
* Return the allocated number of bytes.
*/
size_t btrie_allocated(btrie_t *tree);
/**
* Add an ipv4 into btrie
*
* @Args:
* key: ip address
* mask: key's mask
* value: value of this IP, may be NULL.
*
* @Return:
* OK for success.
* ERROR for failure.
*/
int btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
uintptr_t value);
/**
* Delete an ipv4 from btrie
*
* @Args:
*
* @Return:
* OK for success.
* ERROR for failure.
*/
int btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask);
/**
* Find an ipv4 from btrie
*
* @Args:
*
* @Return:
* Value if succeed.
* NULL if failed.
*/
uintptr_t btrie_find(btrie_t *tree, uint32_t key);
/**
* Add an ipv6 into btrie
*
* @Args:
* key: ip address
* mask: key's mask
* value: value of this IP, may be NULL.
*
* @Return:
* OK for success.
* ERROR for failure.
*/
int btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
uintptr_t value);
/**
* Delete an ipv6 from btrie
*
* @Args:
*
* @Return:
* OK for success.
* ERROR for failure.
*/
int btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask);
/**
* Find an ipv6 from btrie
*
* @Args:
*
* @Return:
* Value if succeed.
* NULL if failed.
*/
uintptr_t btrie_find_a6(btrie_t *tree, const uint8_t *key);
#if defined (__cplusplus)
}
#endif

View File

@ -0,0 +1,460 @@
#include <stdlib.h>
#include <string.h>
#include <btrie.h>
#define PAGE_SIZE 4096
static btrie_node_t *
btrie_alloc(btrie_t *tree)
{
btrie_node_t *p;
if (tree->free) {
p = tree->free;
tree->free = tree->free->right;
return p;
}
if (tree->size < sizeof(btrie_node_t)) {
tree->start = (char *) calloc(sizeof(char), PAGE_SIZE);
if (tree->start == NULL) {
return NULL;
}
tree->pools[tree->len++] = tree->start;
tree->size = PAGE_SIZE;
}
p = (btrie_node_t *) tree->start;
tree->start += sizeof(btrie_node_t);
tree->size -= sizeof(btrie_node_t);
return p;
}
btrie_t *
btrie_create()
{
btrie_t *tree = (btrie_t *) malloc(sizeof(btrie_t));
if (tree == NULL) {
return NULL;
}
tree->free = NULL;
tree->start = NULL;
tree->size = 0;
memset(tree->pools, 0, sizeof(btrie_t *) * MAX_PAGES);
tree->len = 0;
tree->root = btrie_alloc(tree);
if (tree->root == NULL) {
return NULL;
}
tree->root->right = NULL;
tree->root->left = NULL;
tree->root->parent = NULL;
tree->root->value = BTRIE_NULL;
return tree;
}
static size_t
subtree_weight(btrie_node_t *node)
{
size_t weight = 1;
if (node->left) {
weight += subtree_weight(node->left);
}
if (node->right) {
weight += subtree_weight(node->right);
}
return weight;
}
size_t
btrie_count(btrie_t *tree)
{
if (tree->root == NULL) {
return 0;
}
return subtree_weight(tree->root);
}
size_t
btrie_allocated(btrie_t *tree)
{
return tree->len * PAGE_SIZE;
}
int
btrie_insert(btrie_t *tree, uint32_t key, uint32_t mask,
uintptr_t value)
{
uint32_t bit;
btrie_node_t *node, *next;
bit = 0x80000000;
node = tree->root;
next = tree->root;
while (bit & mask) {
if (key & bit) {
next = node->right;
} else {
next = node->left;
}
if (next == NULL) {
break;
}
bit >>= 1;
node = next;
}
if (next) {
if (node->value != BTRIE_NULL) {
return -1;
}
node->value = value;
return 0;
}
while (bit & mask) {
next = btrie_alloc(tree);
if (next == NULL) {
return -1;
}
next->right = NULL;
next->left = NULL;
next->parent = node;
next->value = BTRIE_NULL;
if (key & bit) {
node->right = next;
} else {
node->left = next;
}
bit >>= 1;
node = next;
}
node->value = value;
return 0;
}
int
btrie_delete(btrie_t *tree, uint32_t key, uint32_t mask)
{
uint32_t bit;
btrie_node_t *node;
bit = 0x80000000;
node = tree->root;
while (node && (bit & mask)) {
if (key & bit) {
node = node->right;
} else {
node = node->left;
}
bit >>= 1;
}
if (node == NULL) {
return -1;
}
if (node->right || node->left) {
if (node->value != BTRIE_NULL) {
node->value = BTRIE_NULL;
return 0;
}
return -1;
}
for ( ;; ) {
if (node->parent->right == node) {
node->parent->right = NULL;
} else {
node->parent->left = NULL;
}
node->right = tree->free;
tree->free = node;
node = node->parent;
if (node->right || node->left) {
break;
}
if (node->value != BTRIE_NULL) {
break;
}
if (node->parent == NULL) {
break;
}
}
return 0;
}
uintptr_t
btrie_find(btrie_t *tree, uint32_t key)
{
uint32_t bit;
uintptr_t value;
btrie_node_t *node;
bit = 0x80000000;
value = BTRIE_NULL;
node = tree->root;
while (node) {
if (node->value != BTRIE_NULL) {
value = node->value;
}
if (key & bit) {
node = node->right;
} else {
node = node->left;
}
bit >>= 1;
}
return value;
}
int
btrie_insert_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask,
uintptr_t value)
{
uint8_t bit;
uint i;
btrie_node_t *node, *next;
i = 0;
bit = 0x80;
node = tree->root;
next = tree->root;
while (bit & mask[i]) {
if (key[i] & bit) {
next = node->right;
} else {
next = node->left;
}
if (next == NULL) {
break;
}
bit >>= 1;
node = next;
if (bit == 0) {
if (++i == 16) {
break;
}
bit = 0x80;
}
}
if (next) {
if (node->value != BTRIE_NULL) {
return -1;
}
node->value = value;
return 0;
}
while (bit & mask[i]) {
next = btrie_alloc(tree);
if (next == NULL) {
return -1;
}
next->right = NULL;
next->left = NULL;
next->parent = node;
next->value = BTRIE_NULL;
if (key[i] & bit) {
node->right = next;
} else {
node->left = next;
}
bit >>= 1;
node = next;
if (bit == 0) {
if (++i == 16) {
break;
}
bit = 0x80;
}
}
node->value = value;
return 0;
}
int
btrie_delete_a6(btrie_t *tree, const uint8_t *key, const uint8_t *mask)
{
uint8_t bit;
uint i;
btrie_node_t *node;
i = 0;
bit = 0x80;
node = tree->root;
while (node && (bit & mask[i])) {
if (key[i] & bit) {
node = node->right;
} else {
node = node->left;
}
bit >>= 1;
if (bit == 0) {
if (++i == 16) {
break;
}
bit = 0x80;
}
}
if (node == NULL) {
return -1;
}
if (node->right || node->left) {
if (node->value != BTRIE_NULL) {
node->value = BTRIE_NULL;
return 0;
}
return -1;
}
for ( ;; ) {
if (node->parent->right == node) {
node->parent->right = NULL;
} else {
node->parent->left = NULL;
}
node->right = tree->free;
tree->free = node;
node = node->parent;
if (node->right || node->left) {
break;
}
if (node->value != BTRIE_NULL) {
break;
}
if (node->parent == NULL) {
break;
}
}
return 0;
}
uintptr_t
btrie_find_a6(btrie_t *tree, const uint8_t *key)
{
uint8_t bit;
uintptr_t value;
uint i;
btrie_node_t *node;
i = 0;
bit = 0x80;
value = BTRIE_NULL;
node = tree->root;
while (node) {
if (node->value != BTRIE_NULL) {
value = node->value;
}
if (key[i] & bit) {
node = node->right;
} else {
node = node->left;
}
bit >>= 1;
if (bit == 0) {
i++;
bit = 0x80;
}
}
return value;
}
int
btrie_destroy(btrie_t *tree)
{
size_t i;
/* free memory pools */
for (i = 0; i < tree->len; i++) {
free(tree->pools[i]);
}
free(tree);
return 0;
}

View File

@ -0,0 +1,94 @@
#include <stdio.h>
#include <btrie.h>
int main()
{
btrie_t *it;
int ret;
uint8_t prefix_v6[16] = {0xde, 0xad, 0xbe, 0xef};
uint8_t mask_v6[16] = {0xff, 0xff, 0xff};
uint8_t ip_v6[16] = {0xde, 0xad, 0xbe, 0xef, 0xde};
it = btrie_create();
if (it == NULL) {
printf("create error!\n");
return 0;
}
//add 101.45.69.50/16
ret = btrie_insert(it, 1697465650, 0xffff0000, 1);
if (ret != 0) {
printf("insert 1 error.\n");
goto error;
}
//add 10.45.69.50/16
ret = btrie_insert(it, 170738994, 0xffff0000, 1);
if (ret != 0) {
printf("insert 2 error.\n");
goto error;
}
//add 10.45.79.50/16
ret = btrie_insert(it, 170741554, 0xffff0000, 1);
if (ret == 0) {
printf("insert 3 error.\n");
goto error;
}
//add 102.45.79.50/24
ret = btrie_insert(it, 1714245426, 0xffffff00, 1);
if (ret != 0) {
printf("insert 4 error.\n");
goto error;
}
ret = btrie_find(it, 170741554);
if (ret == 1) {
printf("test case 1 passed\n");
} else {
printf("test case 1 error\n");
}
ret = btrie_find(it, 170786817);
if (ret != 1) {
printf("test case 2 passed\n");
} else {
printf("test case 2 error\n");
}
ret = btrie_delete(it, 1714245426, 0xffffff00);
if (ret != 0) {
printf("delete 1 error\n");
goto error;
}
ret = btrie_find(it, 1714245426);
if (ret != 1) {
printf("test case 3 passed\n");
} else {
printf("test case 3 error\n");
}
//add dead:beef::/32
ret = btrie_insert_a6(it, prefix_v6, mask_v6, 1);
if (ret != 0) {
printf("insert 5 error\n");
goto error;
}
ret = btrie_find_a6(it, ip_v6);
if (ret == 1) {
printf("test case 4 passed\n");
} else {
printf("test case 4 error\n");
}
return 0;
error:
btrie_destroy(it);
printf("test failed\n");
return 1;
}

View File

@ -21,6 +21,7 @@ include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libdivide)
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libcpuid/include)
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libfarmhash)
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libmetrohash/src)
include_directories (BEFORE ${ClickHouse_SOURCE_DIR}/contrib/libbtrie/include)
include_directories (${ClickHouse_SOURCE_DIR}/libs/libdaemon/include)
include_directories (${ClickHouse_BINARY_DIR}/dbms/src)
@ -153,6 +154,7 @@ if (NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
src/Dictionaries/FlatDictionary.cpp
src/Dictionaries/HashedDictionary.cpp
src/Dictionaries/CacheDictionary.cpp
src/Dictionaries/TrieDictionary.cpp
src/Dictionaries/RangeHashedDictionary.cpp
src/Dictionaries/ComplexKeyHashedDictionary.cpp
src/Dictionaries/ComplexKeyCacheDictionary.cpp
@ -185,6 +187,7 @@ target_link_libraries (dbms
${OPENSSL_CRYPTO_LIBRARY}
${Boost_SYSTEM_LIBRARY}
${Poco_Data_LIBRARY}
btrie
)
if (Poco_DataODBC_FOUND)

View File

@ -0,0 +1,545 @@
#include <ext/map.hpp>
#include <ext/range.hpp>
#include <Poco/Net/IPAddress.h>
#include <Poco/ByteOrder.h>
#include <Dictionaries/TrieDictionary.h>
#include <iostream>
namespace DB
{
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int BAD_ARGUMENTS;
extern const int DICTIONARY_IS_EMPTY;
}
TrieDictionary::TrieDictionary(
const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime, bool require_nonempty)
: name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime),
require_nonempty(require_nonempty)
{
createAttributes();
trie = btrie_create();
try
{
loadData();
calculateBytesAllocated();
}
catch (...)
{
creation_exception = std::current_exception();
}
creation_time = std::chrono::system_clock::now();
}
TrieDictionary::TrieDictionary(const TrieDictionary & other)
: TrieDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.require_nonempty}
{
trie = btrie_create();
}
TrieDictionary::~TrieDictionary()
{
btrie_destroy(trie);
}
#define DECLARE(TYPE)\
void TrieDictionary::get##TYPE(\
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,\
PaddedPODArray<TYPE> & out) const\
{\
validateKeyTypes(key_types);\
\
const auto & attribute = getAttribute(attribute_name);\
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\
throw Exception{\
name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\
ErrorCodes::TYPE_MISMATCH};\
\
const auto null_value = std::get<TYPE>(attribute.null_values);\
\
getItemsNumber<TYPE>(attribute, key_columns,\
[&] (const std::size_t row, const auto value) { out[row] = value; },\
[&] (const std::size_t) { return null_value; });\
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
#undef DECLARE
void TrieDictionary::getString(
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,
ColumnString * out) const
{
validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{
name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
const auto & null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsImpl<StringRef, StringRef>(attribute, key_columns,
[&] (const std::size_t row, const StringRef value) { out->insertData(value.data, value.size); },
[&] (const std::size_t) { return null_value; });
}
#define DECLARE(TYPE)\
void TrieDictionary::get##TYPE(\
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,\
const PaddedPODArray<TYPE> & def, PaddedPODArray<TYPE> & out) const\
{\
validateKeyTypes(key_types);\
\
const auto & attribute = getAttribute(attribute_name);\
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\
throw Exception{\
name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\
ErrorCodes::TYPE_MISMATCH};\
\
getItemsNumber<TYPE>(attribute, key_columns,\
[&] (const std::size_t row, const auto value) { out[row] = value; },\
[&] (const std::size_t row) { return def[row]; });\
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
#undef DECLARE
void TrieDictionary::getString(
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,
const ColumnString * const def, ColumnString * const out) const
{
validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{
name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsImpl<StringRef, StringRef>(attribute, key_columns,
[&] (const std::size_t row, const StringRef value) { out->insertData(value.data, value.size); },
[&] (const std::size_t row) { return def->getDataAt(row); });
}
#define DECLARE(TYPE)\
void TrieDictionary::get##TYPE(\
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,\
const TYPE def, PaddedPODArray<TYPE> & out) const\
{\
validateKeyTypes(key_types);\
\
const auto & attribute = getAttribute(attribute_name);\
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::TYPE))\
throw Exception{\
name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\
ErrorCodes::TYPE_MISMATCH};\
\
getItemsNumber<TYPE>(attribute, key_columns,\
[&] (const std::size_t row, const auto value) { out[row] = value; },\
[&] (const std::size_t) { return def; });\
}
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
#undef DECLARE
void TrieDictionary::getString(
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,
const String & def, ColumnString * const out) const
{
validateKeyTypes(key_types);
const auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{
name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsImpl<StringRef, StringRef>(attribute, key_columns,
[&] (const std::size_t row, const StringRef value) { out->insertData(value.data, value.size); },
[&] (const std::size_t) { return StringRef{def}; });
}
void TrieDictionary::has(const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const
{
validateKeyTypes(key_types);
const auto & attribute = attributes.front();
switch (attribute.type)
{
case AttributeUnderlyingType::UInt8: has<UInt8>(attribute, key_columns, out); break;
case AttributeUnderlyingType::UInt16: has<UInt16>(attribute, key_columns, out); break;
case AttributeUnderlyingType::UInt32: has<UInt32>(attribute, key_columns, out); break;
case AttributeUnderlyingType::UInt64: has<UInt64>(attribute, key_columns, out); break;
case AttributeUnderlyingType::Int8: has<Int8>(attribute, key_columns, out); break;
case AttributeUnderlyingType::Int16: has<Int16>(attribute, key_columns, out); break;
case AttributeUnderlyingType::Int32: has<Int32>(attribute, key_columns, out); break;
case AttributeUnderlyingType::Int64: has<Int64>(attribute, key_columns, out); break;
case AttributeUnderlyingType::Float32: has<Float32>(attribute, key_columns, out); break;
case AttributeUnderlyingType::Float64: has<Float64>(attribute, key_columns, out); break;
case AttributeUnderlyingType::String: has<StringRef>(attribute, key_columns, out); break;
}
}
void TrieDictionary::createAttributes()
{
const auto size = dict_struct.attributes.size();
attributes.reserve(size);
for (const auto & attribute : dict_struct.attributes)
{
attribute_index_by_name.emplace(attribute.name, attributes.size());
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
if (attribute.hierarchical)
throw Exception{
name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
ErrorCodes::TYPE_MISMATCH};
}
}
void TrieDictionary::loadData()
{
auto stream = source_ptr->loadAll();
stream->readPrefix();
/// created upfront to avoid excess allocations
const auto keys_size = dict_struct.key.value().size();
StringRefs keys(keys_size);
const auto attributes_size = attributes.size();
while (const auto block = stream->read())
{
const auto rows = block.rows();
element_count += rows;
const auto key_column_ptrs = ext::map<ConstColumnPlainPtrs>(ext::range(0, keys_size),
[&] (const std::size_t attribute_idx) {
return block.safeGetByPosition(attribute_idx).column.get();
});
const auto attribute_column_ptrs = ext::map<ConstColumnPlainPtrs>(ext::range(0, attributes_size),
[&] (const std::size_t attribute_idx) {
return block.safeGetByPosition(keys_size + attribute_idx).column.get();
});
for (const auto row_idx : ext::range(0, rows))
{
/// calculate key once per row
const auto key_column = key_column_ptrs.front();
for (const auto attribute_idx : ext::range(0, attributes_size))
{
const auto & attribute_column = *attribute_column_ptrs[attribute_idx];
auto & attribute = attributes[attribute_idx];
setAttributeValue(attribute, key_column->getDataAt(row_idx), attribute_column[row_idx]);
}
}
}
stream->readSuffix();
if (require_nonempty && 0 == element_count)
throw Exception{
name + ": dictionary source is empty and 'require_nonempty' property is set.",
ErrorCodes::DICTIONARY_IS_EMPTY};
}
template <typename T>
void TrieDictionary::addAttributeSize(const Attribute & attribute)
{
const auto & vec = *std::get<ContainerPtrType<T>>(attribute.maps);
bytes_allocated += sizeof(ContainerType<T>) + (vec.capacity() * sizeof(T));
bucket_count = vec.size();
}
void TrieDictionary::calculateBytesAllocated()
{
bytes_allocated += attributes.size() * sizeof(attributes.front());
for (const auto & attribute : attributes)
{
switch (attribute.type)
{
case AttributeUnderlyingType::UInt8: addAttributeSize<UInt8>(attribute); break;
case AttributeUnderlyingType::UInt16: addAttributeSize<UInt16>(attribute); break;
case AttributeUnderlyingType::UInt32: addAttributeSize<UInt32>(attribute); break;
case AttributeUnderlyingType::UInt64: addAttributeSize<UInt64>(attribute); break;
case AttributeUnderlyingType::Int8: addAttributeSize<Int8>(attribute); break;
case AttributeUnderlyingType::Int16: addAttributeSize<Int16>(attribute); break;
case AttributeUnderlyingType::Int32: addAttributeSize<Int32>(attribute); break;
case AttributeUnderlyingType::Int64: addAttributeSize<Int64>(attribute); break;
case AttributeUnderlyingType::Float32: addAttributeSize<Float32>(attribute); break;
case AttributeUnderlyingType::Float64: addAttributeSize<Float64>(attribute); break;
case AttributeUnderlyingType::String:
{
addAttributeSize<StringRef>(attribute);
bytes_allocated += sizeof(Arena) + attribute.string_arena->size();
break;
}
}
}
bytes_allocated += btrie_allocated(trie);
}
void TrieDictionary::validateKeyTypes(const DataTypes & key_types) const
{
if (key_types.size() != 1)
throw Exception{
"Expected a single IP address",
ErrorCodes::TYPE_MISMATCH};
const auto & actual_type = key_types[0]->getName();
if (actual_type != "UInt32" && actual_type != "FixedString(16)")
throw Exception{
"Key does not match, expected either UInt32 or FixedString(16)",
ErrorCodes::TYPE_MISMATCH};
}
template <typename T>
void TrieDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
{
std::get<T>(attribute.null_values) = null_value.get<typename NearestFieldType<T>::Type>();
std::get<ContainerPtrType<T>>(attribute.maps) = std::make_unique<ContainerType<T>>();
}
TrieDictionary::Attribute TrieDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
Attribute attr{type};
switch (type)
{
case AttributeUnderlyingType::UInt8: createAttributeImpl<UInt8>(attr, null_value); break;
case AttributeUnderlyingType::UInt16: createAttributeImpl<UInt16>(attr, null_value); break;
case AttributeUnderlyingType::UInt32: createAttributeImpl<UInt32>(attr, null_value); break;
case AttributeUnderlyingType::UInt64: createAttributeImpl<UInt64>(attr, null_value); break;
case AttributeUnderlyingType::Int8: createAttributeImpl<Int8>(attr, null_value); break;
case AttributeUnderlyingType::Int16: createAttributeImpl<Int16>(attr, null_value); break;
case AttributeUnderlyingType::Int32: createAttributeImpl<Int32>(attr, null_value); break;
case AttributeUnderlyingType::Int64: createAttributeImpl<Int64>(attr, null_value); break;
case AttributeUnderlyingType::Float32: createAttributeImpl<Float32>(attr, null_value); break;
case AttributeUnderlyingType::Float64: createAttributeImpl<Float64>(attr, null_value); break;
case AttributeUnderlyingType::String:
{
std::get<String>(attr.null_values) = null_value.get<String>();
std::get<ContainerPtrType<StringRef>>(attr.maps) = std::make_unique<ContainerType<StringRef>>();
attr.string_arena = std::make_unique<Arena>();
break;
}
}
return attr;
}
template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void TrieDictionary::getItemsNumber(
const Attribute & attribute,
const ConstColumnPlainPtrs & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const
{
if (false) {}
#define DISPATCH(TYPE) \
else if (attribute.type == AttributeUnderlyingType::TYPE) \
getItemsImpl<TYPE, OutputType>(attribute, key_columns, std::forward<ValueSetter>(set_value), std::forward<DefaultGetter>(get_default));
DISPATCH(UInt8)
DISPATCH(UInt16)
DISPATCH(UInt32)
DISPATCH(UInt64)
DISPATCH(Int8)
DISPATCH(Int16)
DISPATCH(Int32)
DISPATCH(Int64)
DISPATCH(Float32)
DISPATCH(Float64)
#undef DISPATCH
else
throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
}
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void TrieDictionary::getItemsImpl(
const Attribute & attribute,
const ConstColumnPlainPtrs & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const
{
auto & vec = *std::get<ContainerPtrType<AttributeType>>(attribute.maps);
const auto first_column = key_columns.front();
const auto rows = first_column->size();
if (first_column->isNumeric())
{
for (const auto i : ext::range(0, rows))
{
auto addr = Int32(first_column->get64(i));
uintptr_t slot = btrie_find(trie, addr);
set_value(i, slot != BTRIE_NULL ? vec[slot] : get_default(i));
}
}
else
{
for (const auto i : ext::range(0, rows))
{
auto addr = first_column->getDataAt(i);
if (addr.size != 16)
throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const UInt8*>(addr.data));
set_value(i, slot != BTRIE_NULL ? vec[slot] : get_default(i));
}
}
query_count.fetch_add(rows, std::memory_order_relaxed);
}
template <typename T>
bool TrieDictionary::setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value)
{
// Insert value into appropriate vector type
auto & vec = *std::get<ContainerPtrType<T>>(attribute.maps);
size_t row = vec.size();
vec.push_back(value);
// Parse IP address and subnet length from string (e.g. 2a02:6b8::3/64)
Poco::Net::IPAddress addr, mask;
std::string addr_str(key.toString());
size_t pos = addr_str.find('/');
if (pos != std::string::npos)
{
addr = Poco::Net::IPAddress(addr_str.substr(0, pos));
mask = Poco::Net::IPAddress(std::stoi(addr_str.substr(pos + 1), nullptr, 10), addr.family());
}
else
{
addr = Poco::Net::IPAddress(addr_str);
mask = Poco::Net::IPAddress(addr.length() * 8, addr.family());
}
/*
* Here we might overwrite the same key with the same slot as each key can map to multiple attributes.
* However, all columns have equal number of rows so it is okay to store only row number for each key
* instead of building a trie for each column. This comes at the cost of additional lookup in attribute
* vector on lookup time to return cell from row + column. The reason for this is to save space,
* and build only single trie instead of trie for each column.
*/
if (addr.family() == Poco::Net::IPAddress::IPv4)
{
UInt32 addr_v4 = Poco::ByteOrder::toNetwork(*reinterpret_cast<const UInt32*>(addr.addr()));
UInt32 mask_v4 = Poco::ByteOrder::toNetwork(*reinterpret_cast<const UInt32*>(mask.addr()));
return btrie_insert(trie, addr_v4, mask_v4, row) == 0;
}
const uint8_t* addr_v6 = reinterpret_cast<const uint8_t*>(addr.addr());
const uint8_t* mask_v6 = reinterpret_cast<const uint8_t*>(mask.addr());
return btrie_insert_a6(trie, addr_v6, mask_v6, row) == 0;
}
bool TrieDictionary::setAttributeValue(Attribute & attribute, const StringRef key, const Field & value)
{
switch (attribute.type)
{
case AttributeUnderlyingType::UInt8: return setAttributeValueImpl<UInt8>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::UInt16: return setAttributeValueImpl<UInt16>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::UInt32: return setAttributeValueImpl<UInt32>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::UInt64: return setAttributeValueImpl<UInt64>(attribute, key, value.get<UInt64>());
case AttributeUnderlyingType::Int8: return setAttributeValueImpl<Int8>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Int16: return setAttributeValueImpl<Int16>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Int32: return setAttributeValueImpl<Int32>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Int64: return setAttributeValueImpl<Int64>(attribute, key, value.get<Int64>());
case AttributeUnderlyingType::Float32: return setAttributeValueImpl<Float32>(attribute, key, value.get<Float64>());
case AttributeUnderlyingType::Float64: return setAttributeValueImpl<Float64>(attribute, key, value.get<Float64>());
case AttributeUnderlyingType::String:
{
const auto & string = value.get<String>();
const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size());
setAttributeValueImpl<StringRef>(attribute, key, StringRef{string_in_arena, string.size()});
return true;
}
}
return {};
}
const TrieDictionary::Attribute & TrieDictionary::getAttribute(const std::string & attribute_name) const
{
const auto it = attribute_index_by_name.find(attribute_name);
if (it == std::end(attribute_index_by_name))
throw Exception{
name + ": no such attribute '" + attribute_name + "'",
ErrorCodes::BAD_ARGUMENTS};
return attributes[it->second];
}
template <typename T>
void TrieDictionary::has(const Attribute & attribute, const ConstColumnPlainPtrs & key_columns, PaddedPODArray<UInt8> & out) const
{
const auto first_column = key_columns.front();
const auto rows = first_column->size();
if (first_column->isNumeric())
{
for (const auto i : ext::range(0, rows))
{
auto addr = Int32(first_column->get64(i));
uintptr_t slot = btrie_find(trie, addr);
out[i] = (slot != BTRIE_NULL);
}
}
else
{
for (const auto i : ext::range(0, rows))
{
auto addr = first_column->getDataAt(i);
if (unlikely(addr.size != 16))
throw Exception("Expected key to be FixedString(16)", ErrorCodes::LOGICAL_ERROR);
uintptr_t slot = btrie_find_a6(trie, reinterpret_cast<const UInt8*>(addr.data));
out[i] = (slot != BTRIE_NULL);
}
}
query_count.fetch_add(rows, std::memory_order_relaxed);}
}

View File

@ -0,0 +1,216 @@
#pragma once
#include <Dictionaries/IDictionary.h>
#include <Dictionaries/IDictionarySource.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Core/StringRef.h>
#include <Common/HashTable/HashMap.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <ext/range.hpp>
#include <btrie.h>
#include <atomic>
#include <memory>
#include <tuple>
namespace DB
{
class TrieDictionary final : public IDictionaryBase
{
public:
TrieDictionary(
const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime, bool require_nonempty);
TrieDictionary(const TrieDictionary & other);
~TrieDictionary();
std::string getKeyDescription() const { return key_description; };
std::exception_ptr getCreationException() const override { return creation_exception; }
std::string getName() const override { return name; }
std::string getTypeName() const override { return "Trie"; }
std::size_t getBytesAllocated() const override { return bytes_allocated; }
std::size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
double getHitRate() const override { return 1.0; }
std::size_t getElementCount() const override { return element_count; }
double getLoadFactor() const override { return static_cast<double>(element_count) / bucket_count; }
bool isCached() const override { return false; }
DictionaryPtr clone() const override { return std::make_unique<TrieDictionary>(*this); }
const IDictionarySource * getSource() const override { return source_ptr.get(); }
const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
const DictionaryStructure & getStructure() const override { return dict_struct; }
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override
{
return creation_time;
}
bool isInjective(const std::string & attribute_name) const override
{
return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective;
}
#define DECLARE(TYPE)\
void get##TYPE(\
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,\
PaddedPODArray<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
#undef DECLARE
void getString(
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,
ColumnString * out) const;
#define DECLARE(TYPE)\
void get##TYPE(\
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,\
const PaddedPODArray<TYPE> & def, PaddedPODArray<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
#undef DECLARE
void getString(
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,
const ColumnString * const def, ColumnString * const out) const;
#define DECLARE(TYPE)\
void get##TYPE(\
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,\
const TYPE def, PaddedPODArray<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
DECLARE(UInt64)
DECLARE(Int8)
DECLARE(Int16)
DECLARE(Int32)
DECLARE(Int64)
DECLARE(Float32)
DECLARE(Float64)
#undef DECLARE
void getString(
const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,
const String & def, ColumnString * const out) const;
void has(const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
private:
template <typename Value> using ContainerType = std::vector<Value>;
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
struct Attribute final
{
AttributeUnderlyingType type;
std::tuple<
UInt8, UInt16, UInt32, UInt64,
Int8, Int16, Int32, Int64,
Float32, Float64,
String> null_values;
std::tuple<
ContainerPtrType<UInt8>, ContainerPtrType<UInt16>, ContainerPtrType<UInt32>, ContainerPtrType<UInt64>,
ContainerPtrType<Int8>, ContainerPtrType<Int16>, ContainerPtrType<Int32>, ContainerPtrType<Int64>,
ContainerPtrType<Float32>, ContainerPtrType<Float64>,
ContainerPtrType<StringRef>> maps;
std::unique_ptr<Arena> string_arena;
};
void createAttributes();
void loadData();
template <typename T>
void addAttributeSize(const Attribute & attribute);
void calculateBytesAllocated();
void validateKeyTypes(const DataTypes & key_types) const;
template <typename T>
void createAttributeImpl(Attribute & attribute, const Field & null_value);
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsNumber(
const Attribute & attribute,
const ConstColumnPlainPtrs & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsImpl(
const Attribute & attribute,
const ConstColumnPlainPtrs & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const;
template <typename T>
bool setAttributeValueImpl(Attribute & attribute, const StringRef key, const T value);
bool setAttributeValue(Attribute & attribute, const StringRef key, const Field & value);
const Attribute & getAttribute(const std::string & attribute_name) const;
template <typename T>
void has(const Attribute & attribute, const ConstColumnPlainPtrs & key_columns, PaddedPODArray<UInt8> & out) const;
const std::string name;
const DictionaryStructure dict_struct;
const DictionarySourcePtr source_ptr;
const DictionaryLifetime dict_lifetime;
const bool require_nonempty;
const std::string key_description{dict_struct.getKeyDescription()};
btrie_t *trie;
std::map<std::string, std::size_t> attribute_index_by_name;
std::vector<Attribute> attributes;
std::size_t bytes_allocated = 0;
std::size_t element_count = 0;
std::size_t bucket_count = 0;
mutable std::atomic<std::size_t> query_count{0};
std::chrono::time_point<std::chrono::system_clock> creation_time;
std::exception_ptr creation_exception;
};
}

View File

@ -23,6 +23,7 @@
#include <Dictionaries/ComplexKeyHashedDictionary.h>
#include <Dictionaries/ComplexKeyCacheDictionary.h>
#include <Dictionaries/RangeHashedDictionary.h>
#include <Dictionaries/TrieDictionary.h>
#include <ext/range.hpp>
@ -102,7 +103,8 @@ private:
!executeDispatchSimple<HashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchSimple<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr))
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr))
throw Exception{
"Unsupported dictionary type " + dict_ptr->getTypeName(),
ErrorCodes::UNKNOWN_TYPE};
@ -285,6 +287,7 @@ private:
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchRange<RangeHashedDictionary>(block, arguments, result, dict_ptr))
throw Exception{
"Unsupported dictionary type " + dict_ptr->getTypeName(),
@ -551,7 +554,8 @@ private:
!executeDispatch<HashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr))
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr))
throw Exception{
"Unsupported dictionary type " + dict_ptr->getTypeName(),
ErrorCodes::UNKNOWN_TYPE};
@ -844,6 +848,7 @@ private:
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchRange<RangeHashedDictionary>(block, arguments, result, dict_ptr))
throw Exception{
"Unsupported dictionary type " + dict_ptr->getTypeName(),
@ -1153,7 +1158,8 @@ private:
!executeDispatch<HashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatch<CacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyHashedDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr))
!executeDispatchComplex<ComplexKeyCacheDictionary>(block, arguments, result, dict_ptr) &&
!executeDispatchComplex<TrieDictionary>(block, arguments, result, dict_ptr))
throw Exception{
"Unsupported dictionary type " + dict_ptr->getTypeName(),
ErrorCodes::UNKNOWN_TYPE};

View File

@ -6,6 +6,7 @@
#include <Dictionaries/RangeHashedDictionary.h>
#include <Dictionaries/ComplexKeyHashedDictionary.h>
#include <Dictionaries/ComplexKeyCacheDictionary.h>
#include <Dictionaries/TrieDictionary.h>
#include <Dictionaries/DictionaryStructure.h>
#include <memory>
@ -81,6 +82,15 @@ DictionaryPtr DictionaryFactory::create(const std::string & name, Poco::Util::Ab
return std::make_unique<ComplexKeyCacheDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, size);
}
else if ("ip_trie" == layout_type)
{
if (!dict_struct.key)
throw Exception{"'key' is required for dictionary of layout 'ip_trie'",
ErrorCodes::BAD_ARGUMENTS};
// This is specialised trie for storing IPv4 and IPv6 prefixes.
return std::make_unique<TrieDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
}
else
{
if (dict_struct.key)

View File

@ -101,7 +101,10 @@ The dictionary config file has the following format:
<cache>
<!- - Cache size in number of cells; rounded up to a degree of two. - ->
<size_in_cells>1000000000</size_in_cells>
</cache> -->
</cache>
or
<ip_trie />
-->
</layout>
<!-- Structure. -->
@ -243,6 +246,58 @@ Example of a dictionary by ranges:
</dictionary>
</dictionaries>
ip_trie
-------
The table stores IP prefixes for each key (IP address), which makes it possible to map IP addresses to metadata such as ASN or threat score.
Example: in the table there are prefixes matches to AS number and country:
::
prefix asn cca2
202.79.32.0/20 17501 NP
2620:0:870::/48 3856 US
2a02:6b8:1::/48 13238 RU
2001:db8::/32 65536 ZZ
When using such a layout, the structure should have the "key" element.
Example:
.. code-block:: xml
<structure>
<key>
<attribute>
<name>prefix</name>
<type>String</type>
</attribute>
</key>
<attribute>
<name>asn</name>
<type>UInt32</type>
<null_value />
</attribute>
<attribute>
<name>cca2</name>
<type>String</type>
<null_value>??</null_value>
</attribute>
...
These key must have only one attribute of type String, containing a valid IP prefix. Other types are not yet supported.
For querying, same functions (dictGetT with tuple) as for complex key dictionaries have to be used:
``dictGetT('dict_name', 'attr_name', tuple(ip))``
The function accepts either UInt32 for IPv4 address or FixedString(16) for IPv6 address in wire format:
``dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1')))``
No other type is supported. The function returns attribute for a prefix matching the given IP address. If there are overlapping prefixes, the most specific one is returned.
The data is stored currently in a bitwise trie, it has to fit in memory.
complex_key_hashed
----------------