mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Add new dictionary layout (sparse_hashed) that is more memory efficient
With this new layout, sparsehash will be used instead of the default HashMap; sparsehash is more memory efficient, but it is also slower. So in a nutshell: - HashMap uses ~2x more memory than sparse_hash_map - HashMap is ~2-2.5x faster than sparse_hash_map (tested on lots of inputs; the closest to production was a dictionary with 600KK hashes and UInt16 as value) TODO: - fix allocated memory calculation - getBufferSizeInBytes/getBufferSizeInCells interface - benchmarks v0: replace HashMap with google::sparse_hash_map v2: use google::sparse_hash_map only when <sparse> is set to true v3: replace attributes with different layout v4: use ch hash over std::hash
This commit is contained in:
parent
1fe79ad43c
commit
420089c301
@ -40,3 +40,5 @@ if(USE_POCO_MONGODB)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_subdirectory(Embedded)
|
add_subdirectory(Embedded)
|
||||||
|
|
||||||
|
target_include_directories(clickhouse_dictionaries SYSTEM PRIVATE ${SPARSEHASH_INCLUDE_DIR})
|
||||||
|
@ -3,6 +3,23 @@
|
|||||||
#include "DictionaryBlockInputStream.h"
|
#include "DictionaryBlockInputStream.h"
|
||||||
#include "DictionaryFactory.h"
|
#include "DictionaryFactory.h"
|
||||||
|
|
||||||
|
namespace
{

/// Uniform accessors for the key (`first`) and mapped value (`second`) of a
/// hash-table entry. The templated lookup code calls these instead of touching
/// the entry directly, so the same code works with both entry shapes below.
/// Overload selection is done by SFINAE on the trailing return type: an entry
/// exposing `.first`/`.second` picks the first pair of overloads, an entry
/// exposing `getFirst()`/`getSecond()` picks the second pair.

/// google::sparse_hash_map
/// The doubled parentheses in decltype make the result a const reference to the
/// member, so no copy of the key/value is made per lookup. The argument must
/// outlive the returned reference (do not pass a temporary).
template <class T> auto first(const T & lhs) -> decltype((lhs.first))
{ return lhs.first; }
template <class T> auto second(const T & lhs) -> decltype((lhs.second))
{ return lhs.second; }

/// HashMap
/// Returns exactly what the entry's own getters return (value or reference).
template <class T> auto first(const T & lhs) -> decltype(lhs.getFirst())
{ return lhs.getFirst(); }
template <class T> auto second(const T & lhs) -> decltype(lhs.getSecond())
{ return lhs.getSecond(); }

}
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
namespace ErrorCodes
|
namespace ErrorCodes
|
||||||
@ -21,12 +38,14 @@ HashedDictionary::HashedDictionary(
|
|||||||
DictionarySourcePtr source_ptr_,
|
DictionarySourcePtr source_ptr_,
|
||||||
const DictionaryLifetime dict_lifetime_,
|
const DictionaryLifetime dict_lifetime_,
|
||||||
bool require_nonempty_,
|
bool require_nonempty_,
|
||||||
|
bool sparse_,
|
||||||
BlockPtr saved_block_)
|
BlockPtr saved_block_)
|
||||||
: name{name_}
|
: name{name_}
|
||||||
, dict_struct(dict_struct_)
|
, dict_struct(dict_struct_)
|
||||||
, source_ptr{std::move(source_ptr_)}
|
, source_ptr{std::move(source_ptr_)}
|
||||||
, dict_lifetime(dict_lifetime_)
|
, dict_lifetime(dict_lifetime_)
|
||||||
, require_nonempty(require_nonempty_)
|
, require_nonempty(require_nonempty_)
|
||||||
|
, sparse(sparse_)
|
||||||
, saved_block{std::move(saved_block_)}
|
, saved_block{std::move(saved_block_)}
|
||||||
{
|
{
|
||||||
createAttributes();
|
createAttributes();
|
||||||
@ -57,11 +76,10 @@ static inline HashedDictionary::Key getAt(const HashedDictionary::Key & value, c
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename ChildType, typename AncestorType>
|
template <typename AttrType, typename ChildType, typename AncestorType>
|
||||||
void HashedDictionary::isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
|
void HashedDictionary::isInAttrImpl(const AttrType & attr, const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
|
||||||
{
|
{
|
||||||
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
|
const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
|
||||||
const auto & attr = *std::get<CollectionPtrType<Key>>(hierarchical_attribute->maps);
|
|
||||||
const auto rows = out.size();
|
const auto rows = out.size();
|
||||||
|
|
||||||
for (const auto row : ext::range(0, rows))
|
for (const auto row : ext::range(0, rows))
|
||||||
@ -73,7 +91,7 @@ void HashedDictionary::isInImpl(const ChildType & child_ids, const AncestorType
|
|||||||
{
|
{
|
||||||
auto it = attr.find(id);
|
auto it = attr.find(id);
|
||||||
if (it != std::end(attr))
|
if (it != std::end(attr))
|
||||||
id = it->getSecond();
|
id = second(*it);
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -83,6 +101,13 @@ void HashedDictionary::isInImpl(const ChildType & child_ids, const AncestorType
|
|||||||
|
|
||||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
template <typename ChildType, typename AncestorType>
|
||||||
|
void HashedDictionary::isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
|
||||||
|
{
|
||||||
|
if (!sparse)
|
||||||
|
return isInAttrImpl(*std::get<CollectionPtrType<Key>>(hierarchical_attribute->maps), child_ids, ancestor_ids, out);
|
||||||
|
return isInAttrImpl(*std::get<SparseCollectionPtrType<Key>>(hierarchical_attribute->sparse_maps), child_ids, ancestor_ids, out);
|
||||||
|
}
|
||||||
|
|
||||||
void HashedDictionary::isInVectorVector(
|
void HashedDictionary::isInVectorVector(
|
||||||
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
|
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
|
||||||
@ -407,9 +432,22 @@ void HashedDictionary::loadData()
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void HashedDictionary::addAttributeSize(const Attribute & attribute)
|
void HashedDictionary::addAttributeSize(const Attribute & attribute)
|
||||||
{
|
{
|
||||||
const auto & map_ref = std::get<CollectionPtrType<T>>(attribute.maps);
|
if (!sparse)
|
||||||
bytes_allocated += sizeof(CollectionType<T>) + map_ref->getBufferSizeInBytes();
|
{
|
||||||
bucket_count = map_ref->getBufferSizeInCells();
|
const auto & map_ref = std::get<CollectionPtrType<T>>(attribute.maps);
|
||||||
|
bytes_allocated += sizeof(CollectionType<T>) + map_ref->getBufferSizeInBytes();
|
||||||
|
bucket_count = map_ref->getBufferSizeInCells();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
const auto & map_ref = std::get<SparseCollectionPtrType<T>>(attribute.sparse_maps);
|
||||||
|
bucket_count = map_ref->bucket_count();
|
||||||
|
|
||||||
|
/** TODO: more accurate calculation */
|
||||||
|
bytes_allocated += sizeof(CollectionType<T>);
|
||||||
|
bytes_allocated += bucket_count;
|
||||||
|
bytes_allocated += map_ref->size() * sizeof(Key) * sizeof(T);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HashedDictionary::calculateBytesAllocated()
|
void HashedDictionary::calculateBytesAllocated()
|
||||||
@ -479,12 +517,15 @@ template <typename T>
|
|||||||
void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
void HashedDictionary::createAttributeImpl(Attribute & attribute, const Field & null_value)
|
||||||
{
|
{
|
||||||
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
|
attribute.null_values = T(null_value.get<NearestFieldType<T>>());
|
||||||
attribute.maps = std::make_unique<CollectionType<T>>();
|
if (!sparse)
|
||||||
|
attribute.maps = std::make_unique<CollectionType<T>>();
|
||||||
|
else
|
||||||
|
attribute.sparse_maps = std::make_unique<SparseCollectionType<T>>();
|
||||||
}
|
}
|
||||||
|
|
||||||
HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
|
||||||
{
|
{
|
||||||
Attribute attr{type, {}, {}, {}};
|
Attribute attr{type, {}, {}, {}, {}};
|
||||||
|
|
||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
@ -535,7 +576,10 @@ HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const Attr
|
|||||||
case AttributeUnderlyingType::utString:
|
case AttributeUnderlyingType::utString:
|
||||||
{
|
{
|
||||||
attr.null_values = null_value.get<String>();
|
attr.null_values = null_value.get<String>();
|
||||||
attr.maps = std::make_unique<CollectionType<StringRef>>();
|
if (!sparse)
|
||||||
|
attr.maps = std::make_unique<CollectionType<StringRef>>();
|
||||||
|
else
|
||||||
|
attr.sparse_maps = std::make_unique<SparseCollectionType<StringRef>>();
|
||||||
attr.string_arena = std::make_unique<Arena>();
|
attr.string_arena = std::make_unique<Arena>();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -545,28 +589,43 @@ HashedDictionary::Attribute HashedDictionary::createAttributeWithType(const Attr
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
|
||||||
void HashedDictionary::getItemsImpl(
|
void HashedDictionary::getItemsAttrImpl(
|
||||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||||
{
|
{
|
||||||
const auto & attr = *std::get<CollectionPtrType<AttributeType>>(attribute.maps);
|
|
||||||
const auto rows = ext::size(ids);
|
const auto rows = ext::size(ids);
|
||||||
|
|
||||||
for (const auto i : ext::range(0, rows))
|
for (const auto i : ext::range(0, rows))
|
||||||
{
|
{
|
||||||
const auto it = attr.find(ids[i]);
|
const auto it = attr.find(ids[i]);
|
||||||
set_value(i, it != attr.end() ? static_cast<OutputType>(it->getSecond()) : get_default(i));
|
set_value(i, it != attr.end() ? static_cast<OutputType>(second(*it)) : get_default(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||||
|
void HashedDictionary::getItemsImpl(
|
||||||
|
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const
|
||||||
|
{
|
||||||
|
if (!sparse)
|
||||||
|
return getItemsAttrImpl<OutputType>(*std::get<CollectionPtrType<AttributeType>>(attribute.maps), ids, set_value, get_default);
|
||||||
|
return getItemsAttrImpl<OutputType>(*std::get<SparseCollectionPtrType<AttributeType>>(attribute.sparse_maps), ids, set_value, get_default);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const T value)
|
bool HashedDictionary::setAttributeValueImpl(Attribute & attribute, const Key id, const T value)
|
||||||
{
|
{
|
||||||
auto & map = *std::get<CollectionPtrType<T>>(attribute.maps);
|
if (!sparse)
|
||||||
return map.insert({id, value}).second;
|
{
|
||||||
|
auto & map = *std::get<CollectionPtrType<T>>(attribute.maps);
|
||||||
|
return map.insert({id, value}).second;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto & map = *std::get<SparseCollectionPtrType<T>>(attribute.sparse_maps);
|
||||||
|
return map.insert({id, value}).second;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
|
bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, const Field & value)
|
||||||
@ -605,10 +664,18 @@ bool HashedDictionary::setAttributeValue(Attribute & attribute, const Key id, co
|
|||||||
|
|
||||||
case AttributeUnderlyingType::utString:
|
case AttributeUnderlyingType::utString:
|
||||||
{
|
{
|
||||||
auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
|
|
||||||
const auto & string = value.get<String>();
|
const auto & string = value.get<String>();
|
||||||
const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
const auto string_in_arena = attribute.string_arena->insert(string.data(), string.size());
|
||||||
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
|
if (!sparse)
|
||||||
|
{
|
||||||
|
auto & map = *std::get<CollectionPtrType<StringRef>>(attribute.maps);
|
||||||
|
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
auto & map = *std::get<SparseCollectionPtrType<StringRef>>(attribute.sparse_maps);
|
||||||
|
return map.insert({id, StringRef{string_in_arena, string.size()}}).second;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -636,18 +703,23 @@ void HashedDictionary::has(const Attribute & attribute, const PaddedPODArray<Key
|
|||||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T, typename AttrType>
|
||||||
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute & attribute) const
|
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIdsAttrImpl(const AttrType & attr) const
|
||||||
{
|
{
|
||||||
const HashMap<UInt64, T> & attr = *std::get<CollectionPtrType<T>>(attribute.maps);
|
|
||||||
|
|
||||||
PaddedPODArray<Key> ids;
|
PaddedPODArray<Key> ids;
|
||||||
ids.reserve(attr.size());
|
ids.reserve(attr.size());
|
||||||
for (const auto & value : attr)
|
for (const auto & value : attr)
|
||||||
ids.push_back(value.getFirst());
|
ids.push_back(first(value));
|
||||||
|
|
||||||
return ids;
|
return ids;
|
||||||
}
|
}
|
||||||
|
template <typename T>
|
||||||
|
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute & attribute) const
|
||||||
|
{
|
||||||
|
if (!sparse)
|
||||||
|
return getIdsAttrImpl<T>(*std::get<CollectionPtrType<Key>>(attribute.maps));
|
||||||
|
return getIdsAttrImpl<T>(*std::get<SparseCollectionPtrType<Key>>(attribute.sparse_maps));
|
||||||
|
}
|
||||||
|
|
||||||
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
|
PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds() const
|
||||||
{
|
{
|
||||||
@ -714,9 +786,11 @@ void registerDictionaryHashed(DictionaryFactory & factory)
|
|||||||
ErrorCodes::BAD_ARGUMENTS};
|
ErrorCodes::BAD_ARGUMENTS};
|
||||||
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
|
||||||
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
|
||||||
return std::make_unique<HashedDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty);
|
const bool sparse = name == "sparse_hashed";
|
||||||
|
return std::make_unique<HashedDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, require_nonempty, sparse);
|
||||||
};
|
};
|
||||||
factory.registerLayout("hashed", create_layout);
|
factory.registerLayout("hashed", create_layout);
|
||||||
|
factory.registerLayout("sparse_hashed", create_layout);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#include <Columns/ColumnString.h>
|
#include <Columns/ColumnString.h>
|
||||||
#include <Core/Block.h>
|
#include <Core/Block.h>
|
||||||
#include <Common/HashTable/HashMap.h>
|
#include <Common/HashTable/HashMap.h>
|
||||||
|
#include <sparsehash/sparse_hash_map>
|
||||||
#include <ext/range.h>
|
#include <ext/range.h>
|
||||||
#include "DictionaryStructure.h"
|
#include "DictionaryStructure.h"
|
||||||
#include "IDictionary.h"
|
#include "IDictionary.h"
|
||||||
@ -26,6 +27,7 @@ public:
|
|||||||
DictionarySourcePtr source_ptr_,
|
DictionarySourcePtr source_ptr_,
|
||||||
const DictionaryLifetime dict_lifetime_,
|
const DictionaryLifetime dict_lifetime_,
|
||||||
bool require_nonempty_,
|
bool require_nonempty_,
|
||||||
|
bool sparse_,
|
||||||
BlockPtr saved_block_ = nullptr);
|
BlockPtr saved_block_ = nullptr);
|
||||||
|
|
||||||
std::string getName() const override { return name; }
|
std::string getName() const override { return name; }
|
||||||
@ -46,7 +48,7 @@ public:
|
|||||||
|
|
||||||
std::shared_ptr<const IExternalLoadable> clone() const override
|
std::shared_ptr<const IExternalLoadable> clone() const override
|
||||||
{
|
{
|
||||||
return std::make_shared<HashedDictionary>(name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, saved_block);
|
return std::make_shared<HashedDictionary>(name, dict_struct, source_ptr->clone(), dict_lifetime, require_nonempty, sparse, saved_block);
|
||||||
}
|
}
|
||||||
|
|
||||||
const IDictionarySource * getSource() const override { return source_ptr.get(); }
|
const IDictionarySource * getSource() const override { return source_ptr.get(); }
|
||||||
@ -149,6 +151,11 @@ private:
|
|||||||
template <typename Value>
|
template <typename Value>
|
||||||
using CollectionPtrType = std::unique_ptr<CollectionType<Value>>;
|
using CollectionPtrType = std::unique_ptr<CollectionType<Value>>;
|
||||||
|
|
||||||
|
/// Sparse counterpart of CollectionType: a google::sparse_hash_map from UInt64
/// keys to Value, hashed with DefaultHash<UInt64> rather than sparsehash's default hasher.
template <typename Value>
|
||||||
|
using SparseCollectionType = google::sparse_hash_map<UInt64, Value, DefaultHash<UInt64>>;
|
||||||
|
/// Owning pointer to a SparseCollectionType<Value>.
template <typename Value>
|
||||||
|
using SparseCollectionPtrType = std::unique_ptr<SparseCollectionType<Value>>;
|
||||||
|
|
||||||
struct Attribute final
|
struct Attribute final
|
||||||
{
|
{
|
||||||
AttributeUnderlyingType type;
|
AttributeUnderlyingType type;
|
||||||
@ -186,6 +193,23 @@ private:
|
|||||||
CollectionPtrType<Float64>,
|
CollectionPtrType<Float64>,
|
||||||
CollectionPtrType<StringRef>>
|
CollectionPtrType<StringRef>>
|
||||||
maps;
|
maps;
|
||||||
|
std::variant<
|
||||||
|
SparseCollectionPtrType<UInt8>,
|
||||||
|
SparseCollectionPtrType<UInt16>,
|
||||||
|
SparseCollectionPtrType<UInt32>,
|
||||||
|
SparseCollectionPtrType<UInt64>,
|
||||||
|
SparseCollectionPtrType<UInt128>,
|
||||||
|
SparseCollectionPtrType<Int8>,
|
||||||
|
SparseCollectionPtrType<Int16>,
|
||||||
|
SparseCollectionPtrType<Int32>,
|
||||||
|
SparseCollectionPtrType<Int64>,
|
||||||
|
SparseCollectionPtrType<Decimal32>,
|
||||||
|
SparseCollectionPtrType<Decimal64>,
|
||||||
|
SparseCollectionPtrType<Decimal128>,
|
||||||
|
SparseCollectionPtrType<Float32>,
|
||||||
|
SparseCollectionPtrType<Float64>,
|
||||||
|
SparseCollectionPtrType<StringRef>>
|
||||||
|
sparse_maps;
|
||||||
std::unique_ptr<Arena> string_arena;
|
std::unique_ptr<Arena> string_arena;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -207,6 +231,9 @@ private:
|
|||||||
|
|
||||||
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
|
||||||
|
|
||||||
|
template <typename OutputType, typename AttrType, typename ValueSetter, typename DefaultGetter>
|
||||||
|
void getItemsAttrImpl(
|
||||||
|
const AttrType & attr, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||||
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
|
||||||
void getItemsImpl(
|
void getItemsImpl(
|
||||||
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
const Attribute & attribute, const PaddedPODArray<Key> & ids, ValueSetter && set_value, DefaultGetter && get_default) const;
|
||||||
@ -221,11 +248,15 @@ private:
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
|
void has(const Attribute & attribute, const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const;
|
||||||
|
|
||||||
|
template <typename T, typename AttrType>
|
||||||
|
PaddedPODArray<Key> getIdsAttrImpl(const AttrType & attr) const;
|
||||||
template <typename T>
|
template <typename T>
|
||||||
PaddedPODArray<Key> getIds(const Attribute & attribute) const;
|
PaddedPODArray<Key> getIds(const Attribute & attribute) const;
|
||||||
|
|
||||||
PaddedPODArray<Key> getIds() const;
|
PaddedPODArray<Key> getIds() const;
|
||||||
|
|
||||||
|
template <typename AttrType, typename ChildType, typename AncestorType>
|
||||||
|
void isInAttrImpl(const AttrType & attr, const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
|
||||||
template <typename ChildType, typename AncestorType>
|
template <typename ChildType, typename AncestorType>
|
||||||
void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
|
void isInImpl(const ChildType & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
|
||||||
|
|
||||||
@ -234,6 +265,7 @@ private:
|
|||||||
const DictionarySourcePtr source_ptr;
|
const DictionarySourcePtr source_ptr;
|
||||||
const DictionaryLifetime dict_lifetime;
|
const DictionaryLifetime dict_lifetime;
|
||||||
const bool require_nonempty;
|
const bool require_nonempty;
|
||||||
|
const bool sparse;
|
||||||
|
|
||||||
std::map<std::string, size_t> attribute_index_by_name;
|
std::map<std::string, size_t> attribute_index_by_name;
|
||||||
std::vector<Attribute> attributes;
|
std::vector<Attribute> attributes;
|
||||||
|
@ -33,7 +33,7 @@ if (OPENSSL_CRYPTO_LIBRARY)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
target_include_directories(clickhouse_functions PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include)
|
target_include_directories(clickhouse_functions PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/include)
|
||||||
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${DIVIDE_INCLUDE_DIR} ${METROHASH_INCLUDE_DIR})
|
target_include_directories(clickhouse_functions SYSTEM PRIVATE ${DIVIDE_INCLUDE_DIR} ${METROHASH_INCLUDE_DIR} ${SPARSEHASH_INCLUDE_DIR})
|
||||||
|
|
||||||
if (CONSISTENT_HASHING_INCLUDE_DIR)
|
if (CONSISTENT_HASHING_INCLUDE_DIR)
|
||||||
target_include_directories (clickhouse_functions PRIVATE ${CONSISTENT_HASHING_INCLUDE_DIR})
|
target_include_directories (clickhouse_functions PRIVATE ${CONSISTENT_HASHING_INCLUDE_DIR})
|
||||||
|
@ -125,6 +125,69 @@
|
|||||||
</structure>
|
</structure>
|
||||||
</dictionary>
|
</dictionary>
|
||||||
|
|
||||||
|
<dictionary>
|
||||||
|
<name>hashed_sparse_ints</name>
|
||||||
|
<source>
|
||||||
|
<clickhouse>
|
||||||
|
<host>localhost</host>
|
||||||
|
<port>9000</port>
|
||||||
|
<user>default</user>
|
||||||
|
<password></password>
|
||||||
|
<db>test_00950</db>
|
||||||
|
<table>ints</table>
|
||||||
|
</clickhouse>
|
||||||
|
</source>
|
||||||
|
<lifetime>0</lifetime>
|
||||||
|
<layout>
|
||||||
|
<sparse_hashed/>
|
||||||
|
</layout>
|
||||||
|
<structure>
|
||||||
|
<id>
|
||||||
|
<name>key</name>
|
||||||
|
</id>
|
||||||
|
<attribute>
|
||||||
|
<name>i8</name>
|
||||||
|
<type>Int8</type>
|
||||||
|
<null_value>0</null_value>
|
||||||
|
</attribute>
|
||||||
|
<attribute>
|
||||||
|
<name>i16</name>
|
||||||
|
<type>Int16</type>
|
||||||
|
<null_value>0</null_value>
|
||||||
|
</attribute>
|
||||||
|
<attribute>
|
||||||
|
<name>i32</name>
|
||||||
|
<type>Int32</type>
|
||||||
|
<null_value>0</null_value>
|
||||||
|
</attribute>
|
||||||
|
<attribute>
|
||||||
|
<name>i64</name>
|
||||||
|
<type>Int64</type>
|
||||||
|
<null_value>0</null_value>
|
||||||
|
</attribute>
|
||||||
|
<attribute>
|
||||||
|
<name>u8</name>
|
||||||
|
<type>UInt8</type>
|
||||||
|
<null_value>0</null_value>
|
||||||
|
</attribute>
|
||||||
|
<attribute>
|
||||||
|
<name>u16</name>
|
||||||
|
<type>UInt16</type>
|
||||||
|
<null_value>0</null_value>
|
||||||
|
</attribute>
|
||||||
|
<attribute>
|
||||||
|
<name>u32</name>
|
||||||
|
<type>UInt32</type>
|
||||||
|
<null_value>0</null_value>
|
||||||
|
</attribute>
|
||||||
|
<attribute>
|
||||||
|
<name>u64</name>
|
||||||
|
<type>UInt64</type>
|
||||||
|
<null_value>0</null_value>
|
||||||
|
</attribute>
|
||||||
|
</structure>
|
||||||
|
</dictionary>
|
||||||
|
|
||||||
<dictionary>
|
<dictionary>
|
||||||
<name>cache_ints</name>
|
<name>cache_ints</name>
|
||||||
<source>
|
<source>
|
||||||
|
@ -4,6 +4,9 @@ dictGetOrDefault flat_ints 0 42 42 42 42 42 42 42 42
|
|||||||
dictGet hashed_ints 1 1 1 1 1 1 1 1 1
|
dictGet hashed_ints 1 1 1 1 1 1 1 1 1
|
||||||
dictGetOrDefault hashed_ints 1 1 1 1 1 1 1 1 1
|
dictGetOrDefault hashed_ints 1 1 1 1 1 1 1 1 1
|
||||||
dictGetOrDefault hashed_ints 0 42 42 42 42 42 42 42 42
|
dictGetOrDefault hashed_ints 0 42 42 42 42 42 42 42 42
|
||||||
|
dictGet hashed_sparse_ints 1 1 1 1 1 1 1 1 1
|
||||||
|
dictGetOrDefault hashed_sparse_ints 1 1 1 1 1 1 1 1 1
|
||||||
|
dictGetOrDefault hashed_sparse_ints 0 42 42 42 42 42 42 42 42
|
||||||
dictGet cache_ints 1 1 1 1 1 1 1 1 1
|
dictGet cache_ints 1 1 1 1 1 1 1 1 1
|
||||||
dictGetOrDefault cache_ints 1 1 1 1 1 1 1 1 1
|
dictGetOrDefault cache_ints 1 1 1 1 1 1 1 1 1
|
||||||
dictGetOrDefault cache_ints 0 42 42 42 42 42 42 42 42
|
dictGetOrDefault cache_ints 0 42 42 42 42 42 42 42 42
|
||||||
|
@ -69,6 +69,34 @@ select 'dictGetOrDefault', 'hashed_ints' as dict_name, toUInt64(0) as k,
|
|||||||
dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
|
dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
|
||||||
dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));
|
dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));
|
||||||
|
|
||||||
|
select 'dictGet', 'hashed_sparse_ints' as dict_name, toUInt64(1) as k,
|
||||||
|
dictGet(dict_name, 'i8', k),
|
||||||
|
dictGet(dict_name, 'i16', k),
|
||||||
|
dictGet(dict_name, 'i32', k),
|
||||||
|
dictGet(dict_name, 'i64', k),
|
||||||
|
dictGet(dict_name, 'u8', k),
|
||||||
|
dictGet(dict_name, 'u16', k),
|
||||||
|
dictGet(dict_name, 'u32', k),
|
||||||
|
dictGet(dict_name, 'u64', k);
|
||||||
|
select 'dictGetOrDefault', 'hashed_sparse_ints' as dict_name, toUInt64(1) as k,
|
||||||
|
dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));
|
||||||
|
select 'dictGetOrDefault', 'hashed_sparse_ints' as dict_name, toUInt64(0) as k,
|
||||||
|
dictGetOrDefault(dict_name, 'i8', k, toInt8(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'i16', k, toInt16(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'i32', k, toInt32(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'i64', k, toInt64(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'u8', k, toUInt8(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'u16', k, toUInt16(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'u32', k, toUInt32(42)),
|
||||||
|
dictGetOrDefault(dict_name, 'u64', k, toUInt64(42));
|
||||||
|
|
||||||
select 'dictGet', 'cache_ints' as dict_name, toUInt64(1) as k,
|
select 'dictGet', 'cache_ints' as dict_name, toUInt64(1) as k,
|
||||||
dictGet(dict_name, 'i8', k),
|
dictGet(dict_name, 'i8', k),
|
||||||
dictGet(dict_name, 'i16', k),
|
dictGet(dict_name, 'i16', k),
|
||||||
|
@ -39,6 +39,7 @@ The configuration looks like this:
|
|||||||
|
|
||||||
- [flat](#flat)
|
- [flat](#flat)
|
||||||
- [hashed](#dicts-external_dicts_dict_layout-hashed)
|
- [hashed](#dicts-external_dicts_dict_layout-hashed)
|
||||||
|
- [sparse_hashed](#dicts-external_dicts_dict_layout-sparse_hashed)
|
||||||
- [cache](#cache)
|
- [cache](#cache)
|
||||||
- [range_hashed](#range-hashed)
|
- [range_hashed](#range-hashed)
|
||||||
- [complex_key_hashed](#complex-key-hashed)
|
- [complex_key_hashed](#complex-key-hashed)
|
||||||
@ -77,6 +78,18 @@ Configuration example:
|
|||||||
</layout>
|
</layout>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### sparse_hashed {#dicts-external_dicts_dict_layout-sparse_hashed}
|
||||||
|
|
||||||
|
Similar to `hashed`, but uses less memory at the cost of more CPU usage.
|
||||||
|
|
||||||
|
Configuration example:
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<layout>
|
||||||
|
<sparse_hashed />
|
||||||
|
</layout>
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
### complex_key_hashed
|
### complex_key_hashed
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user