2015-01-29 13:53:48 +00:00
|
|
|
#pragma once
|
|
|
|
|
2015-05-08 12:31:00 +00:00
|
|
|
#include <atomic>
|
2015-02-10 21:10:58 +00:00
|
|
|
#include <memory>
|
2018-11-11 19:29:52 +00:00
|
|
|
#include <variant>
|
2021-01-21 14:42:50 +00:00
|
|
|
#include <optional>
|
2021-12-14 06:12:42 +00:00
|
|
|
#include <sparsehash/sparse_hash_map>
|
2022-01-22 20:01:45 +00:00
|
|
|
#include <sparsehash/sparse_hash_set>
|
2021-03-24 16:31:00 +00:00
|
|
|
|
|
|
|
#include <Common/HashTable/HashMap.h>
|
|
|
|
#include <Common/HashTable/HashSet.h>
|
|
|
|
#include <Core/Block.h>
|
|
|
|
|
|
|
|
#include <Dictionaries/DictionaryStructure.h>
|
|
|
|
#include <Dictionaries/IDictionary.h>
|
|
|
|
#include <Dictionaries/IDictionarySource.h>
|
|
|
|
#include <Dictionaries/DictionaryHelpers.h>
|
2015-01-29 13:53:48 +00:00
|
|
|
|
2019-09-22 02:09:40 +00:00
|
|
|
/** This dictionary stores all content in a hash table in memory
|
|
|
|
* (a separate Key -> Value map for each attribute)
|
2019-09-22 02:11:08 +00:00
|
|
|
* Two variants of hash table are supported: a fast HashMap and memory efficient sparse_hash_map.
|
2019-09-22 02:09:40 +00:00
|
|
|
*/
|
2015-07-10 14:43:49 +00:00
|
|
|
|
2015-01-29 13:53:48 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
2018-01-15 12:44:39 +00:00
|
|
|
|
2023-01-18 14:29:21 +00:00
|
|
|
struct HashedDictionaryConfiguration
|
2021-05-09 09:12:21 +00:00
|
|
|
{
|
Add ability to load hashed dictionaries using multiple threads
Right now dictionaries (here I will talk about only
HASHED/SPARSE_HASHED/COMPLEX_KEY_HASHED/COMPLEX_KEY_SPARSE_HASHED)
can load data only in one thread, since it uses one hash table that
cannot be filled from multiple threads.
And in case you have very big dictionary (i.e. 10e9 elements), it can
take a awhile to load them, especially for SPARSE_HASHED variants (and
if you have such amount of elements there, you are likely use
SPARSE_HASHED, since it requires less memory), in my env it takes ~4
hours, which is enormous amount of time.
So this patch add support of shards for dictionaries, number of shards
determine how much hash tables will use this dictionary, also, and which
is more important, how much threads it can use to load the data.
And with 16 threads this works 2x faster, not perfect though, see the
follow up patches in this series.
v0: PARTITION BY
v1: SHARDS 1
v2: SHARDS(1)
v3: tried optimized mod - logical and, but it does not gain even 10%
v4: tried squashing more (max_block_size * shards), but it does not gain even 10% either
v5: move SHARDS into layout parameters (unknown simply ignored)
v6: tune params for perf tests (to avoid too long queries)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-08-07 15:48:54 +00:00
|
|
|
const UInt64 shards;
|
2022-11-25 10:14:51 +00:00
|
|
|
const UInt64 shard_load_queue_backlog;
|
2021-05-09 09:12:21 +00:00
|
|
|
const bool require_nonempty;
|
|
|
|
const DictionaryLifetime lifetime;
|
|
|
|
};
|
|
|
|
|
2022-11-23 12:35:35 +00:00
|
|
|
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
|
|
|
|
class ParallelDictionaryLoader;
|
|
|
|
|
2022-08-11 17:16:21 +00:00
|
|
|
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
|
2015-01-29 13:53:48 +00:00
|
|
|
class HashedDictionary final : public IDictionary
|
|
|
|
{
|
2022-11-23 12:35:35 +00:00
|
|
|
friend class ParallelDictionaryLoader<dictionary_key_type, sparse, sharded>;
|
|
|
|
|
2015-01-29 13:53:48 +00:00
|
|
|
public:
|
2021-08-17 17:35:43 +00:00
|
|
|
using KeyType = std::conditional_t<dictionary_key_type == DictionaryKeyType::Simple, UInt64, StringRef>;
|
2021-03-24 16:31:00 +00:00
|
|
|
|
2018-12-10 15:25:45 +00:00
|
|
|
HashedDictionary(
|
2020-07-14 18:46:29 +00:00
|
|
|
const StorageID & dict_id_,
|
2019-08-03 11:02:40 +00:00
|
|
|
const DictionaryStructure & dict_struct_,
|
|
|
|
DictionarySourcePtr source_ptr_,
|
2023-01-18 14:29:21 +00:00
|
|
|
const HashedDictionaryConfiguration & configuration_,
|
2021-04-30 22:23:22 +00:00
|
|
|
BlockPtr update_field_loaded_block_ = nullptr);
|
2022-12-11 16:36:01 +00:00
|
|
|
~HashedDictionary() override;
|
2015-01-29 13:53:48 +00:00
|
|
|
|
2021-03-24 16:31:00 +00:00
|
|
|
std::string getTypeName() const override
|
|
|
|
{
|
2021-08-17 17:35:43 +00:00
|
|
|
if constexpr (dictionary_key_type == DictionaryKeyType::Simple && sparse)
|
2021-03-24 16:31:00 +00:00
|
|
|
return "SparseHashed";
|
2021-08-17 17:35:43 +00:00
|
|
|
else if constexpr (dictionary_key_type == DictionaryKeyType::Simple && !sparse)
|
2021-03-24 16:31:00 +00:00
|
|
|
return "Hashed";
|
2021-08-17 17:35:43 +00:00
|
|
|
else if constexpr (dictionary_key_type == DictionaryKeyType::Complex && sparse)
|
2021-06-12 10:53:03 +00:00
|
|
|
return "ComplexKeySparseHashed";
|
2021-03-24 16:31:00 +00:00
|
|
|
else
|
|
|
|
return "ComplexKeyHashed";
|
|
|
|
}
|
2015-01-29 14:46:15 +00:00
|
|
|
|
2017-07-21 06:35:58 +00:00
|
|
|
/// Returns the stored `bytes_allocated` counter.
size_t getBytesAllocated() const override
{
    return bytes_allocated;
}
|
2015-03-24 11:30:16 +00:00
|
|
|
|
2017-07-21 06:35:58 +00:00
|
|
|
/// Number of queries counted so far; read with relaxed ordering.
size_t getQueryCount() const override
{
    const size_t total_queries = query_count.load(std::memory_order_relaxed);
    return total_queries;
}
|
2015-05-08 12:31:00 +00:00
|
|
|
|
2021-05-05 07:56:21 +00:00
|
|
|
double getFoundRate() const override
|
|
|
|
{
|
|
|
|
size_t queries = query_count.load(std::memory_order_relaxed);
|
|
|
|
if (!queries)
|
|
|
|
return 0;
|
|
|
|
return static_cast<double>(found_count.load(std::memory_order_relaxed)) / queries;
|
|
|
|
}
|
|
|
|
|
2015-03-24 17:02:56 +00:00
|
|
|
/// Always 1.0: this dictionary keeps all of its content in memory.
double getHitRate() const override
{
    return 1.0;
}
|
|
|
|
|
2017-07-21 06:35:58 +00:00
|
|
|
/// Current value of the atomic `element_count` counter.
size_t getElementCount() const override
{
    return element_count.load();
}
|
2015-03-24 17:02:56 +00:00
|
|
|
|
|
|
|
/// Ratio of stored elements to hash table buckets.
/// Returns 0 while no buckets are accounted for, instead of dividing by zero
/// (which would produce NaN or infinity); this mirrors the guard in getFoundRate().
double getLoadFactor() const override
{
    if (bucket_count == 0)
        return 0;

    return static_cast<double>(element_count) / bucket_count;
}
|
|
|
|
|
2019-06-02 12:11:01 +00:00
|
|
|
std::shared_ptr<const IExternalLoadable> clone() const override
|
2019-01-19 23:27:52 +00:00
|
|
|
{
|
2022-08-11 17:16:21 +00:00
|
|
|
return std::make_shared<HashedDictionary<dictionary_key_type, sparse, sharded>>(
|
|
|
|
getDictionaryID(),
|
|
|
|
dict_struct,
|
|
|
|
source_ptr->clone(),
|
|
|
|
configuration,
|
|
|
|
update_field_loaded_block);
|
2019-01-19 23:27:52 +00:00
|
|
|
}
|
2015-01-30 13:43:16 +00:00
|
|
|
|
2021-12-15 12:55:28 +00:00
|
|
|
/// Returns the source pointer held by this dictionary.
DictionarySourcePtr getSource() const override
{
    return source_ptr;
}
|
2015-01-29 15:47:21 +00:00
|
|
|
|
2021-05-09 09:12:21 +00:00
|
|
|
/// Returns the lifetime stored in the dictionary configuration.
const DictionaryLifetime & getLifetime() const override
{
    return configuration.lifetime;
}
|
2015-01-30 15:18:13 +00:00
|
|
|
|
2015-03-24 13:59:19 +00:00
|
|
|
/// Returns the dictionary structure this dictionary was created with.
const DictionaryStructure & getStructure() const override
{
    return dict_struct;
}
|
|
|
|
|
2015-05-13 16:11:07 +00:00
|
|
|
bool isInjective(const std::string & attribute_name) const override
|
|
|
|
{
|
2021-03-24 16:31:00 +00:00
|
|
|
return dict_struct.getAttribute(attribute_name).injective;
|
2015-05-13 16:11:07 +00:00
|
|
|
}
|
|
|
|
|
2021-03-24 16:31:00 +00:00
|
|
|
/// Returns the key type this class template was instantiated with.
DictionaryKeyType getKeyType() const override
{
    return dictionary_key_type;
}
|
2020-12-21 14:39:15 +00:00
|
|
|
|
|
|
|
ColumnPtr getColumn(
|
|
|
|
const std::string& attribute_name,
|
|
|
|
const DataTypePtr & result_type,
|
|
|
|
const Columns & key_columns,
|
|
|
|
const DataTypes & key_types,
|
2021-02-16 21:33:02 +00:00
|
|
|
const ColumnPtr & default_values_column) const override;
|
2020-12-21 14:39:15 +00:00
|
|
|
|
2021-01-23 13:18:24 +00:00
|
|
|
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
2015-01-29 13:53:48 +00:00
|
|
|
|
2021-08-17 17:35:43 +00:00
|
|
|
/// True only for simple-key dictionaries whose structure has a hierarchical attribute index.
bool hasHierarchy() const override
{
    if constexpr (dictionary_key_type != DictionaryKeyType::Simple)
        return false;
    else
        return dict_struct.hierarchical_attribute_index.has_value();
}
|
2021-03-24 16:31:00 +00:00
|
|
|
|
2021-03-25 13:32:25 +00:00
|
|
|
ColumnPtr getHierarchy(ColumnPtr key_column, const DataTypePtr & hierarchy_attribute_type) const override;
|
2021-03-24 16:31:00 +00:00
|
|
|
|
|
|
|
ColumnUInt8::Ptr isInHierarchy(
|
|
|
|
ColumnPtr key_column,
|
|
|
|
ColumnPtr in_key_column,
|
|
|
|
const DataTypePtr & key_type) const override;
|
2016-12-12 21:37:57 +00:00
|
|
|
|
2022-05-13 10:48:47 +00:00
|
|
|
DictionaryHierarchicalParentToChildIndexPtr getHierarchicalIndex() const override;
|
|
|
|
|
|
|
|
/// Returns the stored `hierarchical_index_bytes_allocated` counter.
size_t getHierarchicalIndexBytesAllocated() const override
{
    return hierarchical_index_bytes_allocated;
}
|
2022-05-12 10:36:32 +00:00
|
|
|
|
2021-03-25 13:23:19 +00:00
|
|
|
ColumnPtr getDescendants(
|
|
|
|
ColumnPtr key_column,
|
|
|
|
const DataTypePtr & key_type,
|
2022-05-12 10:36:32 +00:00
|
|
|
size_t level,
|
2022-05-13 10:48:47 +00:00
|
|
|
DictionaryHierarchicalParentToChildIndexPtr parent_to_child_index) const override;
|
2021-03-25 13:23:19 +00:00
|
|
|
|
2021-10-21 14:17:53 +00:00
|
|
|
Pipe read(const Names & column_names, size_t max_block_size, size_t num_streams) const override;
|
2017-04-28 18:33:31 +00:00
|
|
|
|
2015-01-30 13:43:16 +00:00
|
|
|
private:
|
2018-12-10 15:25:45 +00:00
|
|
|
template <typename Value>
|
2021-03-25 17:38:30 +00:00
|
|
|
using CollectionTypeNonSparse = std::conditional_t<
|
2021-08-17 17:35:43 +00:00
|
|
|
dictionary_key_type == DictionaryKeyType::Simple,
|
2022-01-22 20:01:45 +00:00
|
|
|
HashMap<UInt64, Value, DefaultHash<UInt64>>,
|
2021-03-25 17:38:30 +00:00
|
|
|
HashMapWithSavedHash<StringRef, Value, DefaultHash<StringRef>>>;
|
|
|
|
|
2022-01-22 20:01:45 +00:00
|
|
|
using NoAttributesCollectionTypeNonSparse = std::conditional_t<
|
|
|
|
dictionary_key_type == DictionaryKeyType::Simple,
|
|
|
|
HashSet<UInt64, DefaultHash<UInt64>>,
|
|
|
|
HashSetWithSavedHash<StringRef, DefaultHash<StringRef>>>;
|
|
|
|
|
2021-12-14 06:12:42 +00:00
|
|
|
/// Here we use sparse_hash_map with DefaultHash<> for the following reasons:
|
2021-12-10 20:45:36 +00:00
|
|
|
///
|
|
|
|
/// - DefaultHash<> is used for HashMap
|
|
|
|
/// - DefaultHash<> (from HashTable/Hash.h) works better than std::hash<>
|
|
|
|
/// in case of sequential set of keys, but with random access to this set, i.e.
|
|
|
|
///
|
|
|
|
/// SELECT number FROM numbers(3000000) ORDER BY rand()
|
|
|
|
///
|
|
|
|
/// And even though std::hash<> works better in some other cases,
|
|
|
|
/// DefaultHash<> is preferred since the difference for this particular
|
|
|
|
/// case is significant, i.e. it can be 10x+.
|
2019-09-20 23:22:40 +00:00
|
|
|
template <typename Value>
|
2021-03-25 17:38:30 +00:00
|
|
|
using CollectionTypeSparse = std::conditional_t<
|
2021-08-17 17:35:43 +00:00
|
|
|
dictionary_key_type == DictionaryKeyType::Simple,
|
2021-12-14 06:12:42 +00:00
|
|
|
google::sparse_hash_map<UInt64, Value, DefaultHash<KeyType>>,
|
|
|
|
google::sparse_hash_map<StringRef, Value, DefaultHash<KeyType>>>;
|
2019-09-20 23:22:40 +00:00
|
|
|
|
2022-01-22 20:01:45 +00:00
|
|
|
/// Key set of the sparse variant, hashed with DefaultHash<> like the sparse map.
using NoAttributesCollectionTypeSparse = google::sparse_hash_set<KeyType, DefaultHash<KeyType>>;
|
|
|
|
|
2021-03-24 16:31:00 +00:00
|
|
|
template <typename Value>
|
|
|
|
using CollectionType = std::conditional_t<sparse, CollectionTypeSparse<Value>, CollectionTypeNonSparse<Value>>;
|
2021-01-03 10:07:21 +00:00
|
|
|
|
Add ability to load hashed dictionaries using multiple threads
Right now dictionaries (here I will talk about only
HASHED/SPARSE_HASHED/COMPLEX_KEY_HASHED/COMPLEX_KEY_SPARSE_HASHED)
can load data only in one thread, since it uses one hash table that
cannot be filled from multiple threads.
And in case you have very big dictionary (i.e. 10e9 elements), it can
take a awhile to load them, especially for SPARSE_HASHED variants (and
if you have such amount of elements there, you are likely use
SPARSE_HASHED, since it requires less memory), in my env it takes ~4
hours, which is enormous amount of time.
So this patch add support of shards for dictionaries, number of shards
determine how much hash tables will use this dictionary, also, and which
is more important, how much threads it can use to load the data.
And with 16 threads this works 2x faster, not perfect though, see the
follow up patches in this series.
v0: PARTITION BY
v1: SHARDS 1
v2: SHARDS(1)
v3: tried optimized mod - logical and, but it does not gain even 10%
v4: tried squashing more (max_block_size * shards), but it does not gain even 10% either
v5: move SHARDS into layout parameters (unknown simply ignored)
v6: tune params for perf tests (to avoid too long queries)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-08-07 15:48:54 +00:00
|
|
|
template <typename Value>
|
|
|
|
using CollectionsHolder = std::vector<CollectionType<Value>>;
|
|
|
|
|
2022-01-22 20:01:45 +00:00
|
|
|
/// Key set actually used by this instantiation: sparse or non-sparse, depending on `sparse`.
using NoAttributesCollectionType = std::conditional_t<sparse, NoAttributesCollectionTypeSparse, NoAttributesCollectionTypeNonSparse>;
|
|
|
|
|
2021-03-26 18:01:56 +00:00
|
|
|
/// Set of keys used by Attribute::is_nullable_sets.
/// NOTE(review): presumably the keys whose attribute value is NULL -- confirm in the implementation.
using NullableSet = HashSet<KeyType, DefaultHash<KeyType>>;
|
2023-01-18 14:29:21 +00:00
|
|
|
/// A vector of NullableSet. NOTE(review): presumably one set per shard -- confirm.
using NullableSets = std::vector<NullableSet>;
|
2021-03-25 17:38:30 +00:00
|
|
|
|
2016-08-07 09:09:18 +00:00
|
|
|
struct Attribute final
|
2015-01-29 13:53:48 +00:00
|
|
|
{
|
2015-03-20 15:21:29 +00:00
|
|
|
AttributeUnderlyingType type;
|
2023-01-18 14:29:21 +00:00
|
|
|
std::optional<NullableSets> is_nullable_sets;
|
2021-03-26 18:01:56 +00:00
|
|
|
|
2019-09-20 23:22:40 +00:00
|
|
|
std::variant<
|
Add ability to load hashed dictionaries using multiple threads
Right now dictionaries (here I will talk about only
HASHED/SPARSE_HASHED/COMPLEX_KEY_HASHED/COMPLEX_KEY_SPARSE_HASHED)
can load data only in one thread, since it uses one hash table that
cannot be filled from multiple threads.
And in case you have very big dictionary (i.e. 10e9 elements), it can
take a awhile to load them, especially for SPARSE_HASHED variants (and
if you have such amount of elements there, you are likely use
SPARSE_HASHED, since it requires less memory), in my env it takes ~4
hours, which is enormous amount of time.
So this patch add support of shards for dictionaries, number of shards
determine how much hash tables will use this dictionary, also, and which
is more important, how much threads it can use to load the data.
And with 16 threads this works 2x faster, not perfect though, see the
follow up patches in this series.
v0: PARTITION BY
v1: SHARDS 1
v2: SHARDS(1)
v3: tried optimized mod - logical and, but it does not gain even 10%
v4: tried squashing more (max_block_size * shards), but it does not gain even 10% either
v5: move SHARDS into layout parameters (unknown simply ignored)
v6: tune params for perf tests (to avoid too long queries)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-08-07 15:48:54 +00:00
|
|
|
CollectionsHolder<UInt8>,
|
|
|
|
CollectionsHolder<UInt16>,
|
|
|
|
CollectionsHolder<UInt32>,
|
|
|
|
CollectionsHolder<UInt64>,
|
|
|
|
CollectionsHolder<UInt128>,
|
|
|
|
CollectionsHolder<UInt256>,
|
|
|
|
CollectionsHolder<Int8>,
|
|
|
|
CollectionsHolder<Int16>,
|
|
|
|
CollectionsHolder<Int32>,
|
|
|
|
CollectionsHolder<Int64>,
|
|
|
|
CollectionsHolder<Int128>,
|
|
|
|
CollectionsHolder<Int256>,
|
|
|
|
CollectionsHolder<Decimal32>,
|
|
|
|
CollectionsHolder<Decimal64>,
|
|
|
|
CollectionsHolder<Decimal128>,
|
|
|
|
CollectionsHolder<Decimal256>,
|
|
|
|
CollectionsHolder<DateTime64>,
|
|
|
|
CollectionsHolder<Float32>,
|
|
|
|
CollectionsHolder<Float64>,
|
|
|
|
CollectionsHolder<UUID>,
|
|
|
|
CollectionsHolder<IPv4>,
|
|
|
|
CollectionsHolder<IPv6>,
|
|
|
|
CollectionsHolder<StringRef>,
|
|
|
|
CollectionsHolder<Array>>
|
|
|
|
containers;
|
2015-01-29 13:53:48 +00:00
|
|
|
};
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-06-07 21:07:44 +00:00
|
|
|
void createAttributes();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2023-01-13 12:30:13 +00:00
|
|
|
void blockToAttributes(const Block & block, DictionaryKeysArenaHolder<dictionary_key_type> & arena_holder, UInt64 shard);
|
2018-02-15 13:08:23 +00:00
|
|
|
|
2018-01-15 12:44:39 +00:00
|
|
|
void updateData();
|
|
|
|
|
2016-06-07 21:07:44 +00:00
|
|
|
void loadData();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2022-05-12 11:20:27 +00:00
|
|
|
void buildHierarchyParentToChildIndexIfNeeded();
|
|
|
|
|
2016-06-07 21:07:44 +00:00
|
|
|
void calculateBytesAllocated();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
Add ability to load hashed dictionaries using multiple threads
Right now dictionaries (here I will talk about only
HASHED/SPARSE_HASHED/COMPLEX_KEY_HASHED/COMPLEX_KEY_SPARSE_HASHED)
can load data only in one thread, since it uses one hash table that
cannot be filled from multiple threads.
And in case you have very big dictionary (i.e. 10e9 elements), it can
take a awhile to load them, especially for SPARSE_HASHED variants (and
if you have such amount of elements there, you are likely use
SPARSE_HASHED, since it requires less memory), in my env it takes ~4
hours, which is enormous amount of time.
So this patch add support of shards for dictionaries, number of shards
determine how much hash tables will use this dictionary, also, and which
is more important, how much threads it can use to load the data.
And with 16 threads this works 2x faster, not perfect though, see the
follow up patches in this series.
v0: PARTITION BY
v1: SHARDS 1
v2: SHARDS(1)
v3: tried optimized mod - logical and, but it does not gain even 10%
v4: tried squashing more (max_block_size * shards), but it does not gain even 10% either
v5: move SHARDS into layout parameters (unknown simply ignored)
v6: tune params for perf tests (to avoid too long queries)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-08-07 15:48:54 +00:00
|
|
|
/// Maps a simple (UInt64) key to the index of its shard.
/// Non-sharded instantiations always use shard 0.
UInt64 getShard(UInt64 key) const
{
    if constexpr (sharded)
    {
        /// NOTE: the hash function used here must not be the same as DefaultHash<>,
        /// since DefaultHash<> is the one used for the HashMap/sparse_hash_map.
        return intHashCRC32(key) % configuration.shards;
    }
    else
    {
        return 0;
    }
}
|
2023-01-03 14:16:02 +00:00
|
|
|
|
Add ability to load hashed dictionaries using multiple threads
Right now dictionaries (here I will talk about only
HASHED/SPARSE_HASHED/COMPLEX_KEY_HASHED/COMPLEX_KEY_SPARSE_HASHED)
can load data only in one thread, since it uses one hash table that
cannot be filled from multiple threads.
And in case you have very big dictionary (i.e. 10e9 elements), it can
take a awhile to load them, especially for SPARSE_HASHED variants (and
if you have such amount of elements there, you are likely use
SPARSE_HASHED, since it requires less memory), in my env it takes ~4
hours, which is enormous amount of time.
So this patch add support of shards for dictionaries, number of shards
determine how much hash tables will use this dictionary, also, and which
is more important, how much threads it can use to load the data.
And with 16 threads this works 2x faster, not perfect though, see the
follow up patches in this series.
v0: PARTITION BY
v1: SHARDS 1
v2: SHARDS(1)
v3: tried optimized mod - logical and, but it does not gain even 10%
v4: tried squashing more (max_block_size * shards), but it does not gain even 10% either
v5: move SHARDS into layout parameters (unknown simply ignored)
v6: tune params for perf tests (to avoid too long queries)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-08-07 15:48:54 +00:00
|
|
|
/// Maps a complex (StringRef) key to the index of its shard.
/// Non-sharded instantiations always use shard 0.
UInt64 getShard(StringRef key) const
{
    if constexpr (sharded)
    {
        /// Shard index is the StringRefHash of the key modulo the configured shard count.
        return StringRefHash()(key) % configuration.shards;
    }
    else
    {
        return 0;
    }
}
|
|
|
|
|
2021-06-12 10:53:03 +00:00
|
|
|
template <typename AttributeType, bool is_nullable, typename ValueSetter, typename DefaultValueExtractor>
|
2021-03-31 10:21:30 +00:00
|
|
|
void getItemsImpl(
|
|
|
|
const Attribute & attribute,
|
|
|
|
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
|
|
|
|
ValueSetter && set_value,
|
|
|
|
DefaultValueExtractor & default_value_extractor) const;
|
|
|
|
|
2023-01-18 14:29:21 +00:00
|
|
|
template <typename GetContainersFunc>
|
|
|
|
void getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2023-01-18 14:29:21 +00:00
|
|
|
template <typename GetContainersFunc>
|
|
|
|
void getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func) const;
|
2021-01-23 13:18:24 +00:00
|
|
|
|
2020-09-24 05:01:00 +00:00
|
|
|
void resize(size_t added_rows);
|
|
|
|
|
2022-11-23 12:35:35 +00:00
|
|
|
Poco::Logger * log;
|
|
|
|
|
2015-01-30 13:43:16 +00:00
|
|
|
const DictionaryStructure dict_struct;
|
|
|
|
const DictionarySourcePtr source_ptr;
|
2023-01-18 14:29:21 +00:00
|
|
|
const HashedDictionaryConfiguration configuration;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-08-07 09:09:18 +00:00
|
|
|
std::vector<Attribute> attributes;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-07-21 06:35:58 +00:00
|
|
|
size_t bytes_allocated = 0;
|
2022-05-13 10:48:47 +00:00
|
|
|
size_t hierarchical_index_bytes_allocated = 0;
|
Add ability to load hashed dictionaries using multiple threads
Right now dictionaries (here I will talk about only
HASHED/SPARSE_HASHED/COMPLEX_KEY_HASHED/COMPLEX_KEY_SPARSE_HASHED)
can load data only in one thread, since it uses one hash table that
cannot be filled from multiple threads.
And in case you have very big dictionary (i.e. 10e9 elements), it can
take a awhile to load them, especially for SPARSE_HASHED variants (and
if you have such amount of elements there, you are likely use
SPARSE_HASHED, since it requires less memory), in my env it takes ~4
hours, which is enormous amount of time.
So this patch add support of shards for dictionaries, number of shards
determine how much hash tables will use this dictionary, also, and which
is more important, how much threads it can use to load the data.
And with 16 threads this works 2x faster, not perfect though, see the
follow up patches in this series.
v0: PARTITION BY
v1: SHARDS 1
v2: SHARDS(1)
v3: tried optimized mod - logical and, but it does not gain even 10%
v4: tried squashing more (max_block_size * shards), but it does not gain even 10% either
v5: move SHARDS into layout parameters (unknown simply ignored)
v6: tune params for perf tests (to avoid too long queries)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-08-07 15:48:54 +00:00
|
|
|
std::atomic<size_t> element_count = 0;
|
2017-07-21 06:35:58 +00:00
|
|
|
size_t bucket_count = 0;
|
|
|
|
mutable std::atomic<size_t> query_count{0};
|
2021-05-05 07:56:21 +00:00
|
|
|
mutable std::atomic<size_t> found_count{0};
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-04-30 22:23:22 +00:00
|
|
|
BlockPtr update_field_loaded_block;
|
Add ability to load hashed dictionaries using multiple threads
Right now dictionaries (here I will talk about only
HASHED/SPARSE_HASHED/COMPLEX_KEY_HASHED/COMPLEX_KEY_SPARSE_HASHED)
can load data only in one thread, since it uses one hash table that
cannot be filled from multiple threads.
And in case you have very big dictionary (i.e. 10e9 elements), it can
take a awhile to load them, especially for SPARSE_HASHED variants (and
if you have such amount of elements there, you are likely use
SPARSE_HASHED, since it requires less memory), in my env it takes ~4
hours, which is enormous amount of time.
So this patch add support of shards for dictionaries, number of shards
determine how much hash tables will use this dictionary, also, and which
is more important, how much threads it can use to load the data.
And with 16 threads this works 2x faster, not perfect though, see the
follow up patches in this series.
v0: PARTITION BY
v1: SHARDS 1
v2: SHARDS(1)
v3: tried optimized mod - logical and, but it does not gain even 10%
v4: tried squashing more (max_block_size * shards), but it does not gain even 10% either
v5: move SHARDS into layout parameters (unknown simply ignored)
v6: tune params for perf tests (to avoid too long queries)
Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
2022-08-07 15:48:54 +00:00
|
|
|
std::vector<std::unique_ptr<Arena>> string_arenas;
|
|
|
|
std::vector<NoAttributesCollectionType> no_attributes_containers;
|
2022-05-13 10:48:47 +00:00
|
|
|
DictionaryHierarchicalParentToChildIndexPtr hierarchical_index;
|
2015-01-29 13:53:48 +00:00
|
|
|
};
|
|
|
|
|
2023-01-18 14:29:21 +00:00
|
|
|
/// Explicit instantiation declaration: simple key, non-sparse, non-sharded.
extern template class HashedDictionary<DictionaryKeyType::Simple, false /*sparse*/, false /*sharded*/>;
|
|
|
|
extern template class HashedDictionary<DictionaryKeyType::Simple, false /*sparse*/, true /*sharded*/>;
|
|
|
|
extern template class HashedDictionary<DictionaryKeyType::Simple, true /*sparse*/, false /*sharded*/>;
|
|
|
|
extern template class HashedDictionary<DictionaryKeyType::Simple, true /*sparse*/, true /*sharded*/>;
|
2021-03-24 16:31:00 +00:00
|
|
|
|
2023-01-18 14:29:21 +00:00
|
|
|
extern template class HashedDictionary<DictionaryKeyType::Complex, false /*sparse*/, false /*sharded*/>;
|
|
|
|
extern template class HashedDictionary<DictionaryKeyType::Complex, false /*sparse*/, true /*sharded*/>;
|
|
|
|
extern template class HashedDictionary<DictionaryKeyType::Complex, true /*sparse*/, false /*sharded*/>;
|
|
|
|
extern template class HashedDictionary<DictionaryKeyType::Complex, true /*sparse*/, true /*sharded*/>;
|
2021-03-24 16:31:00 +00:00
|
|
|
|
2015-01-29 13:53:48 +00:00
|
|
|
}
|