From 99063b152fa35e0c26242399e9afbef2905c5fc3 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 25 Nov 2022 11:14:51 +0100 Subject: [PATCH] Allow to configure queue backlog of the parallel hashed dictionary loader v2: Decrease default parallel_queue_backlog to 10000 (same speed) v3: Rename parallel_queue_backlog to per_shard_load_backlog v3: Rename per_shard_load_backlog to shard_load_queue_backlog v4: Fix documentation Signed-off-by: Azat Khuzhin --- .../external-dicts-dict-layout.md | 16 ++++++++++++--- src/Dictionaries/HashedDictionary.cpp | 20 ++++++++++--------- src/Dictionaries/HashedDictionary.h | 1 + 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 1ce028a00ed..a443b3e7332 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -164,6 +164,15 @@ Configuration example: 10 + + 10000 ``` @@ -171,7 +180,7 @@ Configuration example: or ``` sql -LAYOUT(HASHED(SHARDS 10)) +LAYOUT(HASHED(SHARDS 10 [SHARD_LOAD_QUEUE_BACKLOG 10000])) ``` ### sparse_hashed @@ -209,6 +218,7 @@ Configuration example: 0 1 + ``` @@ -216,7 +226,7 @@ Configuration example: or ``` sql -LAYOUT(COMPLEX_KEY_HASHED([PREALLOCATE 0] [SHARDS 1])) +LAYOUT(COMPLEX_KEY_HASHED([PREALLOCATE 0] [SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000])) ``` ### complex_key_sparse_hashed @@ -237,7 +247,7 @@ Configuration example: or ``` sql -LAYOUT(COMPLEX_KEY_SPARSE_HASHED([PREALLOCATE 0] [SHARDS 1])) +LAYOUT(COMPLEX_KEY_SPARSE_HASHED([PREALLOCATE 0] [SHARDS 1] [SHARD_LOAD_QUEUE_BACKLOG 10000])) ``` ### hashed_array diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index 9191e24b704..22f94ddcb10 100644 --- 
a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -55,22 +55,22 @@ class ParallelDictionaryLoader : public boost::noncopyable using Queue = ConcurrentBoundedQueue; public: - explicit ParallelDictionaryLoader(HashedDictionary & dictionary_, size_t max_fill_ = 100'000) + explicit ParallelDictionaryLoader(HashedDictionary & dictionary_) : dictionary(dictionary_) , shards(dictionary.configuration.shards) - , max_fill(max_fill_) , simple_key(dictionary.dict_struct.getKeysSize() == 1) , pool(shards) , shards_queues(shards) { - LOG_TRACE(dictionary.log, "Will load the dictionary using {} threads", shards); + UInt64 backlog = dictionary.configuration.shard_load_queue_backlog; + LOG_TRACE(dictionary.log, "Will load the dictionary using {} threads (with {} backlog)", shards, backlog); shards_slots.resize(shards); std::generate(shards_slots.begin(), shards_slots.end(), [n = 0]() mutable { return n++; }); for (size_t shard = 0; shard < shards; ++shard) { - shards_queues[shard].emplace(max_fill); + shards_queues[shard].emplace(backlog); pool.scheduleOrThrowOnError([this, shard, thread_group = CurrentThread::getGroup()] { if (thread_group) @@ -124,7 +124,6 @@ public: private: HashedDictionary & dictionary; const size_t shards; - const size_t max_fill; bool simple_key; ThreadPool pool; std::vector> shards_queues; @@ -1116,14 +1115,17 @@ void registerDictionaryHashed(DictionaryFactory & factory) const bool preallocate = config.getBool(config_prefix + dictionary_layout_prefix + ".preallocate", false); Int64 shards = config.getInt(config_prefix + dictionary_layout_prefix + ".shards", 1); - if (!shards) - shards = 1; - if (shards < 0 || shards > 128) - throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARDS parameter should be within [0, 128]", full_name); + if (shards <= 0 || shards > 128) + throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARDS parameter should be within [1, 128]", full_name); + + Int64 shard_load_queue_backlog = 
config.getInt(config_prefix + dictionary_layout_prefix + ".shard_load_queue_backlog", 10000); + if (shard_load_queue_backlog <= 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS,"{}: SHARD_LOAD_QUEUE_BACKLOG parameter should be greater than zero", full_name); HashedDictionaryStorageConfiguration configuration{ preallocate, static_cast(shards), + static_cast(shard_load_queue_backlog), require_nonempty, dict_lifetime, }; diff --git a/src/Dictionaries/HashedDictionary.h b/src/Dictionaries/HashedDictionary.h index 0d68095f9a9..11f2ecdb1da 100644 --- a/src/Dictionaries/HashedDictionary.h +++ b/src/Dictionaries/HashedDictionary.h @@ -28,6 +28,7 @@ struct HashedDictionaryStorageConfiguration { const bool preallocate; const UInt64 shards; + const UInt64 shard_load_queue_backlog; const bool require_nonempty; const DictionaryLifetime lifetime; };