Speed up inverted index building using flat_hash_map

This commit is contained in:
HarryLeeIBM 2023-10-02 15:20:35 -07:00
parent fabd167af6
commit 9e1208a793
2 changed files with 5 additions and 4 deletions

View File

@@ -3,13 +3,13 @@
#include <map> #include <map>
#include <memory> #include <memory>
#include <string> #include <string>
#include <unordered_map>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include <Core/Types.h> #include <Core/Types.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/WriteBuffer.h> #include <IO/WriteBuffer.h>
#include <base/types.h> #include <base/types.h>
#include <absl/container/flat_hash_map.h>
namespace DB namespace DB
{ {
@@ -107,7 +107,7 @@ public:
UInt64 state_index = 0; UInt64 state_index = 0;
/// Arcs which are started from state, the 'char' is the label on the arc /// Arcs which are started from state, the 'char' is the label on the arc
std::unordered_map<char, Arc> arcs; absl::flat_hash_map<char, Arc> arcs;
private: private:
struct FlagValues struct FlagValues
@@ -146,7 +146,7 @@ private:
StatePtr initial_state; StatePtr initial_state;
/// map of (state_hash, StatePtr) /// map of (state_hash, StatePtr)
std::unordered_map<UInt64, StatePtr> minimized_states; absl::flat_hash_map<UInt64, StatePtr> minimized_states;
/// Next available ID of state /// Next available ID of state
UInt64 next_id = 1; UInt64 next_id = 1;

View File

@@ -11,6 +11,7 @@
#include <mutex> #include <mutex>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include <absl/container/flat_hash_map.h>
/// GinIndexStore manages the generalized inverted index ("gin") for a data part, and it is made up of one or more immutable /// GinIndexStore manages the generalized inverted index ("gin") for a data part, and it is made up of one or more immutable
/// index segments. /// index segments.
@@ -124,7 +125,7 @@ class GinIndexStore
{ {
public: public:
/// Container for all term's Gin Index Postings List Builder /// Container for all term's Gin Index Postings List Builder
using GinIndexPostingsBuilderContainer = std::unordered_map<std::string, GinIndexPostingsBuilderPtr>; using GinIndexPostingsBuilderContainer = absl::flat_hash_map<std::string, GinIndexPostingsBuilderPtr>;
GinIndexStore(const String & name_, DataPartStoragePtr storage_); GinIndexStore(const String & name_, DataPartStoragePtr storage_);
GinIndexStore(const String & name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 max_digestion_size_); GinIndexStore(const String & name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 max_digestion_size_);