Speed up inverted index building using flat_hash_map

This commit is contained in:
HarryLeeIBM 2023-10-02 15:20:35 -07:00
parent fabd167af6
commit 9e1208a793
2 changed files with 5 additions and 4 deletions

View File

@@ -3,13 +3,13 @@
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <Core/Types.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBuffer.h>
#include <base/types.h>
#include <absl/container/flat_hash_map.h>
namespace DB
{
@@ -107,7 +107,7 @@ public:
UInt64 state_index = 0;
/// Arcs that start from this state; the 'char' key is the label on the arc
std::unordered_map<char, Arc> arcs;
absl::flat_hash_map<char, Arc> arcs;
private:
struct FlagValues
@@ -146,7 +146,7 @@ private:
StatePtr initial_state;
/// map of (state_hash, StatePtr)
std::unordered_map<UInt64, StatePtr> minimized_states;
absl::flat_hash_map<UInt64, StatePtr> minimized_states;
/// Next available ID of state
UInt64 next_id = 1;

View File

@@ -11,6 +11,7 @@
#include <mutex>
#include <unordered_map>
#include <vector>
#include <absl/container/flat_hash_map.h>
/// GinIndexStore manages the generalized inverted index ("gin") for a data part, and it is made up of one or more immutable
/// index segments.
@@ -124,7 +125,7 @@ class GinIndexStore
{
public:
/// Container for all terms' Gin index postings list builders
using GinIndexPostingsBuilderContainer = std::unordered_map<std::string, GinIndexPostingsBuilderPtr>;
using GinIndexPostingsBuilderContainer = absl::flat_hash_map<std::string, GinIndexPostingsBuilderPtr>;
GinIndexStore(const String & name_, DataPartStoragePtr storage_);
GinIndexStore(const String & name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 max_digestion_size_);