dbms: unified hash tables: development [#METR-2944].

This commit is contained in:
Alexey Milovidov 2014-04-28 06:47:56 +04:00
parent 8232983714
commit 4bf42ea6e6
7 changed files with 31 additions and 29 deletions

View File

@ -26,15 +26,17 @@ struct ClearableHashMapState
template <typename Key, typename Mapped, typename Hash> template <typename Key, typename Mapped, typename Hash>
struct ClearableHashMapCell : public HashMapCell struct ClearableHashMapCell : public HashMapCell<Key, Mapped, Hash, ClearableHashMapState>
{ {
typedef ClearableHashMapState State; typedef ClearableHashMapState State;
typedef HashMapCell<Key, Mapped, Hash, ClearableHashMapState> Base;
typedef typename Base::value_type value_type;
UInt32 version; UInt32 version;
ClearableHashMapCell() {} ClearableHashMapCell() {}
ClearableHashMapCell(const Key & key_, const State & state) : value(key_, Mapped()), version(state.version) {} ClearableHashMapCell(const Key & key_, const State & state) : Base(key_, state), version(state.version) {}
ClearableHashMapCell(const value_type & value_, const State & state) : value(value_), version(state.version) {} ClearableHashMapCell(const value_type & value_, const State & state) : Base(value_, state), version(state.version) {}
bool isZero(const State & state) const { return version != state.version; } bool isZero(const State & state) const { return version != state.version; }
static bool isZero(const Key & key, const State & state) { return false; } static bool isZero(const Key & key, const State & state) { return false; }

View File

@ -5,11 +5,11 @@
#include <DB/Common/HashTable/HashTableAllocator.h> #include <DB/Common/HashTable/HashTableAllocator.h>
template <typename Key, typename Mapped, typename Hash> template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
struct HashMapCell struct HashMapCell
{ {
typedef Mapped TMapped; typedef TMapped Mapped;
typedef HashTableNoState State; typedef TState State;
typedef std::pair<Key, Mapped> value_type; typedef std::pair<Key, Mapped> value_type;
value_type value; value_type value;
@ -86,7 +86,7 @@ class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator>
{ {
public: public:
typedef Key key_type; typedef Key key_type;
typedef typename Cell::TMapped mapped_type; typedef typename Cell::Mapped mapped_type;
typedef typename Cell::value_type value_type; typedef typename Cell::value_type value_type;
mapped_type & operator[](Key x) mapped_type & operator[](Key x)

View File

@ -58,10 +58,10 @@ struct HashTableNoState
* Также может содержать значение и произвольные дополнительные данные * Также может содержать значение и произвольные дополнительные данные
* (пример: запомненное значение хэш-функции; номер версии для ClearableHashMap). * (пример: запомненное значение хэш-функции; номер версии для ClearableHashMap).
*/ */
template <typename Key, typename Hash> template <typename Key, typename Hash, typename TState = HashTableNoState>
struct HashTableCell struct HashTableCell
{ {
typedef HashTableNoState State; typedef TState State;
typedef Key value_type; typedef Key value_type;
Key key; Key key;

View File

@ -1052,19 +1052,11 @@ public:
} }
private: private:
struct table_growth_traits
{
/// Изначально выделить кусок памяти для 512 элементов. /// Изначально выделить кусок памяти для 512 элементов.
static const int INITIAL_SIZE_DEGREE = 9; struct Grower : public HashTableGrower
{
/** Степень роста хэш таблицы, пока не превышен порог размера. (В 4 раза.) static const size_t initial_size_degree = 9;
*/ Grower() { size_degree = initial_size_degree; }
static const int FAST_GROWTH_DEGREE = 2;
/** Порог размера, после которого степень роста уменьшается (до роста в 2 раза) - 8 миллионов элементов.
* После этого порога, максимально возможный оверхед по памяти будет всего лишь в 4, а не в 8 раз.
*/
static const int GROWTH_CHANGE_THRESHOLD = 23;
}; };
template <typename T> template <typename T>
@ -1076,7 +1068,9 @@ private:
const ColumnArray::Offsets_t & offsets = array->getOffsets(); const ColumnArray::Offsets_t & offsets = array->getOffsets();
const typename ColumnVector<T>::Container_t & values = nested->getData(); const typename ColumnVector<T>::Container_t & values = nested->getData();
typedef ClearableHashMap<T, UInt32, default_hash<T>, table_growth_traits> ValuesToIndices; typedef ClearableHashMap<T, UInt32, DefaultHash<T>, Grower,
HashTableAllocatorWithStackMemory<(1 << Grower::initial_size_degree) * sizeof(T)> > ValuesToIndices;
ValuesToIndices indices; ValuesToIndices indices;
size_t prev_off = 0; size_t prev_off = 0;
for (size_t i = 0; i < offsets.size(); ++i) for (size_t i = 0; i < offsets.size(); ++i)
@ -1100,7 +1094,9 @@ private:
const ColumnArray::Offsets_t & offsets = array->getOffsets(); const ColumnArray::Offsets_t & offsets = array->getOffsets();
size_t prev_off = 0; size_t prev_off = 0;
typedef ClearableHashMap<StringRef, UInt32, std::hash<StringRef>, table_growth_traits> ValuesToIndices; typedef ClearableHashMap<StringRef, UInt32, DefaultHash<StringRef>, Grower,
HashTableAllocatorWithStackMemory<(1 << Grower::initial_size_degree) * sizeof(StringRef)> > ValuesToIndices;
ValuesToIndices indices; ValuesToIndices indices;
for (size_t i = 0; i < offsets.size(); ++i) for (size_t i = 0; i < offsets.size(); ++i)
{ {
@ -1153,7 +1149,9 @@ private:
if (keys_bytes > 16) if (keys_bytes > 16)
return false; return false;
typedef ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, table_growth_traits> ValuesToIndices; typedef ClearableHashMap<UInt128, UInt32, UInt128Hash, Grower,
HashTableAllocatorWithStackMemory<(1 << Grower::initial_size_degree) * sizeof(UInt128)> > ValuesToIndices;
ValuesToIndices indices; ValuesToIndices indices;
size_t prev_off = 0; size_t prev_off = 0;
for (size_t i = 0; i < offsets.size(); ++i) for (size_t i = 0; i < offsets.size(); ++i)
@ -1177,7 +1175,9 @@ private:
{ {
size_t count = columns.size(); size_t count = columns.size();
typedef ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, table_growth_traits> ValuesToIndices; typedef ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, Grower,
HashTableAllocatorWithStackMemory<(1 << Grower::initial_size_degree) * sizeof(UInt128)> > ValuesToIndices;
ValuesToIndices indices; ValuesToIndices indices;
StringRefs keys(count); StringRefs keys(count);
size_t prev_off = 0; size_t prev_off = 0;

View File

@ -28,7 +28,7 @@ int main(int argc, char ** argv)
typedef std::unordered_map<std::string, int> Set; typedef std::unordered_map<std::string, int> Set;
typedef std::unordered_map<StringRef, int> RefsSet; typedef std::unordered_map<StringRef, int> RefsSet;
typedef google::dense_hash_map<std::string, int> DenseSet; typedef google::dense_hash_map<std::string, int> DenseSet;
typedef google::dense_hash_map<StringRef, int> RefsDenseSet; typedef google::dense_hash_map<StringRef, int, std::hash<StringRef> > RefsDenseSet;
typedef HashMap<StringRef, int> RefsHashMap; typedef HashMap<StringRef, int> RefsHashMap;
Vec vec; Vec vec;

View File

@ -530,7 +530,7 @@ struct GrowthTraits : public DB::default_growth_traits
int main(int argc, char ** argv) int main(int argc, char ** argv)
{ {
typedef HashMap< typedef DB::HashMap<
StringRef, StringRef,
UInt64, UInt64,
TrivialHash, TrivialHash,

View File

@ -94,7 +94,7 @@ static std::string listOfColumns(const NamesAndTypesList & available_columns)
} }
typedef google::dense_hash_map<StringRef, const IDataType *> NamesAndTypesMap; typedef google::dense_hash_map<StringRef, const IDataType *, std::hash<StringRef> > NamesAndTypesMap;
static NamesAndTypesMap getColumnsMap(const NamesAndTypesList & available_columns) static NamesAndTypesMap getColumnsMap(const NamesAndTypesList & available_columns)
@ -120,7 +120,7 @@ void ITableDeclaration::check(const Names & column_names) const
const NamesAndTypesMap & columns_map = getColumnsMap(available_columns); const NamesAndTypesMap & columns_map = getColumnsMap(available_columns);
typedef google::dense_hash_set<StringRef> UniqueStrings; typedef google::dense_hash_set<StringRef, std::hash<StringRef> > UniqueStrings;
UniqueStrings unique_names; UniqueStrings unique_names;
unique_names.set_empty_key(StringRef()); unique_names.set_empty_key(StringRef());