dbms: unified hash tables: development [#METR-2944].

This commit is contained in:
Alexey Milovidov 2014-04-28 06:47:56 +04:00
parent 8232983714
commit 4bf42ea6e6
7 changed files with 31 additions and 29 deletions

View File

@ -26,15 +26,17 @@ struct ClearableHashMapState
template <typename Key, typename Mapped, typename Hash>
struct ClearableHashMapCell : public HashMapCell
struct ClearableHashMapCell : public HashMapCell<Key, Mapped, Hash, ClearableHashMapState>
{
typedef ClearableHashMapState State;
typedef HashMapCell<Key, Mapped, Hash, ClearableHashMapState> Base;
typedef typename Base::value_type value_type;
UInt32 version;
ClearableHashMapCell() {}
ClearableHashMapCell(const Key & key_, const State & state) : value(key_, Mapped()), version(state.version) {}
ClearableHashMapCell(const value_type & value_, const State & state) : value(value_), version(state.version) {}
ClearableHashMapCell(const Key & key_, const State & state) : Base(key_, state), version(state.version) {}
ClearableHashMapCell(const value_type & value_, const State & state) : Base(value_, state), version(state.version) {}
bool isZero(const State & state) const { return version != state.version; }
static bool isZero(const Key & key, const State & state) { return false; }

View File

@ -5,11 +5,11 @@
#include <DB/Common/HashTable/HashTableAllocator.h>
template <typename Key, typename Mapped, typename Hash>
template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
struct HashMapCell
{
typedef Mapped TMapped;
typedef HashTableNoState State;
typedef TMapped Mapped;
typedef TState State;
typedef std::pair<Key, Mapped> value_type;
value_type value;
@ -86,7 +86,7 @@ class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator>
{
public:
typedef Key key_type;
typedef typename Cell::TMapped mapped_type;
typedef typename Cell::Mapped mapped_type;
typedef typename Cell::value_type value_type;
mapped_type & operator[](Key x)

View File

@ -58,10 +58,10 @@ struct HashTableNoState
* Также может содержать значение и произвольные дополнительные данные
* (пример: запомненное значение хэш-функции; номер версии для ClearableHashMap).
*/
template <typename Key, typename Hash>
template <typename Key, typename Hash, typename TState = HashTableNoState>
struct HashTableCell
{
typedef HashTableNoState State;
typedef TState State;
typedef Key value_type;
Key key;

View File

@ -1052,19 +1052,11 @@ public:
}
private:
struct table_growth_traits
{
/// Изначально выделить кусок памяти для 512 элементов.
static const int INITIAL_SIZE_DEGREE = 9;
/** Степень роста хэш таблицы, пока не превышен порог размера. (В 4 раза.)
*/
static const int FAST_GROWTH_DEGREE = 2;
/** Порог размера, после которого степень роста уменьшается (до роста в 2 раза) - 8 миллионов элементов.
* После этого порога, максимально возможный оверхед по памяти будет всего лишь в 4, а не в 8 раз.
*/
static const int GROWTH_CHANGE_THRESHOLD = 23;
/// Growth policy passed as the Grower template argument of the ValuesToIndices hash maps below.
/// NOTE(review): size_degree is not declared in this struct; presumably it is a member
/// inherited from HashTableGrower - confirm against that class.
struct Grower : public HashTableGrower
{
/// Initial size degree (1 << 9 = 512). The ValuesToIndices typedefs below also use it
/// to size HashTableAllocatorWithStackMemory as (1 << initial_size_degree) * sizeof(key type).
static const size_t initial_size_degree = 9;
/// Sets size_degree to initial_size_degree on construction.
Grower() { size_degree = initial_size_degree; }
};
template <typename T>
@ -1076,7 +1068,9 @@ private:
const ColumnArray::Offsets_t & offsets = array->getOffsets();
const typename ColumnVector<T>::Container_t & values = nested->getData();
typedef ClearableHashMap<T, UInt32, default_hash<T>, table_growth_traits> ValuesToIndices;
typedef ClearableHashMap<T, UInt32, DefaultHash<T>, Grower,
HashTableAllocatorWithStackMemory<(1 << Grower::initial_size_degree) * sizeof(T)> > ValuesToIndices;
ValuesToIndices indices;
size_t prev_off = 0;
for (size_t i = 0; i < offsets.size(); ++i)
@ -1100,7 +1094,9 @@ private:
const ColumnArray::Offsets_t & offsets = array->getOffsets();
size_t prev_off = 0;
typedef ClearableHashMap<StringRef, UInt32, std::hash<StringRef>, table_growth_traits> ValuesToIndices;
typedef ClearableHashMap<StringRef, UInt32, DefaultHash<StringRef>, Grower,
HashTableAllocatorWithStackMemory<(1 << Grower::initial_size_degree) * sizeof(StringRef)> > ValuesToIndices;
ValuesToIndices indices;
for (size_t i = 0; i < offsets.size(); ++i)
{
@ -1153,7 +1149,9 @@ private:
if (keys_bytes > 16)
return false;
typedef ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, table_growth_traits> ValuesToIndices;
typedef ClearableHashMap<UInt128, UInt32, UInt128Hash, Grower,
HashTableAllocatorWithStackMemory<(1 << Grower::initial_size_degree) * sizeof(UInt128)> > ValuesToIndices;
ValuesToIndices indices;
size_t prev_off = 0;
for (size_t i = 0; i < offsets.size(); ++i)
@ -1177,7 +1175,9 @@ private:
{
size_t count = columns.size();
typedef ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, table_growth_traits> ValuesToIndices;
typedef ClearableHashMap<UInt128, UInt32, UInt128TrivialHash, Grower,
HashTableAllocatorWithStackMemory<(1 << Grower::initial_size_degree) * sizeof(UInt128)> > ValuesToIndices;
ValuesToIndices indices;
StringRefs keys(count);
size_t prev_off = 0;

View File

@ -28,7 +28,7 @@ int main(int argc, char ** argv)
typedef std::unordered_map<std::string, int> Set;
typedef std::unordered_map<StringRef, int> RefsSet;
typedef google::dense_hash_map<std::string, int> DenseSet;
typedef google::dense_hash_map<StringRef, int> RefsDenseSet;
typedef google::dense_hash_map<StringRef, int, std::hash<StringRef> > RefsDenseSet;
typedef HashMap<StringRef, int> RefsHashMap;
Vec vec;

View File

@ -530,7 +530,7 @@ struct GrowthTraits : public DB::default_growth_traits
int main(int argc, char ** argv)
{
typedef HashMap<
typedef DB::HashMap<
StringRef,
UInt64,
TrivialHash,

View File

@ -94,7 +94,7 @@ static std::string listOfColumns(const NamesAndTypesList & available_columns)
}
typedef google::dense_hash_map<StringRef, const IDataType *> NamesAndTypesMap;
typedef google::dense_hash_map<StringRef, const IDataType *, std::hash<StringRef> > NamesAndTypesMap;
static NamesAndTypesMap getColumnsMap(const NamesAndTypesList & available_columns)
@ -120,7 +120,7 @@ void ITableDeclaration::check(const Names & column_names) const
const NamesAndTypesMap & columns_map = getColumnsMap(available_columns);
typedef google::dense_hash_set<StringRef> UniqueStrings;
typedef google::dense_hash_set<StringRef, std::hash<StringRef> > UniqueStrings;
UniqueStrings unique_names;
unique_names.set_empty_key(StringRef());