From 9e273e4d242e05574acadc896a8e6e5de1b68715 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 19 Dec 2011 07:00:15 +0000
Subject: [PATCH] dbms: development [#CONV-2944].

---
Reviewer notes (commentary only, not part of the change):
 * This copy was re-flowed from a rendering in which the whole patch had been
   collapsed onto a few lines. Indentation and the position of blank context
   lines are a best-effort reconstruction, chosen so that every hunk matches
   the line counts in its @@ header; verify against the repository before
   applying.
 * Template argument lists that had been stripped (default_hash<...>,
   std::tr1::hash<Key>, default_zero_traits<Key>, reinterpret_cast<Value *>)
   were restored from the surrounding code. The #include targets in
   tests/hash_map.cpp could not be recovered and are left empty.
 * Comments on added lines are in English; context and removed lines keep
   their original text so that they still match the base file.
 * NOTE(review): in the hunk at -361 (the lookup returning const_iterator) the
   context hashes `x.first` but compares the slot key against `x`, unlike the
   iterator-returning lookup at -343 which hashes `x`. This looks like a
   pre-existing inconsistency in the base file; confirm and fix separately.

 dbms/include/DB/Interpreters/Aggregator.h |  3 +-
 dbms/include/DB/Interpreters/HashMap.h    | 71 +++++++++++++++++------
 dbms/src/Interpreters/tests/hash_map.cpp  |  3 +
 3 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/dbms/include/DB/Interpreters/Aggregator.h b/dbms/include/DB/Interpreters/Aggregator.h
index 0a545baad65..c804bd832a7 100644
--- a/dbms/include/DB/Interpreters/Aggregator.h
+++ b/dbms/include/DB/Interpreters/Aggregator.h
@@ -39,7 +39,8 @@ struct UInt128
 
 struct UInt128Hash
 {
-	size_t operator()(UInt128 x) const { return x.first ^ x.second; }
+	default_hash<UInt64> hash64;
+	size_t operator()(UInt128 x) const { return hash64(x.first ^ 0xB15652B8790A0D36ULL) ^ hash64(x.second); }
 };
 
 struct UInt128ZeroTraits
diff --git a/dbms/include/DB/Interpreters/HashMap.h b/dbms/include/DB/Interpreters/HashMap.h
index 2c6848e426a..e2d3db02bf0 100644
--- a/dbms/include/DB/Interpreters/HashMap.h
+++ b/dbms/include/DB/Interpreters/HashMap.h
@@ -27,12 +27,31 @@ namespace DB
   *  - проитерироваться по имеющимся в ней значениям.
   *
   * Open addressing.
-  * Quadratic probing (пока ещё не уверен, что оно не может зациклиться).
+  * Linear probing (suitable if the hash function is good!).
   * Значение с нулевым ключём хранится отдельно.
   * Удаления элементов нет.
   */
 
 
+/** Hash functions that are better than the trivial std::tr1::hash.
+  */
+template <typename T> struct default_hash;
+
+template <> struct default_hash<UInt64>
+{
+	size_t operator() (UInt64 key) const
+	{
+		key = (~key) + (key << 18);
+		key = key ^ ((key >> 31) | (key << 33));
+		key = key * 21;
+		key = key ^ ((key >> 11) | (key << 53));
+		key = key + (key << 6);
+		key = key ^ ((key >> 22) | (key << 42));
+		return key;
+	}
+};
+
+
 /** Способ проверить, что ключ нулевой,
   * а также способ установить значение ключа в ноль.
   */
@@ -47,7 +66,7 @@ template
 <
 	typename Key,
 	typename Mapped,
-	typename Hash = std::tr1::hash<Key>,
+	typename Hash = default_hash<Key>,
 	typename ZeroTraits = default_zero_traits<Key>,
 	int INITIAL_SIZE_DEGREE = 16,	/** Изначально выделить кусок памяти для 64K элементов.
 					  * Уменьшите значение для лучшей кэш-локальности в случае маленького количества уникальных ключей.
@@ -72,6 +91,9 @@ private:
 
 	Hash hash;
 
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+	mutable size_t collisions;
+#endif
 
 	inline size_t buf_size() const { return 1 << size_degree; }
 	inline size_t max_fill() const { return 1 << (size_degree - 1); }
@@ -105,12 +127,13 @@ private:
 	void reinsert(const Value & x)
 	{
 		size_t place_value = place(hash(x.first));
-		unsigned increment = 1;
 		while (!ZeroTraits::check(buf[place_value].first))
 		{
-			place_value += increment;
-			++increment;
+			++place_value;
 			place_value &= mask();
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+			++collisions;
+#endif
 		}
 		memcpy(&buf[place_value], &x, sizeof(x));
 	}
@@ -129,6 +152,9 @@ public:
 	{
 		ZeroTraits::set(zero_value()->first);
 		buf = reinterpret_cast<Value *>(calloc(buf_size(), sizeof(Value)));
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+		collisions = 0;
+#endif
 	}
 
 	~HashMap()
@@ -250,12 +276,13 @@ public:
 		}
 
 		size_t place_value = place(hash(x.first));
-		unsigned increment = 1;
 		while (!ZeroTraits::check(buf[place_value].first) && buf[place_value].first != x.first)
 		{
-			place_value += increment;
-			++increment;
+			++place_value;
 			place_value &= mask();
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+			++collisions;
+#endif
 		}
 
 		iterator res(this, &buf[place_value]);
@@ -309,12 +336,13 @@ public:
 		}
 
 		size_t place_value = place(hash(x));
-		unsigned increment = 1;
 		while (!ZeroTraits::check(buf[place_value].first) && buf[place_value].first != x)
 		{
-			place_value += increment;
-			++increment;
+			++place_value;
 			place_value &= mask();
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+			++collisions;
+#endif
 		}
 
 		it = iterator(this, &buf[place_value]);
@@ -343,12 +371,13 @@ public:
 			return has_zero ? begin() : end();
 
 		size_t place_value = place(hash(x));
-		unsigned increment = 1;
 		while (!ZeroTraits::check(buf[place_value].first) && buf[place_value].first != x)
 		{
-			place_value += increment;
-			++increment;
+			++place_value;
 			place_value &= mask();
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+			++collisions;
+#endif
 		}
 
 		return !ZeroTraits::check(buf[place_value].first) ? iterator(this, &buf[place_value]) : end();
@@ -361,12 +390,13 @@ public:
 			return has_zero ? begin() : end();
 
 		size_t place_value = place(hash(x.first));
-		unsigned increment = 1;
 		while (!ZeroTraits::check(buf[place_value].first) && buf[place_value].first != x)
 		{
-			place_value += increment;
-			++increment;
+			++place_value;
 			place_value &= mask();
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+			++collisions;
+#endif
 		}
 
 		return !ZeroTraits::check(buf[place_value].first) ? const_iterator(this, &buf[place_value]) : end();
@@ -382,6 +412,13 @@ public:
 	{
 		return 0 == m_size;
 	}
+
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+	size_t getCollisions() const
+	{
+		return collisions;
+	}
+#endif
 };
 
 }
diff --git a/dbms/src/Interpreters/tests/hash_map.cpp b/dbms/src/Interpreters/tests/hash_map.cpp
index c56b3928aba..91da254f140 100644
--- a/dbms/src/Interpreters/tests/hash_map.cpp
+++ b/dbms/src/Interpreters/tests/hash_map.cpp
@@ -9,6 +9,8 @@
 
 #include
 
+#define DBMS_HASH_MAP_COUNT_COLLISIONS
+
 #include
 #include
 #include
@@ -80,6 +82,7 @@ int main(int argc, char ** argv)
 			<< "DB::HashMap. Size: " << map.size()
 			<< ", elapsed: " << watch.elapsedSeconds()
 			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
+			<< ", collisions: " << map.getCollisions()
 			<< std::endl;
 	}
 