mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-29 19:12:03 +00:00
dbms: TwoLevelHashTable: development [#METR-2944].
This commit is contained in:
parent
f0792f0deb
commit
3411b9fe68
@ -189,7 +189,7 @@ struct ZeroValueStorage<true, Cell>
|
|||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
bool has_zero = false;
|
bool has_zero = false;
|
||||||
char zero_value_storage[sizeof(Cell)]; /// Кусок памяти для элемента с ключём 0.
|
char zero_value_storage[sizeof(Cell)] __attribute__((__aligned__(__alignof__(Cell)))); /// Кусок памяти для элемента с ключём 0.
|
||||||
|
|
||||||
public:
|
public:
|
||||||
bool hasZero() const { return has_zero; }
|
bool hasZero() const { return has_zero; }
|
||||||
|
@ -40,34 +40,28 @@ protected:
|
|||||||
size_t m_size = 0; /// Количество элементов
|
size_t m_size = 0; /// Количество элементов
|
||||||
|
|
||||||
size_t hash(const Key & x) const { return Hash::operator()(x); }
|
size_t hash(const Key & x) const { return Hash::operator()(x); }
|
||||||
size_t bucket(size_t hash_value) const { return hash_value >> 24; } /// TODO: брать не настолько младший байт.
|
size_t bucket(size_t hash_value) const { return hash_value >> 56; }
|
||||||
|
|
||||||
typename Impl::iterator beginOfNextNonEmptyBucket(size_t & bucket)
|
typename Impl::iterator beginOfNextNonEmptyBucket(size_t & bucket)
|
||||||
{
|
{
|
||||||
do
|
while (bucket != NUM_BUCKETS && impls[bucket].empty())
|
||||||
{
|
|
||||||
++bucket;
|
++bucket;
|
||||||
}
|
|
||||||
while (bucket != NUM_BUCKETS && !impls[bucket].empty());
|
|
||||||
|
|
||||||
if (bucket != NUM_BUCKETS)
|
if (bucket != NUM_BUCKETS)
|
||||||
return impls[bucket].begin();
|
return impls[bucket].begin();
|
||||||
|
|
||||||
return impls[NUM_BUCKETS - 1].end();
|
return impls[MAX_BUCKET].end();
|
||||||
}
|
}
|
||||||
|
|
||||||
typename Impl::const_iterator beginOfNextNonEmptyBucket(size_t & bucket) const
|
typename Impl::const_iterator beginOfNextNonEmptyBucket(size_t & bucket) const
|
||||||
{
|
{
|
||||||
do
|
while (bucket != NUM_BUCKETS && impls[bucket].empty())
|
||||||
{
|
|
||||||
++bucket;
|
++bucket;
|
||||||
}
|
|
||||||
while (bucket != NUM_BUCKETS && !impls[bucket].empty());
|
|
||||||
|
|
||||||
if (bucket != NUM_BUCKETS)
|
if (bucket != NUM_BUCKETS)
|
||||||
return impls[bucket].begin();
|
return impls[bucket].begin();
|
||||||
|
|
||||||
return impls[NUM_BUCKETS - 1].end();
|
return impls[MAX_BUCKET].end();
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@ -75,19 +69,20 @@ public:
|
|||||||
typedef typename Impl::value_type value_type;
|
typedef typename Impl::value_type value_type;
|
||||||
|
|
||||||
static constexpr size_t NUM_BUCKETS = 256;
|
static constexpr size_t NUM_BUCKETS = 256;
|
||||||
|
static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
|
||||||
Impl impls[NUM_BUCKETS];
|
Impl impls[NUM_BUCKETS];
|
||||||
|
|
||||||
|
|
||||||
class iterator
|
class iterator
|
||||||
{
|
{
|
||||||
Impl * impls;
|
Self * container;
|
||||||
size_t bucket;
|
size_t bucket;
|
||||||
typename Impl::iterator current_it;
|
typename Impl::iterator current_it;
|
||||||
|
|
||||||
friend class TwoLevelHashTable;
|
friend class TwoLevelHashTable;
|
||||||
|
|
||||||
iterator(Impl * impls_, size_t bucket_, typename Impl::iterator & current_it_)
|
iterator(Self * container_, size_t bucket_, typename Impl::iterator current_it_)
|
||||||
: impls(impls_), bucket(bucket_), current_it(current_it_) {}
|
: container(container_), bucket(bucket_), current_it(current_it_) {}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
iterator() {}
|
iterator() {}
|
||||||
@ -98,8 +93,11 @@ public:
|
|||||||
iterator & operator++()
|
iterator & operator++()
|
||||||
{
|
{
|
||||||
++current_it;
|
++current_it;
|
||||||
if (current_it == impls[bucket].end())
|
if (current_it == container->impls[bucket].end())
|
||||||
current_it = beginOfNextNonEmptyBucket(bucket);
|
{
|
||||||
|
++bucket;
|
||||||
|
current_it = container->beginOfNextNonEmptyBucket(bucket);
|
||||||
|
}
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@ -111,18 +109,18 @@ public:
|
|||||||
|
|
||||||
class const_iterator
|
class const_iterator
|
||||||
{
|
{
|
||||||
Impl * impls;
|
Self * container;
|
||||||
size_t bucket;
|
size_t bucket;
|
||||||
typename Impl::const_iterator current_it;
|
typename Impl::const_iterator current_it;
|
||||||
|
|
||||||
friend class TwoLevelHashTable;
|
friend class TwoLevelHashTable;
|
||||||
|
|
||||||
const_iterator(Impl * impls_, size_t bucket_, typename Impl::const_iterator & current_it_)
|
const_iterator(Self * container_, size_t bucket_, typename Impl::const_iterator current_it_)
|
||||||
: impls(impls_), bucket(bucket_), current_it(current_it_) {}
|
: container(container_), bucket(bucket_), current_it(current_it_) {}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
const_iterator() {}
|
const_iterator() {}
|
||||||
const_iterator(const iterator & rhs) : impls(rhs.impls), current_it(rhs.current_it), bucket(rhs.bucket) {}
|
const_iterator(const iterator & rhs) : container(rhs.container), bucket(rhs.bucket), current_it(rhs.current_it) {}
|
||||||
|
|
||||||
bool operator== (const const_iterator & rhs) const { return current_it == rhs.current_it; }
|
bool operator== (const const_iterator & rhs) const { return current_it == rhs.current_it; }
|
||||||
bool operator!= (const const_iterator & rhs) const { return current_it != rhs.current_it; }
|
bool operator!= (const const_iterator & rhs) const { return current_it != rhs.current_it; }
|
||||||
@ -130,8 +128,11 @@ public:
|
|||||||
const_iterator & operator++()
|
const_iterator & operator++()
|
||||||
{
|
{
|
||||||
++current_it;
|
++current_it;
|
||||||
if (current_it == impls[bucket].end())
|
if (current_it == container->impls[bucket].end())
|
||||||
current_it = beginOfNextNonEmptyBucket(bucket);
|
{
|
||||||
|
++bucket;
|
||||||
|
current_it = container->beginOfNextNonEmptyBucket(bucket);
|
||||||
|
}
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
@ -144,17 +145,19 @@ public:
|
|||||||
const_iterator begin() const
|
const_iterator begin() const
|
||||||
{
|
{
|
||||||
size_t buck = 0;
|
size_t buck = 0;
|
||||||
return beginOfNextNonEmptyBucket(buck);
|
typename Impl::const_iterator impl_it = beginOfNextNonEmptyBucket(buck);
|
||||||
|
return { this, buck, impl_it };
|
||||||
}
|
}
|
||||||
|
|
||||||
iterator begin()
|
iterator begin()
|
||||||
{
|
{
|
||||||
size_t buck = 0;
|
size_t buck = 0;
|
||||||
return beginOfNextNonEmptyBucket(buck);
|
typename Impl::iterator impl_it = beginOfNextNonEmptyBucket(buck);
|
||||||
|
return { this, buck, impl_it };
|
||||||
}
|
}
|
||||||
|
|
||||||
const_iterator end() const { return impls[NUM_BUCKETS - 1].end(); }
|
const_iterator end() const { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
|
||||||
iterator end() { return impls[NUM_BUCKETS - 1].end(); }
|
iterator end() { return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }
|
||||||
|
|
||||||
|
|
||||||
/// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
|
/// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
|
||||||
@ -196,7 +199,7 @@ public:
|
|||||||
size_t buck = bucket(hash_value);
|
size_t buck = bucket(hash_value);
|
||||||
typename Impl::iterator impl_it;
|
typename Impl::iterator impl_it;
|
||||||
impls[buck].emplace(x, impl_it, inserted);
|
impls[buck].emplace(x, impl_it, inserted);
|
||||||
it = iterator(impls, buck, impl_it);
|
it = iterator(this, buck, impl_it);
|
||||||
|
|
||||||
if (inserted)
|
if (inserted)
|
||||||
++m_size;
|
++m_size;
|
||||||
@ -210,7 +213,7 @@ public:
|
|||||||
|
|
||||||
typename Impl::iterator found = impls[buck].find(x);
|
typename Impl::iterator found = impls[buck].find(x);
|
||||||
return found != impls[buck].end()
|
return found != impls[buck].end()
|
||||||
? iterator(impls, buck, found)
|
? iterator(this, buck, found)
|
||||||
: end();
|
: end();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -222,7 +225,7 @@ public:
|
|||||||
|
|
||||||
typename Impl::const_iterator found = impls[buck].find(x);
|
typename Impl::const_iterator found = impls[buck].find(x);
|
||||||
return found != impls[buck].end()
|
return found != impls[buck].end()
|
||||||
? const_iterator(impls, buck, found)
|
? const_iterator(this, buck, found)
|
||||||
: end();
|
: end();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,35 +81,160 @@ struct DefaultHash<CompactStringRef>
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
#define mix(h) ({ \
|
||||||
|
(h) ^= (h) >> 23; \
|
||||||
|
(h) *= 0x2127599bf4325c37ULL; \
|
||||||
|
(h) ^= (h) >> 47; })
|
||||||
|
|
||||||
|
struct FastHash64
|
||||||
|
{
|
||||||
|
size_t operator() (CompactStringRef x) const
|
||||||
|
{
|
||||||
|
const char * buf = x.data();
|
||||||
|
size_t len = x.size;
|
||||||
|
|
||||||
|
const uint64_t m = 0x880355f21e6d1965ULL;
|
||||||
|
const uint64_t *pos = (const uint64_t *)buf;
|
||||||
|
const uint64_t *end = pos + (len / 8);
|
||||||
|
const unsigned char *pos2;
|
||||||
|
uint64_t h = len * m;
|
||||||
|
uint64_t v;
|
||||||
|
|
||||||
|
while (pos != end) {
|
||||||
|
v = *pos++;
|
||||||
|
h ^= mix(v);
|
||||||
|
h *= m;
|
||||||
|
}
|
||||||
|
|
||||||
|
pos2 = (const unsigned char*)pos;
|
||||||
|
v = 0;
|
||||||
|
|
||||||
|
switch (len & 7) {
|
||||||
|
case 7: v ^= (uint64_t)pos2[6] << 48;
|
||||||
|
case 6: v ^= (uint64_t)pos2[5] << 40;
|
||||||
|
case 5: v ^= (uint64_t)pos2[4] << 32;
|
||||||
|
case 4: v ^= (uint64_t)pos2[3] << 24;
|
||||||
|
case 3: v ^= (uint64_t)pos2[2] << 16;
|
||||||
|
case 2: v ^= (uint64_t)pos2[1] << 8;
|
||||||
|
case 1: v ^= (uint64_t)pos2[0];
|
||||||
|
h ^= mix(v);
|
||||||
|
h *= m;
|
||||||
|
}
|
||||||
|
|
||||||
|
return mix(h);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct CrapWow
|
||||||
|
{
|
||||||
|
size_t operator() (CompactStringRef x) const
|
||||||
|
{
|
||||||
|
const char * key = x.data();
|
||||||
|
size_t len = x.size;
|
||||||
|
size_t seed = 0;
|
||||||
|
|
||||||
|
const UInt64 m = 0x95b47aa3355ba1a1, n = 0x8a970be7488fda55;
|
||||||
|
UInt64 hash;
|
||||||
|
// 3 = m, 4 = n
|
||||||
|
// r12 = h, r13 = k, ecx = seed, r12 = key
|
||||||
|
asm(
|
||||||
|
"leaq (%%rcx,%4), %%r13\n"
|
||||||
|
"movq %%rdx, %%r14\n"
|
||||||
|
"movq %%rcx, %%r15\n"
|
||||||
|
"movq %%rcx, %%r12\n"
|
||||||
|
"addq %%rax, %%r13\n"
|
||||||
|
"andq $0xfffffffffffffff0, %%rcx\n"
|
||||||
|
"jz QW%=\n"
|
||||||
|
"addq %%rcx, %%r14\n\n"
|
||||||
|
"negq %%rcx\n"
|
||||||
|
"XW%=:\n"
|
||||||
|
"movq %4, %%rax\n"
|
||||||
|
"mulq (%%r14,%%rcx)\n"
|
||||||
|
"xorq %%rax, %%r12\n"
|
||||||
|
"xorq %%rdx, %%r13\n"
|
||||||
|
"movq %3, %%rax\n"
|
||||||
|
"mulq 8(%%r14,%%rcx)\n"
|
||||||
|
"xorq %%rdx, %%r12\n"
|
||||||
|
"xorq %%rax, %%r13\n"
|
||||||
|
"addq $16, %%rcx\n"
|
||||||
|
"jnz XW%=\n"
|
||||||
|
"QW%=:\n"
|
||||||
|
"movq %%r15, %%rcx\n"
|
||||||
|
"andq $8, %%r15\n"
|
||||||
|
"jz B%=\n"
|
||||||
|
"movq %4, %%rax\n"
|
||||||
|
"mulq (%%r14)\n"
|
||||||
|
"addq $8, %%r14\n"
|
||||||
|
"xorq %%rax, %%r12\n"
|
||||||
|
"xorq %%rdx, %%r13\n"
|
||||||
|
"B%=:\n"
|
||||||
|
"andq $7, %%rcx\n"
|
||||||
|
"jz F%=\n"
|
||||||
|
"movq $1, %%rdx\n"
|
||||||
|
"shlq $3, %%rcx\n"
|
||||||
|
"movq %3, %%rax\n"
|
||||||
|
"shlq %%cl, %%rdx\n"
|
||||||
|
"addq $-1, %%rdx\n"
|
||||||
|
"andq (%%r14), %%rdx\n"
|
||||||
|
"mulq %%rdx\n"
|
||||||
|
"xorq %%rdx, %%r12\n"
|
||||||
|
"xorq %%rax, %%r13\n"
|
||||||
|
"F%=:\n"
|
||||||
|
"leaq (%%r13,%4), %%rax\n"
|
||||||
|
"xorq %%r12, %%rax\n"
|
||||||
|
"mulq %4\n"
|
||||||
|
"xorq %%rdx, %%rax\n"
|
||||||
|
"xorq %%r12, %%rax\n"
|
||||||
|
"xorq %%r13, %%rax\n"
|
||||||
|
: "=a"(hash), "=c"(key), "=d"(key)
|
||||||
|
: "r"(m), "r"(n), "a"(seed), "c"(len), "d"(key)
|
||||||
|
: "%r12", "%r13", "%r14", "%r15", "cc"
|
||||||
|
);
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct SimpleHash
|
||||||
|
{
|
||||||
|
size_t operator() (CompactStringRef x) const
|
||||||
|
{
|
||||||
|
const char * pos = x.data();
|
||||||
|
size_t size = x.size;
|
||||||
|
|
||||||
|
const char * end = pos + size;
|
||||||
|
|
||||||
|
size_t res = 0;
|
||||||
|
|
||||||
|
if (size == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (size < 8)
|
||||||
|
{
|
||||||
|
memcpy(reinterpret_cast<char *>(&res), pos, size);
|
||||||
|
return intHash64(res);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (pos + 8 < end)
|
||||||
|
{
|
||||||
|
UInt64 word = *reinterpret_cast<const UInt64 *>(pos);
|
||||||
|
res = intHash64(word ^ res);
|
||||||
|
|
||||||
|
pos += 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
UInt64 word = *reinterpret_cast<const UInt64 *>(end - 8);
|
||||||
|
res = intHash64(word ^ res);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
typedef CompactStringRef Key;
|
typedef CompactStringRef Key;
|
||||||
typedef UInt64 Value;
|
typedef UInt64 Value;
|
||||||
|
|
||||||
/** Hash map cell that stores the hash value of its key next to the key/value pair.
  * The stored hash is returned by getHash instead of being recomputed, and is compared
  * first when two cells are compared with each other.
  */
struct CellWithSavedHash : public HashMapCell<Key, Value, DefaultHash<Key> >
{
    /// Hash of the key; assigned only through setHash.
    size_t saved_hash;

    CellWithSavedHash()
        : HashMapCell()
    {
    }

    CellWithSavedHash(const Key & key_, const State & state)
        : HashMapCell(key_, state)
    {
    }

    CellWithSavedHash(const value_type & value_, const State & state)
        : HashMapCell(value_, state)
    {
    }

    /// Disabled byte-by-byte comparison that walks the strings from the end:
/*  static bool equals(const StringRef & lhs, const StringRef & rhs)
    {
        if (lhs.size != rhs.size)
            return false;

        for (size_t pos = lhs.size - 1; pos < lhs.size; --pos)
            if (lhs.data[pos] != rhs.data[pos])
                return false;

        return true;
    }*/

    /// Compare the stored key with a bare key (no hash available for it).
    bool keyEquals(const Key & key_) const
    {
        return value.first == key_;
    }

    /// Compare with another cell: the saved hashes are checked before the keys themselves.
    bool keyEquals(const CellWithSavedHash & other) const
    {
        if (saved_hash != other.saved_hash)
            return false;

        return value.first == other.value.first;
    }

    void setHash(size_t hash_value)
    {
        saved_hash = hash_value;
    }

    /// The hash functor argument is ignored: the saved value is returned as is.
    size_t getHash(const DefaultHash<Key> &) const
    {
        return saved_hash;
    }
};
|
|
||||||
|
|
||||||
struct Grower : public HashTableGrower<>
|
struct Grower : public HashTableGrower<>
|
||||||
{
|
{
|
||||||
@ -188,7 +313,7 @@ int main(int argc, char ** argv)
|
|||||||
//typedef HashMap<Key, Value> Map;
|
//typedef HashMap<Key, Value> Map;
|
||||||
|
|
||||||
/// Сохранение хэша ускоряет ресайзы примерно в 2 раза, и общую производительность - на 6-8%.
|
/// Сохранение хэша ускоряет ресайзы примерно в 2 раза, и общую производительность - на 6-8%.
|
||||||
typedef HashMapTable<Key, CellWithSavedHash, DefaultHash<Key>, Grower> Map;
|
typedef HashMapWithSavedHash<Key, Value, DefaultHash<Key>, Grower> Map;
|
||||||
|
|
||||||
Map map;
|
Map map;
|
||||||
Map::iterator it;
|
Map::iterator it;
|
||||||
@ -204,7 +329,7 @@ int main(int argc, char ** argv)
|
|||||||
|
|
||||||
watch.stop();
|
watch.stop();
|
||||||
std::cerr << std::fixed << std::setprecision(2)
|
std::cerr << std::fixed << std::setprecision(2)
|
||||||
<< "HashMap. Size: " << map.size()
|
<< "HashMap (CityHash64). Size: " << map.size()
|
||||||
<< ", elapsed: " << watch.elapsedSeconds()
|
<< ", elapsed: " << watch.elapsedSeconds()
|
||||||
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
|
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
|
||||||
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
||||||
@ -217,6 +342,93 @@ int main(int argc, char ** argv)
|
|||||||
{
|
{
|
||||||
Stopwatch watch;
|
Stopwatch watch;
|
||||||
|
|
||||||
|
typedef HashMapWithSavedHash<Key, Value, FastHash64, Grower> Map;
|
||||||
|
|
||||||
|
Map map;
|
||||||
|
Map::iterator it;
|
||||||
|
bool inserted;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
map.emplace(data[i], it, inserted);
|
||||||
|
if (inserted)
|
||||||
|
it->second = 0;
|
||||||
|
++it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
watch.stop();
|
||||||
|
std::cerr << std::fixed << std::setprecision(2)
|
||||||
|
<< "HashMap (FastHash64). Size: " << map.size()
|
||||||
|
<< ", elapsed: " << watch.elapsedSeconds()
|
||||||
|
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
|
||||||
|
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
||||||
|
<< ", collisions: " << map.getCollisions()
|
||||||
|
#endif
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!m || m == 3)
|
||||||
|
{
|
||||||
|
Stopwatch watch;
|
||||||
|
|
||||||
|
typedef HashMapWithSavedHash<Key, Value, CrapWow, Grower> Map;
|
||||||
|
|
||||||
|
Map map;
|
||||||
|
Map::iterator it;
|
||||||
|
bool inserted;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
map.emplace(data[i], it, inserted);
|
||||||
|
if (inserted)
|
||||||
|
it->second = 0;
|
||||||
|
++it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
watch.stop();
|
||||||
|
std::cerr << std::fixed << std::setprecision(2)
|
||||||
|
<< "HashMap (CrapWow). Size: " << map.size()
|
||||||
|
<< ", elapsed: " << watch.elapsedSeconds()
|
||||||
|
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
|
||||||
|
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
||||||
|
<< ", collisions: " << map.getCollisions()
|
||||||
|
#endif
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!m || m == 4)
|
||||||
|
{
|
||||||
|
Stopwatch watch;
|
||||||
|
|
||||||
|
typedef HashMapWithSavedHash<Key, Value, SimpleHash, Grower> Map;
|
||||||
|
|
||||||
|
Map map;
|
||||||
|
Map::iterator it;
|
||||||
|
bool inserted;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
map.emplace(data[i], it, inserted);
|
||||||
|
if (inserted)
|
||||||
|
it->second = 0;
|
||||||
|
++it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
watch.stop();
|
||||||
|
std::cerr << std::fixed << std::setprecision(2)
|
||||||
|
<< "HashMap (SimpleHash). Size: " << map.size()
|
||||||
|
<< ", elapsed: " << watch.elapsedSeconds()
|
||||||
|
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
|
||||||
|
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
||||||
|
<< ", collisions: " << map.getCollisions()
|
||||||
|
#endif
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!m || m == 5)
|
||||||
|
{
|
||||||
|
Stopwatch watch;
|
||||||
|
|
||||||
std::unordered_map<Key, Value, DefaultHash<Key> > map;
|
std::unordered_map<Key, Value, DefaultHash<Key> > map;
|
||||||
for (size_t i = 0; i < n; ++i)
|
for (size_t i = 0; i < n; ++i)
|
||||||
++map[data[i]];
|
++map[data[i]];
|
||||||
@ -229,7 +441,7 @@ int main(int argc, char ** argv)
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!m || m == 3)
|
if (!m || m == 6)
|
||||||
{
|
{
|
||||||
Stopwatch watch;
|
Stopwatch watch;
|
||||||
|
|
||||||
@ -246,7 +458,7 @@ int main(int argc, char ** argv)
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!m || m == 4)
|
if (!m || m == 7)
|
||||||
{
|
{
|
||||||
Stopwatch watch;
|
Stopwatch watch;
|
||||||
|
|
||||||
|
@ -7,10 +7,11 @@
|
|||||||
#include <sparsehash/dense_hash_map>
|
#include <sparsehash/dense_hash_map>
|
||||||
#include <sparsehash/sparse_hash_map>
|
#include <sparsehash/sparse_hash_map>
|
||||||
|
|
||||||
#include <statdaemons/Stopwatch.h>
|
|
||||||
|
|
||||||
//#define DBMS_HASH_MAP_DEBUG_RESIZES
|
//#define DBMS_HASH_MAP_DEBUG_RESIZES
|
||||||
|
|
||||||
|
#include <statdaemons/Stopwatch.h>
|
||||||
|
#include <stats/UniquesHashSet.h>
|
||||||
|
|
||||||
#include <DB/Core/Types.h>
|
#include <DB/Core/Types.h>
|
||||||
#include <DB/IO/ReadBufferFromFile.h>
|
#include <DB/IO/ReadBufferFromFile.h>
|
||||||
#include <DB/IO/CompressedReadBuffer.h>
|
#include <DB/IO/CompressedReadBuffer.h>
|
||||||
@ -48,6 +49,8 @@ int main(int argc, char ** argv)
|
|||||||
{
|
{
|
||||||
Stopwatch watch;
|
Stopwatch watch;
|
||||||
|
|
||||||
|
std::cerr << sizeof(HashMapCell<Key, Value, DefaultHash<Key> >) << std::endl;
|
||||||
|
|
||||||
typedef TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key> >, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator> Map;
|
typedef TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key> >, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator> Map;
|
||||||
|
|
||||||
Map map;
|
Map map;
|
||||||
@ -68,6 +71,58 @@ int main(int argc, char ** argv)
|
|||||||
<< ", elapsed: " << watch.elapsedSeconds()
|
<< ", elapsed: " << watch.elapsedSeconds()
|
||||||
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
|
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
|
|
||||||
|
size_t sum_counts = 0;
|
||||||
|
size_t elems = 0;
|
||||||
|
for (const auto & kv : map)
|
||||||
|
{
|
||||||
|
sum_counts += kv.second;
|
||||||
|
++elems;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
Stopwatch watch;
|
||||||
|
|
||||||
|
typedef TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key> >, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator> Map;
|
||||||
|
//typedef HashMap<Key, Value, UniquesHashSetDefaultHash> Map;
|
||||||
|
|
||||||
|
Map map;
|
||||||
|
Map::iterator it;
|
||||||
|
bool inserted;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
map.emplace(i, it, inserted);
|
||||||
|
if (inserted)
|
||||||
|
it->second = 0;
|
||||||
|
++it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
watch.stop();
|
||||||
|
std::cerr << std::fixed << std::setprecision(2)
|
||||||
|
<< "HashMap. Size: " << map.size()
|
||||||
|
<< ", elapsed: " << watch.elapsedSeconds()
|
||||||
|
<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
|
||||||
|
<< std::endl;
|
||||||
|
|
||||||
|
size_t sum_counts = 0;
|
||||||
|
size_t elems = 0;
|
||||||
|
for (const auto & kv : map)
|
||||||
|
{
|
||||||
|
sum_counts += kv.second;
|
||||||
|
++elems;
|
||||||
|
|
||||||
|
if (kv.first > n)
|
||||||
|
std::cerr << kv.first << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;
|
||||||
|
|
||||||
|
if (sum_counts != n)
|
||||||
|
std::cerr << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user