2012-08-22 20:29:01 +00:00
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
|
|
#include <DB/Interpreters/HashMap.h>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** See HashMap.h
  */
|
|
|
|
|
|
|
|
|
|
template
|
|
|
|
|
<
|
|
|
|
|
typename Key,
|
|
|
|
|
typename Hash = default_hash<Key>,
|
|
|
|
|
typename ZeroTraits = default_zero_traits<Key>,
|
2013-01-20 23:47:23 +00:00
|
|
|
|
typename GrowthTraits = default_growth_traits
|
2012-08-22 20:29:01 +00:00
|
|
|
|
>
|
|
|
|
|
class HashSet : private boost::noncopyable
|
|
|
|
|
{
|
|
|
|
|
private:
|
2012-08-23 20:22:44 +00:00
|
|
|
|
friend class const_iterator;
|
|
|
|
|
friend class iterator;
|
|
|
|
|
|
2012-08-22 20:29:01 +00:00
|
|
|
|
typedef size_t HashValue;
|
2013-01-20 23:47:23 +00:00
|
|
|
|
typedef HashSet<Key, Hash, ZeroTraits, GrowthTraits> Self;
|
2012-08-22 20:29:01 +00:00
|
|
|
|
|
|
|
|
|
size_t m_size; /// Number of elements
|
|
|
|
|
UInt8 size_degree; /// Table size expressed as a power of two
|
2012-08-23 20:22:44 +00:00
|
|
|
|
bool has_zero; /// The hash table contains an element whose key value = 0.
|
2013-07-28 20:54:10 +00:00
|
|
|
|
Key zero_value; /// The zero key value. Exists so that an iterator has something to point to. Not static, because the zero value depends on ZeroTraits.
|
2012-08-22 20:29:01 +00:00
|
|
|
|
Key * buf; /// Chunk of memory for all elements except the element with key 0.
|
|
|
|
|
|
|
|
|
|
Hash hash; /// Hash functor; place(hash(key)) gives the cell where probing for a key starts.
|
|
|
|
|
|
|
|
|
|
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
|
|
|
|
mutable size_t collisions; /// Number of probe steps taken past occupied cells; mutable so that the const find() can update it.
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
inline size_t buf_size() const { return 1 << size_degree; }
|
2013-01-20 23:47:23 +00:00
|
|
|
|
inline size_t buf_size_bytes() const { return buf_size() * sizeof(Key); }
|
2012-08-22 20:29:01 +00:00
|
|
|
|
inline size_t max_fill() const { return 1 << (size_degree - 1); }
|
|
|
|
|
inline size_t mask() const { return buf_size() - 1; }
|
|
|
|
|
inline size_t place(HashValue x) const { return x & mask(); }
|
|
|
|
|
|
|
|
|
|
|
2013-01-20 23:47:23 +00:00
|
|
|
|
/// Увеличить размер буфера в 2 ^ N раз
|
2012-08-22 20:29:01 +00:00
|
|
|
|
void resize()
|
|
|
|
|
{
|
2013-01-20 23:47:23 +00:00
|
|
|
|
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
|
|
|
|
|
Stopwatch watch;
|
|
|
|
|
#endif
|
|
|
|
|
|
2012-08-22 20:29:01 +00:00
|
|
|
|
size_t old_size = buf_size();
|
2013-01-20 23:47:23 +00:00
|
|
|
|
size_t old_size_bytes = buf_size_bytes();
|
|
|
|
|
|
|
|
|
|
size_degree += size_degree >= GrowthTraits::GROWTH_CHANGE_THRESHOLD
|
|
|
|
|
? 1
|
|
|
|
|
: GrowthTraits::FAST_GROWTH_DEGREE;
|
2012-08-22 20:29:01 +00:00
|
|
|
|
|
2013-02-03 18:07:55 +00:00
|
|
|
|
/// Расширим пространство.
|
|
|
|
|
buf = reinterpret_cast<Key *>(realloc(reinterpret_cast<void *>(buf), buf_size_bytes()));
|
2013-01-20 23:47:23 +00:00
|
|
|
|
|
2013-02-03 18:07:55 +00:00
|
|
|
|
if (NULL == buf)
|
|
|
|
|
throwFromErrno("HashSet: Cannot realloc.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
2013-01-20 23:47:23 +00:00
|
|
|
|
|
2013-02-03 18:07:55 +00:00
|
|
|
|
/// Очистим новый кусок памяти.
|
|
|
|
|
memset(buf + old_size, 0, buf_size_bytes() - old_size_bytes);
|
|
|
|
|
|
2013-01-20 23:47:23 +00:00
|
|
|
|
/** Теперь некоторые элементы может потребоваться переместить на новое место.
|
|
|
|
|
* Элемент может остаться на месте, или переместиться в новое место "справа",
|
|
|
|
|
* или переместиться левее по цепочке разрешения коллизий, из-за того, что элементы левее него были перемещены в новое место "справа".
|
|
|
|
|
*/
|
|
|
|
|
for (size_t i = 0; i < old_size; ++i)
|
|
|
|
|
if (!ZeroTraits::check(buf[i]))
|
|
|
|
|
reinsert(buf[i]);
|
|
|
|
|
|
|
|
|
|
#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
|
|
|
|
|
watch.stop();
|
|
|
|
|
std::cerr << std::fixed << std::setprecision(3)
|
|
|
|
|
<< "Resize from " << old_size << " to " << buf_size() << " took " << watch.elapsedSeconds() << " sec."
|
|
|
|
|
<< std::endl;
|
|
|
|
|
#endif
|
2012-08-22 20:29:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Вставить в новый буфер значение, которое было в старом буфере.
|
|
|
|
|
* Используется при увеличении размера буфера.
|
|
|
|
|
*/
|
2013-01-20 23:47:23 +00:00
|
|
|
|
void reinsert(Key & x)
|
2012-08-22 20:29:01 +00:00
|
|
|
|
{
|
|
|
|
|
size_t place_value = place(hash(x));
|
2013-01-20 23:47:23 +00:00
|
|
|
|
|
|
|
|
|
/// Если элемент на своём месте.
|
|
|
|
|
if (&x == &buf[place_value])
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
/// Вычисление нового места, с учётом цепочки разрешения коллизий.
|
2012-08-22 20:29:01 +00:00
|
|
|
|
while (!ZeroTraits::check(buf[place_value]))
|
|
|
|
|
{
|
|
|
|
|
++place_value;
|
|
|
|
|
place_value &= mask();
|
|
|
|
|
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
|
|
|
|
++collisions;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
2013-01-20 23:47:23 +00:00
|
|
|
|
|
|
|
|
|
/// Копирование на новое место и зануление старого.
|
2012-08-22 20:29:01 +00:00
|
|
|
|
memcpy(&buf[place_value], &x, sizeof(x));
|
2013-01-20 23:47:23 +00:00
|
|
|
|
ZeroTraits::set(x);
|
|
|
|
|
|
|
|
|
|
/// Потом на старое место могут переместиться элементы, которые раньше были в коллизии с этим.
|
2012-08-22 20:29:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
typedef Key key_type;
|
|
|
|
|
typedef Key value_type;
|
2013-01-20 23:47:23 +00:00
|
|
|
|
|
|
|
|
|
|
2012-08-22 20:29:01 +00:00
|
|
|
|
HashSet() :
|
|
|
|
|
m_size(0),
|
2013-01-20 23:47:23 +00:00
|
|
|
|
size_degree(GrowthTraits::INITIAL_SIZE_DEGREE),
|
2012-08-22 20:29:01 +00:00
|
|
|
|
has_zero(false)
|
|
|
|
|
{
|
2013-07-28 20:54:10 +00:00
|
|
|
|
ZeroTraits::set(zero_value);
|
|
|
|
|
|
2013-02-03 18:07:55 +00:00
|
|
|
|
buf = reinterpret_cast<Key *>(calloc(buf_size_bytes(), 1));
|
2013-01-20 23:47:23 +00:00
|
|
|
|
|
2013-02-03 18:07:55 +00:00
|
|
|
|
if (NULL == buf)
|
|
|
|
|
throwFromErrno("HashSet: Cannot calloc.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
|
2012-08-22 20:29:01 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Destroy the keys stored in the buffer and release the buffer.
  * Only cells of buf are destroyed explicitly: zero_value is an ordinary member and is destroyed
  *  automatically, so destroying it here as well (which happens when iterating from begin()
  *  while has_zero is set) would destroy it twice.
  */
~HashSet()
{
    if (NULL != buf)
    {
        Key * const buf_end = buf + buf_size();

        for (Key * cell = buf; cell != buf_end; ++cell)
            if (!ZeroTraits::check(*cell))
                cell->~Key();
    }

    free(reinterpret_cast<void *>(buf));
}
|
|
|
|
|
|
2012-08-23 20:22:44 +00:00
|
|
|
|
|
|
|
|
|
class iterator
|
|
|
|
|
{
|
|
|
|
|
Self * container;
|
|
|
|
|
Key * ptr;
|
|
|
|
|
|
|
|
|
|
friend class HashSet;
|
|
|
|
|
|
|
|
|
|
iterator(Self * container_, Key * ptr_) : container(container_), ptr(ptr_) {}
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
iterator() {}
|
|
|
|
|
|
|
|
|
|
bool operator== (const iterator & rhs) const { return ptr == rhs.ptr; }
|
|
|
|
|
bool operator!= (const iterator & rhs) const { return ptr != rhs.ptr; }
|
|
|
|
|
|
|
|
|
|
iterator & operator++()
|
|
|
|
|
{
|
2013-07-28 20:54:10 +00:00
|
|
|
|
if (unlikely(ptr == &container->zero_value))
|
2012-08-23 20:22:44 +00:00
|
|
|
|
ptr = container->buf;
|
|
|
|
|
else
|
|
|
|
|
++ptr;
|
|
|
|
|
|
|
|
|
|
while (ptr < container->buf + container->buf_size() && ZeroTraits::check(*ptr))
|
|
|
|
|
++ptr;
|
|
|
|
|
|
|
|
|
|
return *this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Key & operator* () const { return *ptr; }
|
|
|
|
|
Key * operator->() const { return ptr; }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class const_iterator
|
|
|
|
|
{
|
|
|
|
|
const Self * container;
|
|
|
|
|
const Key * ptr;
|
|
|
|
|
|
|
|
|
|
friend class HashSet;
|
|
|
|
|
|
2012-08-23 22:27:10 +00:00
|
|
|
|
const_iterator(const Self * container_, const Key * ptr_) : container(container_), ptr(ptr_) {}
|
2012-08-23 20:22:44 +00:00
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
const_iterator() {}
|
|
|
|
|
const_iterator(const iterator & rhs) : container(rhs.container), ptr(rhs.ptr) {}
|
|
|
|
|
|
|
|
|
|
bool operator== (const const_iterator & rhs) const { return ptr == rhs.ptr; }
|
|
|
|
|
bool operator!= (const const_iterator & rhs) const { return ptr != rhs.ptr; }
|
|
|
|
|
|
|
|
|
|
const_iterator & operator++()
|
|
|
|
|
{
|
2013-07-28 20:54:10 +00:00
|
|
|
|
if (unlikely(ptr == &container->zero_value))
|
2012-08-23 20:22:44 +00:00
|
|
|
|
ptr = container->buf;
|
|
|
|
|
else
|
|
|
|
|
++ptr;
|
|
|
|
|
|
|
|
|
|
while (ptr < container->buf + container->buf_size() && ZeroTraits::check(*ptr))
|
|
|
|
|
++ptr;
|
|
|
|
|
|
|
|
|
|
return *this;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const Key & operator* () const { return *ptr; }
|
|
|
|
|
const Key * operator->() const { return ptr; }
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Iterator to the first element: the zero key if it is present, otherwise the first occupied cell
///  of the buffer (or the end position if the buffer has no occupied cells).
const_iterator begin() const
{
    const Key * first = &zero_value;

    if (!has_zero)
    {
        const Key * const buf_end = buf + buf_size();

        first = buf;
        while (first < buf_end && ZeroTraits::check(*first))
            ++first;
    }

    return const_iterator(this, first);
}
|
|
|
|
|
|
|
|
|
|
/// Iterator to the first element: the zero key if it is present, otherwise the first occupied cell
///  of the buffer (or the end position if the buffer has no occupied cells).
iterator begin()
{
    Key * first = &zero_value;

    if (!has_zero)
    {
        Key * const buf_end = buf + buf_size();

        first = buf;
        while (first < buf_end && ZeroTraits::check(*first))
            ++first;
    }

    return iterator(this, first);
}
|
|
|
|
|
|
|
|
|
|
const_iterator end() const { return const_iterator(this, buf + buf_size()); }
|
|
|
|
|
iterator end() { return iterator(this, buf + buf_size()); }
|
|
|
|
|
|
2012-08-22 20:29:01 +00:00
|
|
|
|
|
|
|
|
|
/// Вставить ключ.
|
2012-08-23 20:22:44 +00:00
|
|
|
|
std::pair<iterator, bool> insert(const Key & x)
|
2012-08-22 20:29:01 +00:00
|
|
|
|
{
|
|
|
|
|
if (ZeroTraits::check(x))
|
|
|
|
|
{
|
|
|
|
|
if (!has_zero)
|
|
|
|
|
{
|
|
|
|
|
++m_size;
|
|
|
|
|
has_zero = true;
|
2012-08-23 20:22:44 +00:00
|
|
|
|
return std::make_pair(begin(), true);
|
2012-08-22 20:29:01 +00:00
|
|
|
|
}
|
2012-08-23 20:22:44 +00:00
|
|
|
|
return std::make_pair(begin(), false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
size_t place_value = place(hash(x));
|
|
|
|
|
while (!ZeroTraits::check(buf[place_value]) && buf[place_value] != x)
|
|
|
|
|
{
|
|
|
|
|
++place_value;
|
|
|
|
|
place_value &= mask();
|
|
|
|
|
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
|
|
|
|
++collisions;
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
iterator res(this, &buf[place_value]);
|
|
|
|
|
|
|
|
|
|
if (!ZeroTraits::check(buf[place_value]) && buf[place_value] == x)
|
|
|
|
|
return std::make_pair(res, false);
|
|
|
|
|
|
|
|
|
|
buf[place_value] = x;
|
|
|
|
|
++m_size;
|
|
|
|
|
|
|
|
|
|
if (unlikely(m_size > max_fill()))
|
|
|
|
|
{
|
|
|
|
|
resize();
|
|
|
|
|
return std::make_pair(find(x), true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return std::make_pair(res, true);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** Insert a key, reporting the result through the output arguments.
  * it is set to the element with this key; inserted is set to true if the key was added
  *  and to false if it was already in the set.
  */
void emplace(Key x, iterator & it, bool & inserted)
{
    /// The zero key is not stored in the buffer: its presence is tracked by has_zero.
    if (ZeroTraits::check(x))
    {
        if (has_zero)
            inserted = false;
        else
        {
            ++m_size;
            has_zero = true;
            inserted = true;
        }

        it = begin();
        return;
    }

    /// Linear probing: stop at an empty cell or at a cell holding the same key.
    size_t pos = place(hash(x));
    while (!ZeroTraits::check(buf[pos]) && buf[pos] != x)
    {
        pos = (pos + 1) & mask();
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
        ++collisions;
#endif
    }

    it = iterator(this, &buf[pos]);

    const bool already_present = !ZeroTraits::check(buf[pos]) && buf[pos] == x;
    if (already_present)
    {
        inserted = false;
        return;
    }

    /// Construct the key directly in the empty cell.
    new(&buf[pos]) Key(x);
    inserted = true;
    ++m_size;

    if (unlikely(m_size > max_fill()))
    {
        /// Growing the table relocates elements, so the iterator has to be looked up again.
        resize();
        it = find(x);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Look up a key. Returns an iterator to the element, or end() if the key is not in the set.
iterator find(Key x)
{
    /// The zero key is not stored in the buffer: its presence is tracked by has_zero.
    if (ZeroTraits::check(x))
    {
        if (has_zero)
            return begin();
        return end();
    }

    /// Linear probing: an empty cell ends the chain, which means the key is absent.
    size_t pos = place(hash(x));
    for (;;)
    {
        if (ZeroTraits::check(buf[pos]))
            return end();

        if (!(buf[pos] != x))
            return iterator(this, &buf[pos]);

        pos = (pos + 1) & mask();
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
        ++collisions;
#endif
    }
}
|
|
|
|
|
|
|
|
|
|
|
2012-08-23 20:22:44 +00:00
|
|
|
|
/// Look up a key. Returns an iterator to the element, or end() if the key is not in the set.
const_iterator find(Key x) const
{
    /// The zero key is not stored in the buffer: its presence is tracked by has_zero.
    if (ZeroTraits::check(x))
    {
        if (has_zero)
            return begin();
        return end();
    }

    /// Linear probing: an empty cell ends the chain, which means the key is absent.
    size_t pos = place(hash(x));
    for (;;)
    {
        if (ZeroTraits::check(buf[pos]))
            return end();

        if (!(buf[pos] != x))
            return const_iterator(this, &buf[pos]);

        pos = (pos + 1) & mask();
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
        ++collisions;
#endif
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
size_t size() const
|
|
|
|
|
{
|
|
|
|
|
return m_size;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool empty() const
|
|
|
|
|
{
|
|
|
|
|
return 0 == m_size;
|
|
|
|
|
}
|
2013-06-20 12:12:27 +00:00
|
|
|
|
|
2013-06-20 12:47:10 +00:00
|
|
|
|
size_t getBufferSizeInBytes() const
|
2013-06-20 12:12:27 +00:00
|
|
|
|
{
|
|
|
|
|
return buf_size_bytes();
|
|
|
|
|
}
|
2012-08-22 20:29:01 +00:00
|
|
|
|
|
|
|
|
|
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
|
|
|
|
|
size_t getCollisions() const
|
|
|
|
|
{
|
|
|
|
|
return collisions;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
}
|