2020-12-12 12:08:46 +00:00
|
|
|
#include <iomanip>
|
|
|
|
#include <iostream>
|
|
|
|
|
|
|
|
#include <Interpreters/AggregationCommon.h>
|
|
|
|
|
|
|
|
#include <Common/HashTable/HashMap.h>
|
|
|
|
#include <Common/HashTable/HashSet.h>
|
|
|
|
|
|
|
|
#include <IO/ReadBufferFromString.h>
|
|
|
|
|
|
|
|
#include <gtest/gtest.h>
|
|
|
|
|
|
|
|
/// Hash functor that simply returns the key converted to size_t.
/// Used where a test needs results (e.g. a serialized dump) that must not depend on
/// a real hash implementation which may change over time.
template <typename T>
struct DummyHash
{
    size_t operator()(T key) const
    {
        return static_cast<size_t>(key);
    }
};
|
|
|
|
|
|
|
|
/// Collects the value of every cell of `table` into an ordered std::set, so that the
/// contents of two hash tables can be compared regardless of their iteration order.
///
/// `HashTable` must provide a `value_type` and be iterable with a range-for loop,
/// yielding cells that expose `getValue()`.
template <typename HashTable>
std::set<typename HashTable::value_type> convertToSet(const HashTable & table)
{
    std::set<typename HashTable::value_type> result;

    /// The table is only read here, so bind each cell by const reference instead of copying it.
    for (const auto & cell : table)
        result.emplace(cell.getValue());

    return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Two distinct keys inserted into an empty set must yield a set of size two.
TEST(HashTable, Insert)
{
    using IntSet = HashSet<int, DefaultHash<int>, HashTableGrower<1>>;

    IntSet numbers;
    numbers.insert(1);
    numbers.insert(2);

    ASSERT_EQ(numbers.size(), 2);
}
|
|
|
|
|
|
|
|
/// emplace() must report the cell of the key through its output argument and tell
/// whether the key was newly inserted or was already present.
TEST(HashTable, Emplace)
{
    using IntSet = HashSet<int, DefaultHash<int>, HashTableGrower<1>>;

    IntSet numbers;

    IntSet::LookupResult cell;
    bool is_new = false;

    /// First key: absent so far, hence inserted.
    numbers.emplace(1, cell, is_new);
    ASSERT_EQ(cell->getKey(), 1);
    ASSERT_EQ(is_new, true);

    /// Second, different key: inserted as well.
    numbers.emplace(2, cell, is_new);
    ASSERT_EQ(cell->getKey(), 2);
    ASSERT_EQ(is_new, true);

    /// Repeating the first key: the existing cell is returned and nothing is inserted.
    numbers.emplace(1, cell, is_new);
    ASSERT_EQ(cell->getKey(), 1);
    ASSERT_EQ(is_new, false);
}
|
|
|
|
|
|
|
|
/// find() must return a non-null result for keys that were inserted and nullptr for a key that was not.
TEST(HashTable, Lookup)
{
    using IntSet = HashSet<int, DefaultHash<int>, HashTableGrower<1>>;

    IntSet numbers;

    numbers.insert(1);
    numbers.insert(2);

    /// Both inserted keys are found.
    IntSet::LookupResult found = numbers.find(1);
    ASSERT_TRUE(found != nullptr);

    found = numbers.find(2);
    ASSERT_TRUE(found != nullptr);

    /// A key that was never inserted is not found.
    found = numbers.find(3);
    ASSERT_TRUE(found == nullptr);
}
|
|
|
|
|
|
|
|
/// Iterating over the set must visit exactly the keys that were inserted.
TEST(HashTable, Iteration)
{
    using IntSet = HashSet<int, DefaultHash<int>, HashTableGrower<1>>;

    IntSet numbers;

    numbers.insert(1);
    numbers.insert(2);
    numbers.insert(3);

    /// Compare through an ordered std::set so the hash table's iteration order does not matter.
    const std::set<int> expected{1, 2, 3};
    const std::set<int> actual = convertToSet(numbers);

    ASSERT_EQ(actual, expected);
}
|
|
|
|
|
|
|
|
/// Exercises erase(): the zero key, a single key, keys that share a collision chain
/// (including a chain that wraps around the end of the buffer), and bulk insert/erase.
///
/// The small cases use DummyHash so that the cell layouts sketched in the comments are
/// predictable; each sketch shows the expected layout before the erase.
TEST(HashTable, Erase)
{
    {
        /// Check zero element deletion
        using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
        Cont cont;

        cont.insert(0);

        ASSERT_TRUE(cont.find(0) != nullptr && cont.find(0)->getKey() == 0);

        cont.erase(0);

        ASSERT_TRUE(cont.find(0) == nullptr);
    }
    {
        using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
        Cont cont;

        /// [.(1)..............] erase of (1).
        cont.insert(1);

        ASSERT_TRUE(cont.find(1) != nullptr && cont.find(1)->getKey() == 1);

        cont.erase(1);

        ASSERT_TRUE(cont.find(1) == nullptr);
    }
    {
        using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
        Cont cont;

        /// [.(1)(2)(3)............] erase of (1) does not break search for (2) (3).
        cont.insert(1);
        cont.insert(2);
        cont.insert(3);
        cont.erase(1);

        ASSERT_TRUE(cont.find(1) == nullptr);
        ASSERT_TRUE(cont.find(2) != nullptr && cont.find(2)->getKey() == 2);
        ASSERT_TRUE(cont.find(3) != nullptr && cont.find(3)->getKey() == 3);

        /// Erasing the remaining keys must leave the set empty.
        cont.erase(2);
        cont.erase(3);
        ASSERT_TRUE(cont.find(2) == nullptr);
        ASSERT_TRUE(cont.find(3) == nullptr);
        ASSERT_EQ(cont.size(), 0);
    }
    {
        using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
        Cont cont;

        /// [.(1)(17).............] a naive erase of (1) would break the search for (17),
        /// because the natural position of both keys is 1.
        cont.insert(1);
        cont.insert(17);
        cont.erase(1);

        ASSERT_TRUE(cont.find(1) == nullptr);
        ASSERT_TRUE(cont.find(17) != nullptr && cont.find(17)->getKey() == 17);
    }
    {
        using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
        Cont cont;

        /// [.(1)(2)(3)(17)...........] a naive erase of (2) would break the search for (17):
        /// the natural position of (17) is 1, so (2) lies inside its collision chain.

        cont.insert(1);
        cont.insert(2);
        cont.insert(3);
        cont.insert(17);
        cont.erase(2);

        ASSERT_TRUE(cont.find(2) == nullptr);
        ASSERT_TRUE(cont.find(1) != nullptr && cont.find(1)->getKey() == 1);
        ASSERT_TRUE(cont.find(3) != nullptr && cont.find(3)->getKey() == 3);
        ASSERT_TRUE(cont.find(17) != nullptr && cont.find(17)->getKey() == 17);
    }
    {
        using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
        Cont cont;

        /// [(16)(30)............(14)(15)] a naive erase of (16) would break the search for (30):
        /// the natural position of (30) is 14 and its collision chain wraps around the end of the buffer.
        cont.insert(14);
        cont.insert(15);
        cont.insert(16);
        cont.insert(30);
        cont.erase(16);

        ASSERT_TRUE(cont.find(16) == nullptr);
        ASSERT_TRUE(cont.find(14) != nullptr && cont.find(14)->getKey() == 14);
        ASSERT_TRUE(cont.find(15) != nullptr && cont.find(15)->getKey() == 15);
        ASSERT_TRUE(cont.find(30) != nullptr && cont.find(30)->getKey() == 30);
    }
    {
        using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
        Cont cont;

        /// [(16)(30)............(14)(15)] a naive erase of (15) would break the search for (30):
        /// the natural position of (30) is 14, so (15) lies inside its wrapped collision chain.
        cont.insert(14);
        cont.insert(15);
        cont.insert(16);
        cont.insert(30);
        cont.erase(15);

        ASSERT_TRUE(cont.find(15) == nullptr);
        ASSERT_TRUE(cont.find(14) != nullptr && cont.find(14)->getKey() == 14);
        ASSERT_TRUE(cont.find(16) != nullptr && cont.find(16)->getKey() == 16);
        ASSERT_TRUE(cont.find(30) != nullptr && cont.find(30)->getKey() == 30);
    }
    {
        /// Bulk scenario with the default hash: interleave large batches of inserts and erases
        /// and make sure that nothing is left once every inserted key has been erased.
        using Cont = HashSet<int, DefaultHash<int>, HashTableGrower<1>>;
        Cont cont;

        /// The counters are int to match the key type of the set (no implicit narrowing).
        for (int i = 0; i < 5000; ++i)
            cont.insert(i);

        for (int i = 0; i < 2500; ++i)
            cont.erase(i);

        for (int i = 5000; i < 10000; ++i)
            cont.insert(i);

        for (int i = 5000; i < 10000; ++i)
            cont.erase(i);

        for (int i = 2500; i < 5000; ++i)
            cont.erase(i);

        ASSERT_EQ(cont.size(), 0);
    }
}
|
|
|
|
|
|
|
|
/// Round-trips hash sets through the text (writeText/readText) and binary (write/read)
/// formats, for both non-empty and empty sets.
TEST(HashTable, SerializationDeserialization)
{
    /// Text format, three elements.
    {
        /// DummyHash keeps the dump reproducible even if the default hash implementation changes.
        using IntSet = HashSet<int, DummyHash<int>, HashTableGrower<1>>;

        IntSet original;

        original.insert(1);
        original.insert(2);
        original.insert(3);

        DB::WriteBufferFromOwnString out;
        original.writeText(out);

        std::string expected = "3,1,2,3";

        ASSERT_EQ(out.str(), expected);

        DB::ReadBufferFromString in(expected);

        IntSet restored;
        restored.readText(in);
        ASSERT_EQ(convertToSet(original), convertToSet(restored));
    }

    /// Binary format, three elements. Only the round trip is checked, not the exact bytes.
    {
        using IntSet = HashSet<int, DefaultHash<int>, HashTableGrower<1>>;

        IntSet original;

        original.insert(1);
        original.insert(2);
        original.insert(3);

        DB::WriteBufferFromOwnString out;
        original.write(out);

        DB::ReadBufferFromString in(out.str());

        IntSet restored;
        restored.read(in);
        ASSERT_EQ(convertToSet(original), convertToSet(restored));
    }

    /// Text format, empty set.
    {
        using IntSet = HashSet<int, DummyHash<int>, HashTableGrower<1>>;
        IntSet original;

        DB::WriteBufferFromOwnString out;
        original.writeText(out);

        std::string expected = "0";
        ASSERT_EQ(out.str(), expected);

        DB::ReadBufferFromString in(expected);

        IntSet restored;
        restored.readText(in);
        ASSERT_EQ(convertToSet(original), convertToSet(restored));
    }

    /// Binary format, empty set: the dump is expected to be a single zero byte.
    {
        using WideSet = HashSet<DB::UInt128, DB::UInt128TrivialHash>;
        WideSet original;

        DB::WriteBufferFromOwnString out;
        original.write(out);

        std::string expected(1, '\0');

        ASSERT_EQ(out.str(), expected);

        DB::ReadBufferFromString in(expected);

        WideSet restored;
        restored.read(in);
        ASSERT_EQ(convertToSet(original), convertToSet(restored));
    }
}
|
2021-02-14 22:12:02 +00:00
|
|
|
|
|
|
|
/// Hash functor whose result is the key itself, converted to size_t.
template <typename T>
struct IdentityHash
{
    size_t operator()(T x) const
    {
        const size_t hash = x;
        return hash;
    }
};
|
|
|
|
|
|
|
|
/// Grower for tests: the buffer starts with a single cell and gets exactly one more cell
/// on every increaseSize(), so buffer sizes are not limited to powers of two.
struct OneElementResizeGrower
{
    /// If collision resolution chains are contiguous, we can implement erase operation by moving the elements.
    /// next() below advances by exactly one cell.
    static constexpr bool performs_linear_probing_with_single_step = true;

    static constexpr size_t initial_count = 1;

    /// Current number of cells in the buffer.
    size_t bufSize() const
    {
        return buf_size;
    }

    /// Cell index for hash value `x`.
    size_t place(size_t x) const
    {
        return x % buf_size;
    }

    /// Cell that follows `pos` in the probing sequence; wraps around at the end of the buffer.
    size_t next(size_t pos) const
    {
        return place(pos + 1);
    }

    /// True when `elems` elements no longer fit, i.e. there are at least as many elements as cells.
    bool overflow(size_t elems) const
    {
        return !(elems < buf_size);
    }

    /// Grow the buffer by a single cell.
    void increaseSize()
    {
        buf_size += 1;
    }

    /// Intentionally a no-op: the argument is ignored.
    void set(size_t)
    {
    }

    void setBufSize(size_t buf_size_)
    {
        buf_size = buf_size_;
    }

    size_t buf_size = initial_count;
};
|
|
|
|
|
|
|
|
/// Checks that no element is lost when the table is resized with a grower that adds a single cell per resize.
TEST(HashTable, Resize)
{
    {
        /// Edge case: after a resize all cells end up at the end of the buffer
        /// and occupy half of the hash table.
        using Cont = HashSet<int, IdentityHash<int>, OneElementResizeGrower>;
        Cont cont;

        cont.insert(3);
        cont.insert(1);

        const std::set<int> expected{1, 3};
        const std::set<int> actual = convertToSet(cont);

        ASSERT_EQ(actual, expected);
    }
}
|