mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-21 01:00:48 +00:00
Merge pull request #18047 from kitaisreal/space-saving-remove-last-element-from-map-fix
HashTable erase added tests
This commit is contained in:
commit
cd5f827265
@ -966,7 +966,7 @@ public:
|
||||
* https://en.wikipedia.org/wiki/Linear_probing
|
||||
* https://en.wikipedia.org/wiki/Open_addressing
|
||||
* Algorithm without recomputing hash but keep probes difference value (difference of natural cell position and inserted one)
|
||||
* in cell https://arxiv.org/ftp/arxiv/papers/0909/0909.2547.pdf
|
||||
* in cell https://arxiv.org/ftp/arxiv/papers/0909/0909.2547.pdf
|
||||
*
|
||||
* Currently we use algorithm with hash recomputing on each step from https://en.wikipedia.org/wiki/Open_addressing
|
||||
*/
|
||||
@ -998,6 +998,20 @@ public:
|
||||
|
||||
size_t next_position = erased_key_position;
|
||||
|
||||
/**
|
||||
* During element deletion there is a possibility that the search will be broken for one
|
||||
* of the following elements, because this place erased_key_position is empty. We will check
|
||||
* next_element. Consider a sequence from (erased_key_position, next_element], if the
|
||||
* optimal_position of next_element falls into it, then removing erased_key_position
|
||||
* will not break search for next_element.
|
||||
* If optimal_position of the element does not fall into the sequence (erased_key_position, next_element]
|
||||
* then deleting a erased_key_position will break search for it, so we need to move next_element
|
||||
* to erased_key_position. Now we have empty place at next_element, so we apply the identical
|
||||
* procedure for it.
|
||||
* If an empty element is encoutered then means that there is no more next elements for which we can
|
||||
* break the search so we can exit.
|
||||
*/
|
||||
|
||||
/// Walk to the right through collision resolution chain and move elements to better positions
|
||||
while (true)
|
||||
{
|
||||
@ -1014,19 +1028,20 @@ public:
|
||||
if (optimal_position == next_position)
|
||||
continue;
|
||||
|
||||
/// Cannot move this element because optimal position is after the freed place
|
||||
/// The second condition is tricky - if the chain was overlapped before erased_key_position,
|
||||
/// and the optimal position is actually before in collision resolution chain:
|
||||
///
|
||||
/// [*xn***----------------***]
|
||||
/// ^^-next elem ^
|
||||
/// | |
|
||||
/// erased elem the optimal position of the next elem
|
||||
///
|
||||
/// so, the next elem should be moved to position of erased elem
|
||||
|
||||
/// The case of non overlapping part of chain
|
||||
if (next_position > erased_key_position
|
||||
/// Cannot move this element because optimal position is after the freed place
|
||||
/// The second condition is tricky - if the chain was overlapped before erased_key_position,
|
||||
/// and the optimal position is actually before in collision resolution chain:
|
||||
///
|
||||
/// [*xn***----------------***]
|
||||
/// ^^-next elem ^
|
||||
/// | |
|
||||
/// erased elem the optimal position of the next elem
|
||||
///
|
||||
/// so, the next elem should be moved to position of erased elem
|
||||
&& (optimal_position > erased_key_position) && (optimal_position < next_position))
|
||||
&& (optimal_position > erased_key_position) && (optimal_position < next_position))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
@ -99,35 +99,144 @@ TEST(HashTable, Iteration)
|
||||
|
||||
TEST(HashTable, Erase)
|
||||
{
|
||||
using Cont = HashSet<int, DefaultHash<int>, HashTableGrower<1>>;
|
||||
Cont cont;
|
||||
|
||||
for (size_t i = 0; i < 5000; ++i)
|
||||
{
|
||||
cont.insert(i);
|
||||
}
|
||||
/// Check zero element deletion
|
||||
using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
|
||||
Cont cont;
|
||||
|
||||
for (size_t i = 0; i < 2500; ++i)
|
||||
cont.insert(0);
|
||||
|
||||
ASSERT_TRUE(cont.find(0) != nullptr && cont.find(0)->getKey() == 0);
|
||||
|
||||
cont.erase(0);
|
||||
|
||||
ASSERT_TRUE(cont.find(0) == nullptr);
|
||||
}
|
||||
{
|
||||
cont.erase(i);
|
||||
}
|
||||
using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
|
||||
Cont cont;
|
||||
|
||||
for (size_t i = 5000; i < 10000; ++i)
|
||||
/// [.(1)..............] erase of (1).
|
||||
cont.insert(1);
|
||||
|
||||
ASSERT_TRUE(cont.find(1) != nullptr && cont.find(1)->getKey() == 1);
|
||||
|
||||
cont.erase(1);
|
||||
|
||||
ASSERT_TRUE(cont.find(1) == nullptr);
|
||||
}
|
||||
{
|
||||
cont.insert(i);
|
||||
}
|
||||
using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
|
||||
Cont cont;
|
||||
|
||||
for (size_t i = 5000; i < 10000; ++i)
|
||||
/// [.(1)(2)(3)............] erase of (1) does not break search for (2) (3).
|
||||
cont.insert(1);
|
||||
cont.insert(2);
|
||||
cont.insert(3);
|
||||
cont.erase(1);
|
||||
|
||||
ASSERT_TRUE(cont.find(1) == nullptr);
|
||||
ASSERT_TRUE(cont.find(2) != nullptr && cont.find(2)->getKey() == 2);
|
||||
ASSERT_TRUE(cont.find(3) != nullptr && cont.find(3)->getKey() == 3);
|
||||
|
||||
cont.erase(2);
|
||||
cont.erase(3);
|
||||
ASSERT_TRUE(cont.find(2) == nullptr);
|
||||
ASSERT_TRUE(cont.find(3) == nullptr);
|
||||
ASSERT_EQ(cont.size(), 0);
|
||||
}
|
||||
{
|
||||
cont.erase(i);
|
||||
}
|
||||
using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
|
||||
Cont cont;
|
||||
|
||||
for (size_t i = 2500; i < 5000; ++i)
|
||||
/// [.(1)(17).............] erase of (1) breaks search for (17) because their natural position is 1.
|
||||
cont.insert(1);
|
||||
cont.insert(17);
|
||||
cont.erase(1);
|
||||
|
||||
ASSERT_TRUE(cont.find(1) == nullptr);
|
||||
ASSERT_TRUE(cont.find(17) != nullptr && cont.find(17)->getKey() == 17);
|
||||
}
|
||||
{
|
||||
cont.erase(i);
|
||||
}
|
||||
using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
|
||||
Cont cont;
|
||||
|
||||
ASSERT_EQ(cont.size(), 0);
|
||||
/// [.(1)(2)(3)(17)...........] erase of (2) breaks search for (17) because their natural position is 1.
|
||||
|
||||
cont.insert(1);
|
||||
cont.insert(2);
|
||||
cont.insert(3);
|
||||
cont.insert(17);
|
||||
cont.erase(2);
|
||||
|
||||
ASSERT_TRUE(cont.find(2) == nullptr);
|
||||
ASSERT_TRUE(cont.find(1) != nullptr && cont.find(1)->getKey() == 1);
|
||||
ASSERT_TRUE(cont.find(3) != nullptr && cont.find(3)->getKey() == 3);
|
||||
ASSERT_TRUE(cont.find(17) != nullptr && cont.find(17)->getKey() == 17);
|
||||
}
|
||||
{
|
||||
using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
|
||||
Cont cont;
|
||||
|
||||
/// [(16)(30)............(14)(15)] erase of (16) breaks search for (30) because their natural position is 14.
|
||||
cont.insert(14);
|
||||
cont.insert(15);
|
||||
cont.insert(16);
|
||||
cont.insert(30);
|
||||
cont.erase(16);
|
||||
|
||||
ASSERT_TRUE(cont.find(16) == nullptr);
|
||||
ASSERT_TRUE(cont.find(14) != nullptr && cont.find(14)->getKey() == 14);
|
||||
ASSERT_TRUE(cont.find(15) != nullptr && cont.find(15)->getKey() == 15);
|
||||
ASSERT_TRUE(cont.find(30) != nullptr && cont.find(30)->getKey() == 30);
|
||||
}
|
||||
{
|
||||
using Cont = HashSet<int, DummyHash<int>, HashTableGrower<4>>;
|
||||
Cont cont;
|
||||
|
||||
/// [(16)(30)............(14)(15)] erase of (15) breaks search for (30) because their natural position is 14.
|
||||
cont.insert(14);
|
||||
cont.insert(15);
|
||||
cont.insert(16);
|
||||
cont.insert(30);
|
||||
cont.erase(15);
|
||||
|
||||
ASSERT_TRUE(cont.find(15) == nullptr);
|
||||
ASSERT_TRUE(cont.find(14) != nullptr && cont.find(14)->getKey() == 14);
|
||||
ASSERT_TRUE(cont.find(16) != nullptr && cont.find(16)->getKey() == 16);
|
||||
ASSERT_TRUE(cont.find(30) != nullptr && cont.find(30)->getKey() == 30);
|
||||
}
|
||||
{
|
||||
using Cont = HashSet<int, DefaultHash<int>, HashTableGrower<1>>;
|
||||
Cont cont;
|
||||
|
||||
for (size_t i = 0; i < 5000; ++i)
|
||||
{
|
||||
cont.insert(i);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 2500; ++i)
|
||||
{
|
||||
cont.erase(i);
|
||||
}
|
||||
|
||||
for (size_t i = 5000; i < 10000; ++i)
|
||||
{
|
||||
cont.insert(i);
|
||||
}
|
||||
|
||||
for (size_t i = 5000; i < 10000; ++i)
|
||||
{
|
||||
cont.erase(i);
|
||||
}
|
||||
|
||||
for (size_t i = 2500; i < 5000; ++i)
|
||||
{
|
||||
cont.erase(i);
|
||||
}
|
||||
|
||||
ASSERT_EQ(cont.size(), 0);
|
||||
}
|
||||
}
|
||||
|
||||
TEST(HashTable, SerializationDeserialization)
|
||||
|
Loading…
Reference in New Issue
Block a user