From 547f452ba98baade61171448d37efe9d5deada39 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 13 Dec 2020 13:30:25 +0300 Subject: [PATCH 1/2] HashTable erase added tests --- src/Common/HashTable/HashTable.h | 37 ++++--- src/Common/tests/gtest_hash_table.cpp | 137 ++++++++++++++++++++++---- 2 files changed, 144 insertions(+), 30 deletions(-) diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index c3fed54206e..0b925e5042f 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -998,6 +998,20 @@ public: size_t next_position = erased_key_position; + /** + * During element deletion there is a possibility that the search will be broken for one + * of the following elements, because this place erased_key_position is empty. We will check + * next_element. Consider a sequence from (erased_key_position, next_element], if the + * optimal_position of next_element falls into it, then removing erased_key_position + * will not break search for next_element. + * If optimal_position of the element does not fall into the sequence (erased_key_position, next_element] + * then deleting a erased_key_position will break search for it, so we need to move next_element + * to erased_key_position. Now we have empty place at next_element, so we apply the identical + * procedure for it. + * If an empty element is encoutered then means that there is no more next elements for which we can + * break the search so we can exit. + */ + /// Walk to the right through collision resolution chain and move elements to better positions while (true) { @@ -1014,19 +1028,20 @@ public: if (optimal_position == next_position) continue; + /// Cannot move this element because optimal position is after the freed place + /// The second condition is tricky - if the chain was overlapped before erased_key_position, + /// and the optimal position is actually before in collision resolution chain: + /// + /// [*xn***----------------***] + /// ^^-next elem ^ + /// | | + /// erased elem the optimal position of the next elem + /// + /// so, the next elem should be moved to position of erased elem + /// The case of non overlapping part of chain if (next_position > erased_key_position - /// Cannot move this element because optimal position is after the freed place - /// The second condition is tricky - if the chain was overlapped before erased_key_position, - /// and the optimal position is actually before in collision resolution chain: - /// - /// [*xn***----------------***] - /// ^^-next elem ^ - /// | | - /// erased elem the optimal position of the next elem - /// - /// so, the next elem should be moved to position of erased elem - && (optimal_position > erased_key_position) && (optimal_position < next_position)) + && (optimal_position > erased_key_position) && (optimal_position < next_position)) { continue; } diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp index bd789f27709..7caaba99132 100644 --- a/src/Common/tests/gtest_hash_table.cpp +++ b/src/Common/tests/gtest_hash_table.cpp @@ -99,35 +99,134 @@ TEST(HashTable, Iteration) TEST(HashTable, Erase) { - using Cont = HashSet, HashTableGrower<1>>; - Cont cont; - - for (size_t i = 0; i < 5000; ++i) { - cont.insert(i); - } + /// Check zero element deletion + using Cont = HashSet, HashTableGrower<4>>; + Cont cont; - for (size_t i = 0; i < 2500; ++i) + cont.insert(0); + ASSERT_TRUE(cont.find(0) != nullptr && cont.find(0)->getKey() == 0); + cont.erase(0); + ASSERT_TRUE(cont.find(0) == nullptr); + } { - cont.erase(i); - } + using Cont = HashSet, HashTableGrower<4>>; + Cont cont; - for (size_t i = 5000; i < 10000; ++i) + /// [.(1)..............] erase of (1). + cont.insert(1); + ASSERT_TRUE(cont.find(1) != nullptr && cont.find(1)->getKey() == 1); + cont.erase(1); + ASSERT_TRUE(cont.find(1) == nullptr); + } { - cont.insert(i); - } + using Cont = HashSet, HashTableGrower<4>>; + Cont cont; - for (size_t i = 5000; i < 10000; ++i) + /// [.(1)(2)(3)............] erase of (1) does not break search for (2) (3). + cont.insert(1); + cont.insert(2); + cont.insert(3); + cont.erase(1); + ASSERT_TRUE(cont.find(1) == nullptr); + ASSERT_TRUE(cont.find(2) != nullptr && cont.find(2)->getKey() == 2); + ASSERT_TRUE(cont.find(3) != nullptr && cont.find(3)->getKey() == 3); + cont.erase(2); + cont.erase(3); + ASSERT_TRUE(cont.find(2) == nullptr); + ASSERT_TRUE(cont.find(3) == nullptr); + ASSERT_EQ(cont.size(), 0); + } { - cont.erase(i); - } + using Cont = HashSet, HashTableGrower<4>>; + Cont cont; - for (size_t i = 2500; i < 5000; ++i) + /// [.(1)(17).............] erase of (1) breaks search for (17) because their natural position is 1. + cont.insert(1); + cont.insert(17); + cont.erase(1); + ASSERT_TRUE(cont.find(1) == nullptr); + ASSERT_TRUE(cont.find(17) != nullptr && cont.find(17)->getKey() == 17); + } { - cont.erase(i); - } + using Cont = HashSet, HashTableGrower<4>>; + Cont cont; - ASSERT_EQ(cont.size(), 0); + /// [.(1)(2)(3)(17)...........] erase of (2) breaks search for (17) because their natural position is 1. + + cont.insert(1); + cont.insert(2); + cont.insert(3); + cont.insert(17); + cont.erase(2); + ASSERT_TRUE(cont.find(2) == nullptr); + ASSERT_TRUE(cont.find(1) != nullptr && cont.find(1)->getKey() == 1); + ASSERT_TRUE(cont.find(3) != nullptr && cont.find(3)->getKey() == 3); + ASSERT_TRUE(cont.find(17) != nullptr && cont.find(17)->getKey() == 17); + } + { + using Cont = HashSet, HashTableGrower<4>>; + Cont cont; + + /// [(16)(30)............(14)(15)] erase of (16) breaks search for (30) because their natural position is 14. + cont.insert(14); + cont.insert(15); + cont.insert(16); + cont.insert(30); + cont.erase(16); + + ASSERT_TRUE(cont.find(16) == nullptr); + ASSERT_TRUE(cont.find(14) != nullptr && cont.find(14)->getKey() == 14); + ASSERT_TRUE(cont.find(15) != nullptr && cont.find(15)->getKey() == 15); + ASSERT_TRUE(cont.find(30) != nullptr && cont.find(30)->getKey() == 30); + } + { + using Cont = HashSet, HashTableGrower<4>>; + Cont cont; + + /// [(16)(30)............(14)(15)] erase of (15) breaks search for (30) because their natural position is 14. + cont.insert(14); + cont.insert(15); + cont.insert(16); + cont.insert(30); + cont.erase(15); + + ASSERT_TRUE(cont.find(15) == nullptr); + ASSERT_TRUE(cont.find(14) != nullptr && cont.find(14)->getKey() == 14); + ASSERT_TRUE(cont.find(16) != nullptr && cont.find(16)->getKey() == 16); + ASSERT_TRUE(cont.find(30) != nullptr && cont.find(30)->getKey() == 30); + } + { + using Cont = HashSet, HashTableGrower<1>>; + Cont cont; + + for (size_t i = 0; i < 5000; ++i) + { + cont.insert(i); + } + + for (size_t i = 0; i < 2500; ++i) + { + cont.erase(i); + } + + for (size_t i = 5000; i < 10000; ++i) + { + cont.insert(i); + } + + for (size_t i = 5000; i < 10000; ++i) + { + cont.erase(i); + } + + for (size_t i = 2500; i < 5000; ++i) + { + cont.erase(i); + } + + ASSERT_EQ(cont.size(), 0); + } } TEST(HashTable, SerializationDeserialization) From 06b047235aa4c0ccdc16b3847455622a41f8f515 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 13 Dec 2020 14:35:52 +0300 Subject: [PATCH 2/2] Fixed style issues --- src/Common/HashTable/HashTable.h | 8 ++++---- src/Common/tests/gtest_hash_table.cpp | 14 ++++++++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/Common/HashTable/HashTable.h b/src/Common/HashTable/HashTable.h index 0b925e5042f..2d5580bf709 100644 --- a/src/Common/HashTable/HashTable.h +++ b/src/Common/HashTable/HashTable.h @@ -966,7 +966,7 @@ public: * https://en.wikipedia.org/wiki/Linear_probing * https://en.wikipedia.org/wiki/Open_addressing * Algorithm without recomputing hash but keep probes difference value (difference of natural cell position and inserted one) - * in cell https://arxiv.org/ftp/arxiv/papers/0909/0909.2547.pdf + * in cell https://arxiv.org/ftp/arxiv/papers/0909/0909.2547.pdf * * Currently we use algorithm with hash recomputing on each step from https://en.wikipedia.org/wiki/Open_addressing */ @@ -999,8 +999,8 @@ public: size_t next_position = erased_key_position; /** - * During element deletion there is a possibility that the search will be broken for one - * of the following elements, because this place erased_key_position is empty. We will check + * During element deletion there is a possibility that the search will be broken for one + * of the following elements, because this place erased_key_position is empty. We will check * next_element. Consider a sequence from (erased_key_position, next_element], if the * optimal_position of next_element falls into it, then removing erased_key_position * will not break search for next_element. @@ -1011,7 +1011,7 @@ public: * If an empty element is encoutered then means that there is no more next elements for which we can * break the search so we can exit. */ - + /// Walk to the right through collision resolution chain and move elements to better positions while (true) { diff --git a/src/Common/tests/gtest_hash_table.cpp b/src/Common/tests/gtest_hash_table.cpp index 7caaba99132..41255dcbba1 100644 --- a/src/Common/tests/gtest_hash_table.cpp +++ b/src/Common/tests/gtest_hash_table.cpp @@ -105,8 +105,11 @@ TEST(HashTable, Erase) Cont cont; cont.insert(0); + ASSERT_TRUE(cont.find(0) != nullptr && cont.find(0)->getKey() == 0); + cont.erase(0); + ASSERT_TRUE(cont.find(0) == nullptr); } { @@ -115,8 +118,11 @@ TEST(HashTable, Erase) /// [.(1)..............] erase of (1). cont.insert(1); + ASSERT_TRUE(cont.find(1) != nullptr && cont.find(1)->getKey() == 1); + cont.erase(1); + ASSERT_TRUE(cont.find(1) == nullptr); } { @@ -128,9 +134,11 @@ TEST(HashTable, Erase) cont.insert(2); cont.insert(3); cont.erase(1); + ASSERT_TRUE(cont.find(1) == nullptr); ASSERT_TRUE(cont.find(2) != nullptr && cont.find(2)->getKey() == 2); ASSERT_TRUE(cont.find(3) != nullptr && cont.find(3)->getKey() == 3); + cont.erase(2); cont.erase(3); ASSERT_TRUE(cont.find(2) == nullptr); @@ -145,6 +153,7 @@ TEST(HashTable, Erase) cont.insert(1); cont.insert(17); cont.erase(1); + ASSERT_TRUE(cont.find(1) == nullptr); ASSERT_TRUE(cont.find(17) != nullptr && cont.find(17)->getKey() == 17); } @@ -159,6 +168,7 @@ TEST(HashTable, Erase) cont.insert(3); cont.insert(17); cont.erase(2); + ASSERT_TRUE(cont.find(2) == nullptr); ASSERT_TRUE(cont.find(1) != nullptr && cont.find(1)->getKey() == 1); ASSERT_TRUE(cont.find(3) != nullptr && cont.find(3)->getKey() == 3); @@ -174,7 +184,7 @@ TEST(HashTable, Erase) cont.insert(16); cont.insert(30); cont.erase(16); - + ASSERT_TRUE(cont.find(16) == nullptr); ASSERT_TRUE(cont.find(14) != nullptr && cont.find(14)->getKey() == 14); ASSERT_TRUE(cont.find(15) != nullptr && cont.find(15)->getKey() == 15); @@ -190,7 +200,7 @@ TEST(HashTable, Erase) cont.insert(16); cont.insert(30); cont.erase(15); - + ASSERT_TRUE(cont.find(15) == nullptr); ASSERT_TRUE(cont.find(14) != nullptr && cont.find(14)->getKey() == 14); ASSERT_TRUE(cont.find(16) != nullptr && cont.find(16)->getKey() == 16);