From fbfbe161ecb342450f715204a32aa0781321852e Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Tue, 29 Oct 2019 23:16:51 +0800 Subject: [PATCH] Unify hash tables interface. --- dbms/programs/obfuscator/Obfuscator.cpp | 12 +- .../AggregateFunctionEntropy.h | 6 +- .../QuantileExactWeighted.h | 6 +- dbms/src/Columns/ColumnLowCardinality.cpp | 2 +- dbms/src/Common/ColumnsHashing.h | 2 +- dbms/src/Common/ColumnsHashingImpl.h | 16 +-- dbms/src/Common/HashTable/ClearableHashMap.h | 16 +-- dbms/src/Common/HashTable/ClearableHashSet.h | 12 -- .../Common/HashTable/FixedClearableHashMap.h | 34 +++-- .../Common/HashTable/FixedClearableHashSet.h | 10 +- dbms/src/Common/HashTable/FixedHashMap.h | 58 +++----- dbms/src/Common/HashTable/FixedHashSet.h | 5 +- dbms/src/Common/HashTable/FixedHashTable.h | 62 ++++---- dbms/src/Common/HashTable/HashMap.h | 56 +++----- dbms/src/Common/HashTable/HashSet.h | 8 -- dbms/src/Common/HashTable/HashTable.h | 133 ++++++++---------- dbms/src/Common/HashTable/SmallTable.h | 11 +- dbms/src/Common/HashTable/StringHashMap.h | 92 ++++++------ dbms/src/Common/HashTable/StringHashTable.h | 76 +++++++--- dbms/src/Common/HashTable/TwoLevelHashMap.h | 12 +- dbms/src/Common/HashTable/TwoLevelHashTable.h | 4 +- .../Common/HashTable/TwoLevelStringHashMap.h | 16 +-- .../HashTable/TwoLevelStringHashTable.h | 33 +++-- dbms/src/Common/SpaceSaving.h | 2 +- dbms/src/Common/tests/auto_array.cpp | 4 +- .../src/Common/tests/parallel_aggregation.cpp | 30 ++-- .../Common/tests/parallel_aggregation2.cpp | 14 +- dbms/src/Common/tests/small_table.cpp | 2 +- dbms/src/Core/tests/string_pool.cpp | 6 +- dbms/src/DataTypes/DataTypeEnum.cpp | 2 +- dbms/src/DataTypes/DataTypeEnum.h | 2 +- .../ComplexKeyCacheDictionary.cpp | 2 +- .../Dictionaries/ComplexKeyCacheDictionary.h | 10 +- .../ComplexKeyHashedDictionary.cpp | 4 +- dbms/src/Dictionaries/HashedDictionary.cpp | 4 +- .../Dictionaries/RangeHashedDictionary.cpp | 12 +- dbms/src/Functions/addressToLine.cpp | 4 +- dbms/src/Functions/array/arrayIntersect.cpp | 8 +- dbms/src/Functions/transform.cpp | 18 +-- dbms/src/Interpreters/Aggregator.h | 4 - dbms/src/Interpreters/Join.cpp | 2 +- dbms/src/Interpreters/tests/hash_map.cpp | 12 +- dbms/src/Interpreters/tests/hash_map3.cpp | 2 +- .../Interpreters/tests/hash_map_lookup.cpp | 12 +- .../Interpreters/tests/hash_map_string.cpp | 16 +-- .../Interpreters/tests/hash_map_string_2.cpp | 4 +- .../Interpreters/tests/hash_map_string_3.cpp | 4 +- .../tests/hash_map_string_small.cpp | 8 +- .../Interpreters/tests/string_hash_map.cpp | 4 +- .../Interpreters/tests/two_level_hash_map.cpp | 16 +-- .../Impl/JSONEachRowRowInputFormat.cpp | 6 +- .../Formats/Impl/TSKVRowInputFormat.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 4 +- dbms/src/Storages/StorageJoin.cpp | 8 +- utils/test-data-generator/MarkovModel.h | 12 +- 55 files changed, 439 insertions(+), 483 deletions(-) diff --git a/dbms/programs/obfuscator/Obfuscator.cpp b/dbms/programs/obfuscator/Obfuscator.cpp index be6125d77bf..2fde579f3c2 100644 --- a/dbms/programs/obfuscator/Obfuscator.cpp +++ b/dbms/programs/obfuscator/Obfuscator.cpp @@ -579,7 +579,7 @@ public: { for (auto & elem : table) { - Histogram & histogram = elem.getSecond(); + Histogram & histogram = elem.getMapped(); if (histogram.buckets.size() < params.num_buckets_cutoff) { @@ -593,7 +593,7 @@ public: { for (auto & elem : table) { - Histogram & histogram = elem.getSecond(); + Histogram & histogram = elem.getMapped(); if (!histogram.total) continue; @@ -625,7 +625,7 @@ public: { for (auto & elem : table) { - Histogram & histogram = elem.getSecond(); + Histogram & histogram = elem.getMapped(); if (!histogram.total) continue; @@ -641,7 +641,7 @@ public: { for (auto & elem : table) { - Histogram & histogram = elem.getSecond(); + Histogram & histogram = elem.getMapped(); if (!histogram.total) continue; @@ -676,7 +676,7 @@ public: while (true) { it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size())); - if (it && lookupResultGetMapped(it)->total + lookupResultGetMapped(it)->count_end != 0) + if (it && it->getMapped().total + it->getMapped().count_end != 0) break; if (context_size == 0) @@ -710,7 +710,7 @@ public: if (num_bytes_after_desired_size > 0) end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size); - CodePoint code = lookupResultGetMapped(it)->sample(determinator, end_probability_multiplier); + CodePoint code = it->getMapped().sample(determinator, end_probability_multiplier); if (code == END) break; diff --git a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h index 23f08a081db..49c7ff704f7 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -55,7 +55,7 @@ struct EntropyData void merge(const EntropyData & rhs) { for (const auto & pair : rhs.map) - map[pair.getFirst()] += pair.getSecond(); + map[pair.getKey()] += pair.getMapped(); } void serialize(WriteBuffer & buf) const @@ -77,12 +77,12 @@ struct EntropyData { UInt64 total_value = 0; for (const auto & pair : map) - total_value += pair.getSecond(); + total_value += pair.getMapped(); Float64 shannon_entropy = 0; for (const auto & pair : map) { - Float64 frequency = Float64(pair.getSecond()) / total_value; + Float64 frequency = Float64(pair.getMapped()) / total_value; shannon_entropy -= frequency * log2(frequency); } diff --git a/dbms/src/AggregateFunctions/QuantileExactWeighted.h b/dbms/src/AggregateFunctions/QuantileExactWeighted.h index 31a485ef695..6053bddc947 100644 --- a/dbms/src/AggregateFunctions/QuantileExactWeighted.h +++ b/dbms/src/AggregateFunctions/QuantileExactWeighted.h @@ -58,7 +58,7 @@ struct QuantileExactWeighted void merge(const QuantileExactWeighted & rhs) { for (const auto & pair : rhs.map) - map[pair.getFirst()] += pair.getSecond(); + map[pair.getKey()] += pair.getMapped(); } void serialize(WriteBuffer & buf) const @@ -93,7 +93,7 @@ struct QuantileExactWeighted UInt64 sum_weight = 0; for (const auto & pair : map) { - sum_weight += pair.getSecond(); + sum_weight += pair.getMapped(); array[i] = pair.getValue(); ++i; } @@ -143,7 +143,7 @@ struct QuantileExactWeighted UInt64 sum_weight = 0; for (const auto & pair : map) { - sum_weight += pair.getSecond(); + sum_weight += pair.getMapped(); array[i] = pair.getValue(); ++i; } diff --git a/dbms/src/Columns/ColumnLowCardinality.cpp b/dbms/src/Columns/ColumnLowCardinality.cpp index 32ba2378100..e7998f164af 100644 --- a/dbms/src/Columns/ColumnLowCardinality.cpp +++ b/dbms/src/Columns/ColumnLowCardinality.cpp @@ -35,7 +35,7 @@ namespace data.resize(hash_map.size()); for (const auto & val : hash_map) - data[val.getSecond()] = val.getFirst(); + data[val.getMapped()] = val.getKey(); for (auto & ind : index) ind = hash_map[ind]; diff --git a/dbms/src/Common/ColumnsHashing.h b/dbms/src/Common/ColumnsHashing.h index 28938bd43ac..6201c4d3e12 100644 --- a/dbms/src/Common/ColumnsHashing.h +++ b/dbms/src/Common/ColumnsHashing.h @@ -359,7 +359,7 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod if constexpr (has_mapped) { - auto & mapped = *lookupResultGetMapped(it); + auto & mapped = it->getMapped(); if (inserted) { new (&mapped) Mapped(); diff --git a/dbms/src/Common/ColumnsHashingImpl.h b/dbms/src/Common/ColumnsHashingImpl.h index e204242d8fe..ccea488e030 100644 --- a/dbms/src/Common/ColumnsHashingImpl.h +++ b/dbms/src/Common/ColumnsHashingImpl.h @@ -174,13 +174,13 @@ protected: [[maybe_unused]] Mapped * cached = nullptr; if constexpr (has_mapped) - cached = lookupResultGetMapped(it); + cached = &it->getMapped(); if (inserted) { if constexpr (has_mapped) { - new(lookupResultGetMapped(it)) Mapped(); + new (&it->getMapped()) Mapped(); } } @@ -191,18 +191,18 @@ protected: if constexpr (has_mapped) { - cache.value.first = *lookupResultGetKey(it); - cache.value.second = *lookupResultGetMapped(it); + cache.value.first = it->getKey(); + cache.value.second = it->getMapped(); cached = &cache.value.second; } else { - cache.value = *lookupResultGetKey(it); + cache.value = it->getKey(); } } if constexpr (has_mapped) - return EmplaceResult(*lookupResultGetMapped(it), *cached, inserted); + return EmplaceResult(it->getMapped(), *cached, inserted); else return EmplaceResult(inserted); } @@ -233,7 +233,7 @@ protected: cache.value.first = key; if (it) { - cache.value.second = *lookupResultGetMapped(it); + cache.value.second = it->getMapped(); } } else @@ -243,7 +243,7 @@ protected: } if constexpr (has_mapped) - return FindResult(it ? lookupResultGetMapped(it) : nullptr, it != nullptr); + return FindResult(it ? &it->getMapped() : nullptr, it != nullptr); else return FindResult(it != nullptr); } diff --git a/dbms/src/Common/HashTable/ClearableHashMap.h b/dbms/src/Common/HashTable/ClearableHashMap.h index e9f010cffe5..4370f6b6dc7 100644 --- a/dbms/src/Common/HashTable/ClearableHashMap.h +++ b/dbms/src/Common/HashTable/ClearableHashMap.h @@ -14,12 +14,6 @@ struct ClearableHashMapCell : public ClearableHashTableCell -ALWAYS_INLINE inline auto lookupResultGetKey(ClearableHashMapCell * cell) { return &cell->getFirst(); } - -template -ALWAYS_INLINE inline auto lookupResultGetMapped(ClearableHashMapCell * cell) { return &cell->getSecond(); } - template < typename Key, @@ -31,20 +25,16 @@ template class ClearableHashMap : public HashTable, Hash, Grower, Allocator> { public: - using key_type = Key; - using mapped_type = Mapped; - using value_type = typename ClearableHashMap::cell_type::value_type; - - mapped_type & operator[](Key x) + Mapped & operator[](const Key & x) { typename ClearableHashMap::LookupResult it; bool inserted; this->emplace(x, it, inserted); if (inserted) - new(lookupResultGetMapped(it)) mapped_type(); + new (&it->getMapped()) Mapped(); - return *lookupResultGetMapped(it); + return it->getMapped(); } void clear() diff --git a/dbms/src/Common/HashTable/ClearableHashSet.h b/dbms/src/Common/HashTable/ClearableHashSet.h index 240c32632a9..824ec9d8e5f 100644 --- a/dbms/src/Common/HashTable/ClearableHashSet.h +++ b/dbms/src/Common/HashTable/ClearableHashSet.h @@ -48,12 +48,6 @@ struct ClearableHashTableCell : public BaseCell ClearableHashTableCell(const Key & key_, const State & state) : BaseCell(key_, state), version(state.version) {} }; -template -ALWAYS_INLINE inline auto lookupResultGetKey(ClearableHashTableCell * cell) { return &cell->key; } - -template -ALWAYS_INLINE inline void * lookupResultGetMapped(ClearableHashTableCell *) { return nullptr; } - template < typename Key, @@ -64,9 +58,6 @@ template class ClearableHashSet : public HashTable>, Hash, Grower, Allocator> { public: - using key_type = Key; - using value_type = typename ClearableHashSet::cell_type::value_type; - using Base = HashTable>, Hash, Grower, Allocator>; using typename Base::LookupResult; @@ -87,9 +78,6 @@ template class ClearableHashSetWithSavedHash: public HashTable>, Hash, Grower, Allocator> { public: - using key_type = Key; - using value_type = typename ClearableHashSetWithSavedHash::cell_type::value_type; - void clear() { ++this->version; diff --git a/dbms/src/Common/HashTable/FixedClearableHashMap.h b/dbms/src/Common/HashTable/FixedClearableHashMap.h index e4a67b63446..4c8c733446e 100644 --- a/dbms/src/Common/HashTable/FixedClearableHashMap.h +++ b/dbms/src/Common/HashTable/FixedClearableHashMap.h @@ -11,6 +11,8 @@ struct FixedClearableHashMapCell using State = ClearableHashSetState; using value_type = PairNoInit; + using mapped_type = Mapped; + UInt32 version; Mapped mapped; @@ -18,11 +20,12 @@ struct FixedClearableHashMapCell FixedClearableHashMapCell(const Key &, const State & state) : version(state.version) {} FixedClearableHashMapCell(const value_type & value_, const State & state) : version(state.version), mapped(value_.second) {} - Mapped & getSecond() { return mapped; } - const Mapped & getSecond() const { return mapped; } + const VoidKey getKey() const { return {}; } + Mapped & getMapped() { return mapped; } + const Mapped & getMapped() const { return mapped; } + bool isZero(const State & state) const { return version != state.version; } void setZero() { version = 0; } - static constexpr bool need_zero_value_storage = false; struct CellExt { @@ -35,32 +38,33 @@ struct FixedClearableHashMapCell } Key key; FixedClearableHashMapCell * ptr; - const Key & getFirst() const { return key; } - Mapped & getSecond() { return ptr->mapped; } - const Mapped & getSecond() const { return *ptr->mapped; } + const Key & getKey() const { return key; } + Mapped & getMapped() { return ptr->mapped; } + const Mapped & getMapped() const { return *ptr->mapped; } const value_type getValue() const { return {key, *ptr->mapped}; } }; }; template -class FixedClearableHashMap : public FixedHashMap, Allocator> +class FixedClearableHashMap : public FixedHashMap, Allocator> { public: - using key_type = Key; - using mapped_type = Mapped; - using value_type = typename FixedClearableHashMap::cell_type::value_type; + using Base = FixedHashMap, Allocator>; + using Self = FixedClearableHashMap; + using LookupResult = typename Base::LookupResult; - mapped_type & operator[](Key x) + using Base::Base; + + Mapped & operator[](const Key & x) { - typename FixedClearableHashMap::iterator it; + LookupResult it; bool inserted; this->emplace(x, it, inserted); - if (inserted) - new (&it->second) mapped_type(); + new (&it->getMapped()) Mapped(); - return it->second; + return it->getMapped(); } void clear() diff --git a/dbms/src/Common/HashTable/FixedClearableHashSet.h b/dbms/src/Common/HashTable/FixedClearableHashSet.h index 063798ae370..32cb6df924a 100644 --- a/dbms/src/Common/HashTable/FixedClearableHashSet.h +++ b/dbms/src/Common/HashTable/FixedClearableHashSet.h @@ -10,19 +10,23 @@ struct FixedClearableHashTableCell using State = ClearableHashSetState; using value_type = Key; - using mapped_type = void; + using mapped_type = VoidMapped; UInt32 version; FixedClearableHashTableCell() {} FixedClearableHashTableCell(const Key &, const State & state) : version(state.version) {} + const VoidKey getKey() const { return {}; } + VoidMapped getMapped() const { return {}; } + bool isZero(const State & state) const { return version != state.version; } void setZero() { version = 0; } - static constexpr bool need_zero_value_storage = false; struct CellExt { Key key; + const VoidKey getKey() const { return {}; } + VoidMapped getMapped() const { return {}; } const value_type & getValue() const { return key; } void update(Key && key_, FixedClearableHashTableCell *) { key = key_; } }; @@ -34,8 +38,6 @@ class FixedClearableHashSet : public FixedHashTable, Allocator>; - using key_type = Key; - using value_type = typename FixedClearableHashSet::cell_type::value_type; using LookupResult = typename Base::LookupResult; void clear() diff --git a/dbms/src/Common/HashTable/FixedHashMap.h b/dbms/src/Common/HashTable/FixedHashMap.h index 986b4af67c0..15c315bc6d6 100644 --- a/dbms/src/Common/HashTable/FixedHashMap.h +++ b/dbms/src/Common/HashTable/FixedHashMap.h @@ -13,18 +13,19 @@ struct FixedHashMapCell using value_type = PairNoInit; using mapped_type = TMapped; - Mapped mapped; bool full; + Mapped mapped; FixedHashMapCell() {} FixedHashMapCell(const Key &, const State &) : full(true) {} FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {} - Mapped & getSecond() { return mapped; } - const Mapped & getSecond() const { return mapped; } + const VoidKey getKey() const { return {}; } + Mapped & getMapped() { return mapped; } + const Mapped & getMapped() const { return mapped; } + bool isZero(const State &) const { return !full; } void setZero() { full = false; } - static constexpr bool need_zero_value_storage = false; /// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field. /// Note that we have to assemble a continuous layout for the value_type on each call of getValue(). @@ -40,36 +41,23 @@ struct FixedHashMapCell Key key; FixedHashMapCell * ptr; - const Key & getFirst() const { return key; } - Mapped & getSecond() { return ptr->mapped; } - const Mapped & getSecond() const { return ptr->mapped; } + const Key & getKey() const { return key; } + Mapped & getMapped() { return ptr->mapped; } + const Mapped & getMapped() const { return ptr->mapped; } const value_type getValue() const { return {key, ptr->mapped}; } }; }; -template -ALWAYS_INLINE inline void * lookupResultGetKey(FixedHashMapCell *) -{ return nullptr; } - -template -ALWAYS_INLINE inline auto lookupResultGetMapped(FixedHashMapCell * cell) -{ return &cell->getSecond(); } - -template -class FixedHashMap : public FixedHashTable, Allocator> +template , typename Allocator = HashTableAllocator> +class FixedHashMap : public FixedHashTable { public: - using Base = FixedHashTable, Allocator>; + using Base = FixedHashTable; using Self = FixedHashMap; - using key_type = Key; - using Cell = typename Base::cell_type; - using value_type = typename Cell::value_type; - using mapped_type = typename Cell::Mapped; + using LookupResult = typename Base::LookupResult; using Base::Base; - using LookupResult = typename Base::LookupResult; - template void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) { @@ -77,8 +65,8 @@ public: { typename Self::LookupResult res_it; bool inserted; - that.emplace(it->getFirst(), res_it, inserted, it.getHash()); - func(*lookupResultGetMapped(res_it), it->getSecond(), inserted); + that.emplace(it->getKey(), res_it, inserted, it.getHash()); + func(res_it->getMapped(), it->getMapped(), inserted); } } @@ -87,11 +75,11 @@ public: { for (auto it = this->begin(), end = this->end(); it != end; ++it) { - auto res_it = that.find(it->getFirst(), it.getHash()); + auto res_it = that.find(it->getKey(), it.getHash()); if (!res_it) - func(it->getSecond(), it->getSecond(), false); + func(it->getMapped(), it->getMapped(), false); else - func(*lookupResultGetMapped(res_it), it->getSecond(), true); + func(res_it->getMapped(), it->getMapped(), true); } } @@ -99,24 +87,24 @@ public: void forEachValue(Func && func) { for (auto & v : *this) - func(v.getFirst(), v.getSecond()); + func(v.getKey(), v.getMapped()); } template void forEachMapped(Func && func) { for (auto & v : *this) - func(v.getSecond()); + func(v.getMapped()); } - mapped_type & ALWAYS_INLINE operator[](Key x) + Mapped & ALWAYS_INLINE operator[](const Key & x) { - typename Base::LookupResult it; + LookupResult it; bool inserted; this->emplace(x, it, inserted); if (inserted) - new (it) mapped_type(); + new (&it->getMapped()) Mapped(); - return it; + return it->getMapped(); } }; diff --git a/dbms/src/Common/HashTable/FixedHashSet.h b/dbms/src/Common/HashTable/FixedHashSet.h index 14e92b5c5fd..ce3666944dd 100644 --- a/dbms/src/Common/HashTable/FixedHashSet.h +++ b/dbms/src/Common/HashTable/FixedHashSet.h @@ -6,14 +6,15 @@ template class FixedHashSet : public FixedHashTable, Allocator> { public: - using Base = FixedHashTable, Allocator>; + using Cell = FixedHashTableCell; + using Base = FixedHashTable; using Self = FixedHashSet; void merge(const Self & rhs) { for (size_t i = 0; i < Base::BUFFER_SIZE; ++i) if (Base::buf[i].isZero(*this) && !rhs.buf[i].isZero(*this)) - Base::buf[i] = rhs.buf[i]; + new (&Base::buf[i]) Cell(rhs.buf[i]); } /// NOTE: Currently this method isn't used. When it does, the ReadBuffer should diff --git a/dbms/src/Common/HashTable/FixedHashTable.h b/dbms/src/Common/HashTable/FixedHashTable.h index aadce906dc2..5779eaa4981 100644 --- a/dbms/src/Common/HashTable/FixedHashTable.h +++ b/dbms/src/Common/HashTable/FixedHashTable.h @@ -8,12 +8,15 @@ struct FixedHashTableCell using State = TState; using value_type = Key; - using mapped_type = void; + using mapped_type = VoidMapped; bool full; FixedHashTableCell() {} FixedHashTableCell(const Key &, const State &) : full(true) {} + const VoidKey getKey() const { return {}; } + VoidMapped getMapped() const { return {}; } + bool isZero(const State &) const { return !full; } void setZero() { full = false; } static constexpr bool need_zero_value_storage = false; @@ -28,6 +31,8 @@ struct FixedHashTableCell { Key key; + const VoidKey getKey() const { return {}; } + VoidMapped getMapped() const { return {}; } const value_type & getValue() const { return key; } void update(Key && key_, FixedHashTableCell *) { key = key_; } }; @@ -53,7 +58,7 @@ struct FixedHashTableCell template class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State { - static constexpr size_t BUFFER_SIZE = 1ULL << (sizeof(Key) * 8); + static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8); protected: friend class const_iterator; @@ -61,12 +66,11 @@ protected: friend class Reader; using Self = FixedHashTable; - using cell_type = Cell; size_t m_size = 0; /// Amount of elements - Cell * buf; /// A piece of memory for all elements except the element with zero key. + Cell * buf; /// A piece of memory for all elements. - void alloc() { buf = reinterpret_cast(Allocator::alloc(BUFFER_SIZE * sizeof(Cell))); } + void alloc() { buf = reinterpret_cast(Allocator::alloc(NUM_CELLS * sizeof(Cell))); } void free() { @@ -111,7 +115,7 @@ protected: ++ptr; /// Skip empty cells in the main buffer. - auto buf_end = container->buf + container->BUFFER_SIZE; + auto buf_end = container->buf + container->NUM_CELLS; while (ptr < buf_end && ptr->isZero(*container)) ++ptr; @@ -140,8 +144,9 @@ protected: public: using key_type = Key; - using value_type = typename Cell::value_type; using mapped_type = typename Cell::mapped_type; + using value_type = typename Cell::value_type; + using cell_type = Cell; using LookupResult = Cell *; using ConstLookupResult = const Cell *; @@ -239,7 +244,7 @@ public: return end(); const Cell * ptr = buf; - auto buf_end = buf + BUFFER_SIZE; + auto buf_end = buf + NUM_CELLS; while (ptr < buf_end && ptr->isZero(*this)) ++ptr; @@ -254,21 +259,21 @@ public: return end(); Cell * ptr = buf; - auto buf_end = buf + BUFFER_SIZE; + auto buf_end = buf + NUM_CELLS; while (ptr < buf_end && ptr->isZero(*this)) ++ptr; return iterator(this, ptr); } - const_iterator end() const { return const_iterator(this, buf + BUFFER_SIZE); } + const_iterator end() const { return const_iterator(this, buf + NUM_CELLS); } const_iterator cend() const { return end(); } - iterator end() { return iterator(this, buf + BUFFER_SIZE); } + iterator end() { return iterator(this, buf + NUM_CELLS); } public: /// The last parameter is unused but exists for compatibility with HashTable interface. - void ALWAYS_INLINE emplace(Key x, LookupResult & it, bool & inserted, size_t /* hash */ = 0) + void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0) { it = &buf[x]; @@ -288,40 +293,31 @@ public: std::pair res; emplace(Cell::getKey(x), res.first, res.second); if (res.second) - insertSetMapped(lookupResultGetMapped(res.first), x); + insertSetMapped(res.first->getMapped(), x); return res; } - LookupResult ALWAYS_INLINE find(Key x) - { - return !buf[x].isZero(*this) ? &buf[x] : nullptr; - } + LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].isZero(*this) ? &buf[x] : nullptr; } - ConstLookupResult ALWAYS_INLINE find(Key x) const - { - return const_cast *>(this)->find(x); - } + ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast *>(this)->find(x); } - LookupResult ALWAYS_INLINE find(Key, size_t hash_value) - { - return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr; - } + LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].isZero(*this) ? &buf[hash_value] : nullptr; } - ConstLookupResult ALWAYS_INLINE find(Key key, size_t hash_value) const + ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const { return const_cast *>(this)->find(key, hash_value); } - bool ALWAYS_INLINE has(Key x) const { return !buf[x].isZero(*this); } - bool ALWAYS_INLINE has(Key, size_t hash_value) const { return !buf[hash_value].isZero(*this); } + bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].isZero(*this); } + bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].isZero(*this); } void write(DB::WriteBuffer & wb) const { Cell::State::write(wb); DB::writeVarUInt(m_size, wb); - for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr) + for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr) if (!ptr->isZero(*this)) { DB::writeVarUInt(ptr - buf); @@ -334,7 +330,7 @@ public: Cell::State::writeText(wb); DB::writeText(m_size, wb); - for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr) + for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr) { if (!ptr->isZero(*this)) { @@ -393,7 +389,7 @@ public: destroyElements(); m_size = 0; - memset(static_cast(buf), 0, BUFFER_SIZE * sizeof(*buf)); + memset(static_cast(buf), 0, NUM_CELLS * sizeof(*buf)); } /// After executing this function, the table can only be destroyed, @@ -405,9 +401,9 @@ public: free(); } - size_t getBufferSizeInBytes() const { return BUFFER_SIZE * sizeof(Cell); } + size_t getBufferSizeInBytes() const { return NUM_CELLS * sizeof(Cell); } - size_t getBufferSizeInCells() const { return BUFFER_SIZE; } + size_t getBufferSizeInCells() const { return NUM_CELLS; } #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS size_t getCollisions() const { return 0; } diff --git a/dbms/src/Common/HashTable/HashMap.h b/dbms/src/Common/HashTable/HashMap.h index f273d5bcdc7..cdc4a003af8 100644 --- a/dbms/src/Common/HashTable/HashMap.h +++ b/dbms/src/Common/HashTable/HashMap.h @@ -52,12 +52,13 @@ struct HashMapCell HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {} HashMapCell(const value_type & value_, const State &) : value(value_) {} - const Key & getFirst() const { return value.first; } - Mapped & getSecond() { return value.second; } - const Mapped & getSecond() const { return value.second; } - + /// Get the key (externally). + const Key & getKey() const { return value.first; } + Mapped & getMapped() { return value.second; } + const Mapped & getMapped() const { return value.second; } const value_type & getValue() const { return value; } + /// Get the key (internally). static const Key & getKey(const value_type & value) { return value.first; } bool keyEquals(const Key & key_) const { return value.first == key_; } @@ -110,15 +111,6 @@ struct HashMapCell } }; -template -ALWAYS_INLINE inline auto lookupResultGetKey(HashMapCell * cell) -{ return &cell->getFirst(); } - -template -ALWAYS_INLINE inline auto lookupResultGetMapped(HashMapCell * cell) -{ return &cell->getSecond(); } - - template struct HashMapCellWithSavedHash : public HashMapCell { @@ -136,15 +128,6 @@ struct HashMapCellWithSavedHash : public HashMapCell size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; } }; -template -ALWAYS_INLINE inline auto lookupResultGetKey(HashMapCellWithSavedHash * cell) -{ return &cell->getFirst(); } - -template -ALWAYS_INLINE inline auto lookupResultGetMapped(HashMapCellWithSavedHash * cell) -{ return &cell->getSecond(); } - - template < typename Key, typename Cell, @@ -156,14 +139,9 @@ class HashMapTable : public HashTable public: using Self = HashMapTable; using Base = HashTable; - - using key_type = Key; - using value_type = typename Cell::value_type; - using mapped_type = typename Cell::Mapped; - using LookupResult = typename Base::LookupResult; - using HashTable::HashTable; + using Base::Base; /// Merge every cell's value of current map into the destination map via emplace. /// Func should have signature void(Mapped & dst, Mapped & src, bool emplaced). @@ -178,8 +156,8 @@ public: { typename Self::LookupResult res_it; bool inserted; - that.emplace(it->getFirst(), res_it, inserted, it.getHash()); - func(*lookupResultGetMapped(res_it), it->getSecond(), inserted); + that.emplace(Cell::getKey(it->getValue()), res_it, inserted, it.getHash()); + func(res_it->getMapped(), it->getMapped(), inserted); } } @@ -193,11 +171,11 @@ public: { for (auto it = this->begin(), end = this->end(); it != end; ++it) { - auto res_it = that.find(it->getFirst(), it.getHash()); + auto res_it = that.find(Cell::getKey(it->getValue()), it.getHash()); if (!res_it) - func(it->getSecond(), it->getSecond(), false); + func(it->getMapped(), it->getMapped(), false); else - func(*lookupResultGetMapped(res_it), it->getSecond(), true); + func(res_it->getMapped(), it->getMapped(), true); } } @@ -206,7 +184,7 @@ public: void forEachValue(Func && func) { for (auto & v : *this) - func(v.getFirst(), v.getSecond()); + func(v.getKey(), v.getMapped()); } /// Call func(Mapped &) for each hash map element. @@ -214,12 +192,12 @@ public: void forEachMapped(Func && func) { for (auto & v : *this) - func(v.getSecond()); + func(v.getMapped()); } - mapped_type & ALWAYS_INLINE operator[](Key x) + typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x) { - typename HashMapTable::LookupResult it; + LookupResult it; bool inserted; this->emplace(x, it, inserted); @@ -238,9 +216,9 @@ public: * the compiler can not guess about this, and generates the `load`, `increment`, `store` code. */ if (inserted) - new(lookupResultGetMapped(it)) mapped_type(); + new (&it->getMapped()) typename Cell::Mapped(); - return *lookupResultGetMapped(it); + return it->getMapped(); } }; diff --git a/dbms/src/Common/HashTable/HashSet.h b/dbms/src/Common/HashTable/HashSet.h index 4b3aa5204ea..2589329e5ef 100644 --- a/dbms/src/Common/HashTable/HashSet.h +++ b/dbms/src/Common/HashTable/HashSet.h @@ -84,14 +84,6 @@ struct HashSetCellWithSavedHash : public HashTableCell size_t getHash(const Hash & /*hash_function*/) const { return saved_hash; } }; -template -ALWAYS_INLINE inline auto lookupResultGetKey(HashSetCellWithSavedHash * cell) -{ return &cell->key; } - -template -ALWAYS_INLINE inline void * lookupResultGetMapped(HashSetCellWithSavedHash *) -{ return nullptr; } - template < typename Key, diff --git a/dbms/src/Common/HashTable/HashTable.h b/dbms/src/Common/HashTable/HashTable.h index 398b4b594da..5521cc043ad 100644 --- a/dbms/src/Common/HashTable/HashTable.h +++ b/dbms/src/Common/HashTable/HashTable.h @@ -78,66 +78,48 @@ void set(T & x) { x = 0; } } /** - * lookupResultGetKey/Mapped -- functions to get key/"mapped" values from the - * LookupResult returned by find() and emplace() methods of HashTable. - * Must not be called for a null LookupResult. + * getKey/Mapped -- methods to get key/"mapped" values from the LookupResult returned by find() and + * emplace() methods of HashTable. Must not be called for a null LookupResult. * - * We don't use iterators for lookup result to avoid creating temporary - * objects. Instead, LookupResult is a pointer of some kind. There are global - * functions lookupResultGetKey/Mapped, overloaded for this pointer type, that - * return pointers to key/"mapped" values. They are implemented as global - * functions and not as methods, because they have to be overloaded for POD - * types, e.g. in StringHashTable where different components have different - * Cell format. + * We don't use iterators for lookup result. Instead, LookupResult is a pointer of some kind. There + * are methods getKey/Mapped, that return references or values to key/"mapped" values. * - * Different hash table implementations support this interface to a varying - * degree: + * Different hash table implementations support this interface to a varying degree: * - * 1) Hash tables that store neither the key in its original form, nor a - * "mapped" value: FixedHashTable or StringHashTable. - * Neither GetKey nor GetMapped are supported, the only valid operation is - * checking LookupResult for null. + * 1) Hash tables that store neither the key in its original form, nor a "mapped" value: + * FixedHashTable or StringHashTable. Neither GetKey nor GetMapped are supported, the only valid + * operation is checking LookupResult for null. * - * 2) Hash maps that do not store the key, e.g. FixedHashMap or StringHashMap. - * Only GetMapped is supported. + * 2) Hash maps that do not store the key, e.g. FixedHashMap or StringHashMap. Only GetMapped is + * supported. * - * 3) Hash tables that store the key and do not have a "mapped" value, e.g. the - * normal HashTable. - * GetKey returns the key, and GetMapped returns a zero void pointer. This - * simplifies generic code that works with mapped values: it can overload - * on the return type of GetMapped(), and doesn't need other parameters. One - * example is insertSetMapped() function. + * 3) Hash tables that store the key and do not have a "mapped" value, e.g. the normal HashTable. + * GetKey returns the key, and GetMapped returns a zero void pointer. This simplifies generic + * code that works with mapped values: it can overload on the return type of GetMapped(), and + * doesn't need other parameters. One example is insertSetMapped() function. * - * 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap. - * Both GetKey and GetMapped are supported. + * 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap. Both GetKey and + * GetMapped are supported. * * The implementation side goes as follows: - * for (1), LookupResult = void *, no getters; - * for (2), LookupResult = Mapped *, GetMapped is a default implementation that - * takes any pointer-like object; - * for (3) and (4), LookupResult = Cell *, and both getters are implemented. - * They have to be specialized for each particular Cell class to supersede the - * default verision that takes a generic pointer-like object. + * + * for (1), LookupResult->getKey = const VoidKey, LookupResult->getMapped = VoidMapped; + * + * for (2), LookupResult->getKey = const VoidKey, LookupResult->getMapped = Mapped &; + * + * for (3) and (4), LookupResult->getKey = const Key [&], LookupResult->getMapped = Mapped &; + * VoidKey and VoidMapped may have specialized function overloads for generic code. */ -/** - * The default implementation of GetMapped that is used for the above case (2). - */ -template -ALWAYS_INLINE inline auto lookupResultGetMapped(PointerLike && ptr) { return &*ptr; } - -/** - * Generic const wrapper for lookupResultGetMapped, that calls a non-const - * version. Should be safe, given that these functions only do pointer - * arithmetics. - */ -template -ALWAYS_INLINE inline auto lookupResultGetMapped(const T * obj) +struct VoidKey {}; +struct VoidMapped { - auto mapped_ptr = lookupResultGetMapped(const_cast(obj)); - const auto const_mapped_ptr = mapped_ptr; - return const_mapped_ptr; -} + template + auto & operator=(const T &) + { + return *this; + } +}; /** Compile-time interface for cell of the hash table. * Different cell types are used to implement different hash tables. @@ -152,7 +134,7 @@ struct HashTableCell using key_type = Key; using value_type = Key; - using mapped_type = void; + using mapped_type = VoidMapped; Key key; @@ -161,10 +143,12 @@ struct HashTableCell /// Create a cell with the given key / key and value. HashTableCell(const Key & key_, const State &) : key(key_) {} - /// Get what the value_type of the container will be. + /// Get the key (externally). + const Key & getKey() const { return key; } + VoidMapped getMapped() const { return {}; } const value_type & getValue() const { return key; } - /// Get the key. + /// Get the key (internally). static const Key & getKey(const value_type & value) { return value; } /// Are the keys at the cells equal? @@ -207,23 +191,15 @@ struct HashTableCell void readText(DB::ReadBuffer & rb) { DB::readDoubleQuoted(key, rb); } }; -template -ALWAYS_INLINE inline auto lookupResultGetKey(HashTableCell * cell) -{ return &cell->key; } - -template -ALWAYS_INLINE inline void * lookupResultGetMapped(HashTableCell *) -{ return nullptr; } - /** * A helper function for HashTable::insert() to set the "mapped" value. - * Overloaded on the mapped type, does nothing if it's void. + * Overloaded on the mapped type, does nothing if it's VoidMapped. */ template -void insertSetMapped(void * /* dest */, const ValueType & /* src */) {} +void insertSetMapped(VoidMapped /* dest */, const ValueType & /* src */) {} template -void insertSetMapped(MappedType * dest, const ValueType & src) { *dest = src.second; } +void insertSetMapped(MappedType & dest, const ValueType & src) { dest = src.second; } /** Determines the size of the hash table, and when and how much it should be resized. @@ -276,7 +252,7 @@ struct HashTableGrower /** When used as a Grower, it turns a hash table into something like a lookup table. * It remains non-optimal - the cells store the keys. * Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed. - * TODO Make a proper lookup table. + * NOTE: Better to use FixedHashTable instead. */ template struct HashTableFixedGrower @@ -366,7 +342,6 @@ protected: using HashValue = size_t; using Self = HashTable; - using cell_type = Cell; size_t m_size = 0; /// Amount of elements Cell * buf; /// A piece of memory for all elements except the element with zero key. @@ -586,9 +561,10 @@ protected: public: using key_type = Key; + using mapped_type = typename Cell::mapped_type; using value_type = typename Cell::value_type; + using cell_type = Cell; - // Use lookupResultGetMapped/Key to work with these values. using LookupResult = Cell *; using ConstLookupResult = const Cell *; @@ -751,7 +727,7 @@ protected: /// If the key is zero, insert it into a special place and return true. /// We don't have to persist a zero key, because it's not actually inserted. /// That's why we just take a Key by value, an not a key holder. - bool ALWAYS_INLINE emplaceIfZero(Key x, LookupResult & it, bool & inserted, size_t hash_value) + bool ALWAYS_INLINE emplaceIfZero(const Key & x, LookupResult & it, bool & inserted, size_t hash_value) { /// If it is claimed that the zero key can not be inserted into the table. if (!Cell::need_zero_value_storage) @@ -793,7 +769,7 @@ protected: keyHolderPersistKey(key_holder); const auto & key = keyHolderGetKey(key_holder); - new(&buf[place_value]) Cell(key, *this); + new (&buf[place_value]) Cell(key, *this); buf[place_value].setHash(hash_value); inserted = true; ++m_size; @@ -846,7 +822,7 @@ public: } if (res.second) - insertSetMapped(lookupResultGetMapped(res.first), x); + insertSetMapped(res.first->getMapped(), x); return res; } @@ -869,11 +845,11 @@ public: * * Example usage: * - * Map::iterator it; + * Map::LookupResult it; * bool inserted; * map.emplace(key, it, inserted); * if (inserted) - * new(&it->second) Mapped(value); + * new (&it->getMapped()) Mapped(value); */ template void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) @@ -903,7 +879,7 @@ public: resize(); } - LookupResult ALWAYS_INLINE find(Key x) + LookupResult ALWAYS_INLINE find(const Key & x) { if (Cell::isZero(x, *this)) return this->hasZero() ? this->zeroValue() : nullptr; @@ -913,12 +889,12 @@ public: return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr; } - ConstLookupResult ALWAYS_INLINE find(Key x) const + ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast *>(this)->find(x); } - LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) + LookupResult ALWAYS_INLINE find(const Key & x, size_t hash_value) { if (Cell::isZero(x, *this)) return this->hasZero() ? this->zeroValue() : nullptr; @@ -927,7 +903,12 @@ public: return !buf[place_value].isZero(*this) ? &buf[place_value] : nullptr; } - bool ALWAYS_INLINE has(Key x) const + ConstLookupResult ALWAYS_INLINE find(const Key & x, size_t hash_value) const + { + return const_cast *>(this)->find(x, hash_value); + } + + bool ALWAYS_INLINE has(const Key & x) const { if (Cell::isZero(x, *this)) return this->hasZero(); @@ -938,7 +919,7 @@ public: } - bool ALWAYS_INLINE has(Key x, size_t hash_value) const + bool ALWAYS_INLINE has(const Key & x, size_t hash_value) const { if (Cell::isZero(x, *this)) return this->hasZero(); diff --git a/dbms/src/Common/HashTable/SmallTable.h b/dbms/src/Common/HashTable/SmallTable.h index 8f02c29c31e..d9d0fb19a2f 100644 --- a/dbms/src/Common/HashTable/SmallTable.h +++ b/dbms/src/Common/HashTable/SmallTable.h @@ -38,7 +38,6 @@ protected: friend class Reader; using Self = SmallTable; - using cell_type = Cell; size_t m_size = 0; /// Amount of elements. Cell buf[capacity]; /// A piece of memory for all elements. @@ -72,8 +71,9 @@ protected: public: using key_type = Key; + using mapped_type = typename Cell::mapped_type; using value_type = typename Cell::value_type; - + using cell_type = Cell; class Reader final : private Cell::State { @@ -391,16 +391,17 @@ class SmallMapTable : public SmallTable { public: using key_type = Key; - using mapped_type = typename Cell::Mapped; + using mapped_type = typename Cell::mapped_type; using value_type = typename Cell::value_type; + using cell_type = Cell; mapped_type & ALWAYS_INLINE operator[](Key x) { typename SmallMapTable::iterator it; bool inserted; this->emplace(x, it, inserted); - new(&it->getSecond()) mapped_type(); - return it->getSecond(); + new (&it->getMapped()) mapped_type(); + return it->getMapped(); } }; diff --git a/dbms/src/Common/HashTable/StringHashMap.h b/dbms/src/Common/HashTable/StringHashMap.h index 4fcc46eee24..3ee59c89a36 100644 --- a/dbms/src/Common/HashTable/StringHashMap.h +++ b/dbms/src/Common/HashTable/StringHashMap.h @@ -8,43 +8,60 @@ template struct StringHashMapCell : public HashMapCell { using Base = HashMapCell; + using value_type = typename Base::value_type; using Base::Base; static constexpr bool need_zero_value_storage = false; + // external + const StringRef getKey() const { return toStringRef(this->value.first); } + // internal + static const Key & getKey(const value_type & value_) { return value_.first; } }; -template -auto lookupResultGetMapped(StringHashMapCell * cell) { return &cell->getSecond(); } - template struct StringHashMapCell : public HashMapCell { using Base = HashMapCell; + using value_type = typename Base::value_type; using Base::Base; static constexpr bool need_zero_value_storage = false; bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } // Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method static bool isZero(const StringKey16 & key, const HashTableNoState & /*state*/) { return key.low == 0; } void setZero() { this->value.first.low = 0; } + // external + const StringRef getKey() const { return toStringRef(this->value.first); } + // internal + static const StringKey16 & getKey(const value_type & value_) { return value_.first; } }; template struct StringHashMapCell : public HashMapCell { using Base = HashMapCell; + using value_type = typename Base::value_type; using Base::Base; static constexpr bool need_zero_value_storage = false; bool isZero(const HashTableNoState & state) const { return isZero(this->value.first, state); } // Assuming String does not contain zero bytes. NOTE: Cannot be used in serialized method static bool isZero(const StringKey24 & key, const HashTableNoState & /*state*/) { return key.a == 0; } void setZero() { this->value.first.a = 0; } + // external + const StringRef getKey() const { return toStringRef(this->value.first); } + // internal + static const StringKey24 & getKey(const value_type & value_) { return value_.first; } }; template struct StringHashMapCell : public HashMapCellWithSavedHash { using Base = HashMapCellWithSavedHash; + using value_type = typename Base::value_type; using Base::Base; static constexpr bool need_zero_value_storage = false; + // external + using Base::getKey; + // internal + static const StringRef & getKey(const value_type & value_) { return value_.first; } }; template @@ -61,13 +78,10 @@ template class StringHashMap : public StringHashTable> { public: + using Key = StringRef; using Base = StringHashTable>; using Self = StringHashMap; - using Key = StringRef; - using key_type = StringRef; - using mapped_type = TMapped; - using value_type = typename Base::Ts::value_type; - using LookupResult = mapped_type *; + using LookupResult = typename Base::LookupResult; using Base::Base; @@ -80,18 +94,13 @@ public: template void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) { - if (this->m0.hasZero()) + if (this->m0.hasZero() && that.m0.hasZero()) + func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false); + else if (this->m0.hasZero()) { - const bool emplace_new_zero = !that.m0.hasZero(); - if (emplace_new_zero) - { - that.m0.setHasZero(); - } - - func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), - emplace_new_zero); + that.m0.setHasZero(); + func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true); } - this->m1.mergeToViaEmplace(that.m1, func); this->m2.mergeToViaEmplace(that.m2, func); this->m3.mergeToViaEmplace(that.m3, func); @@ -106,32 +115,25 @@ public: template void ALWAYS_INLINE mergeToViaFind(Self & that, Func && func) { - if (this->m0.hasZero()) - { - if (that.m0.hasZero()) - { - func(that.m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), true); - } - else - { - func(this->m0.zeroValue()->getSecond(), this->m0.zeroValue()->getSecond(), false); - } - } - + if (this->m0.size() && that.m0.size()) + func(that.m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), true); + else if (this->m0.size()) + func(this->m0.zeroValue()->getMapped(), this->m0.zeroValue()->getMapped(), false); this->m1.mergeToViaFind(that.m1, func); this->m2.mergeToViaFind(that.m2, func); this->m3.mergeToViaFind(that.m3, func); this->ms.mergeToViaFind(that.ms, func); } - mapped_type & ALWAYS_INLINE operator[](Key x) + TMapped & ALWAYS_INLINE operator[](const Key & x) { + LookupResult it; bool inserted; - LookupResult it = nullptr; - emplace(x, it, inserted); + this->emplace(x, it, inserted); if (inserted) - new (it) mapped_type(); - return *it; + new (&it->getMapped()) TMapped(); + + return it->getMapped(); } template @@ -139,27 +141,27 @@ public: { if (this->m0.size()) { - func(StringRef{}, this->m0.zeroValue()->getSecond()); + func(StringRef{}, this->m0.zeroValue()->getMapped()); } for (auto & v : this->m1) { - func(toStringRef(v.getFirst()), v.getSecond()); + func(v.getKey(), v.getMapped()); } for (auto & v : this->m2) { - func(toStringRef(v.getFirst()), v.getSecond()); + func(v.getKey(), v.getMapped()); } for (auto & v : this->m3) { - func(toStringRef(v.getFirst()), v.getSecond()); + func(v.getKey(), v.getMapped()); } for (auto & v : this->ms) { - func(v.getFirst(), v.getSecond()); + func(v.getKey(), v.getMapped()); } } @@ -167,14 +169,14 @@ public: void ALWAYS_INLINE forEachMapped(Func && func) { if (this->m0.size()) - func(this->m0.zeroValue()->getSecond()); + func(this->m0.zeroValue()->getMapped()); for (auto & v : this->m1) - func(v.getSecond()); + func(v.getMapped()); for (auto & v : this->m2) - func(v.getSecond()); + func(v.getMapped()); for (auto & v : this->m3) - func(v.getSecond()); + func(v.getMapped()); for (auto & v : this->ms) - func(v.getSecond()); + func(v.getMapped()); } }; diff --git a/dbms/src/Common/HashTable/StringHashTable.h b/dbms/src/Common/HashTable/StringHashTable.h index b23edb396ae..d80b26c6a7c 100644 --- a/dbms/src/Common/HashTable/StringHashTable.h +++ b/dbms/src/Common/HashTable/StringHashTable.h @@ -3,9 +3,7 @@ #include #include -struct StringKey0 -{ -}; +#include using StringKey8 = UInt64; using StringKey16 = DB::UInt128; @@ -112,7 +110,7 @@ public: using ConstLookupResult = const Cell *; template - void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t /* hash */) + void ALWAYS_INLINE emplace(KeyHolder &&, LookupResult & it, bool & inserted, size_t = 0) { if (!hasZero()) { @@ -125,11 +123,16 @@ public: } template - LookupResult ALWAYS_INLINE find(Key, size_t /* hash */) + LookupResult ALWAYS_INLINE find(const Key &, size_t = 0) { return hasZero() ? zeroValue() : nullptr; } + template + ConstLookupResult ALWAYS_INLINE find(const Key &, size_t = 0) const + { + return hasZero() ? zeroValue() : nullptr; + } void write(DB::WriteBuffer & wb) const { zeroValue()->write(wb); } void writeText(DB::WriteBuffer & wb) const { zeroValue()->writeText(wb); } @@ -148,6 +151,26 @@ struct StringHashTableGrower : public HashTableGrower void increaseSize() { this->size_degree += 1; } }; +template +struct StringHashTableLookupResult +{ + Mapped * mapped_ptr; + StringHashTableLookupResult() {} + StringHashTableLookupResult(Mapped * mapped_ptr_) : mapped_ptr(mapped_ptr_) {} + StringHashTableLookupResult(std::nullptr_t) {} + const VoidKey getKey() const { return {}; } + auto & getMapped() { return *mapped_ptr; } + auto & operator*() { return *this; } + auto & operator*() const { return *this; } + auto * operator->() { return this; } + auto * operator->() const { return this; } + operator bool() const { return mapped_ptr; } + friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; } + friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; } + friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; } + friend bool operator!=(const std::nullptr_t &, const StringHashTableLookupResult & b) { return b.mapped_ptr; } +}; + template class StringHashTable : private boost::noncopyable { @@ -177,8 +200,12 @@ protected: public: using Key = StringRef; using key_type = Key; + using mapped_type = typename Ts::mapped_type; using value_type = typename Ts::value_type; - using LookupResult = typename Ts::mapped_type *; + using cell_type = typename Ts::cell_type; + + using LookupResult = StringHashTableLookupResult; + using ConstLookupResult = StringHashTableLookupResult; StringHashTable() {} @@ -199,16 +226,15 @@ public: // 2. Use switch case extension to generate fast dispatching table // 3. Funcs are named callables that can be force_inlined // NOTE: It relies on Little Endianness - template - decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) + template + static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func) { const StringRef & x = keyHolderGetKey(key_holder); const size_t sz = x.size; if (sz == 0) { - static constexpr StringKey0 key0{}; keyHolderDiscardKey(key_holder); - return func(m0, key0, 0); + return func(self.m0, VoidKey{}, 0); } const char * p = x.data; @@ -239,7 +265,7 @@ public: n[0] >>= s; } keyHolderDiscardKey(key_holder); - return func(m1, k8, hash(k8)); + return func(self.m1, k8, hash(k8)); } case 1: // 9..16 bytes { @@ -248,7 +274,7 @@ public: memcpy(&n[1], lp, 8); n[1] >>= s; keyHolderDiscardKey(key_holder); - return func(m2, k16, hash(k16)); + return func(self.m2, k16, hash(k16)); } case 2: // 17..24 bytes { @@ -257,11 +283,11 @@ public: memcpy(&n[2], lp, 8); n[2] >>= s; keyHolderDiscardKey(key_holder); - return func(m3, k24, hash(k24)); + return func(self.m3, k24, hash(k24)); } default: // >= 25 bytes { - return func(ms, std::forward(key_holder), hash(x)); + return func(self.ms, std::forward(key_holder), hash(x)); } } } @@ -279,14 +305,14 @@ public: { typename Map::LookupResult result; map.emplace(key_holder, result, inserted, hash); - mapped = lookupResultGetMapped(result); + mapped = &result->getMapped(); } }; template void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) { - this->dispatch(key_holder, EmplaceCallable(it, inserted)); + this->dispatch(*this, key_holder, EmplaceCallable(it, inserted)); } struct FindCallable @@ -295,15 +321,25 @@ public: // any key holders here, only with normal keys. The key type is still // different for every subtable, this is why it is a template parameter. template - LookupResult ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash) + auto ALWAYS_INLINE operator()(Submap & map, const SubmapKey & key, size_t hash) { - return lookupResultGetMapped(map.find(key, hash)); + return &map.find(key, hash)->getMapped(); } }; - LookupResult ALWAYS_INLINE find(Key x) + LookupResult ALWAYS_INLINE find(const Key & x) { - return dispatch(x, FindCallable{}); + return dispatch(*this, x, FindCallable{}); + } + + ConstLookupResult ALWAYS_INLINE find(const Key & x) const + { + return dispatch(*this, x, FindCallable{}); + } + + bool ALWAYS_INLINE has(const Key & x, size_t = 0) const + { + return dispatch(*this, x, FindCallable{}) != nullptr; } void write(DB::WriteBuffer & wb) const diff --git a/dbms/src/Common/HashTable/TwoLevelHashMap.h b/dbms/src/Common/HashTable/TwoLevelHashMap.h index f90cb6d2306..7bebf0d8af5 100644 --- a/dbms/src/Common/HashTable/TwoLevelHashMap.h +++ b/dbms/src/Common/HashTable/TwoLevelHashMap.h @@ -16,10 +16,6 @@ template class TwoLevelHashMapTable : public TwoLevelHashTable> { public: - using key_type = Key; - using mapped_type = typename Cell::Mapped; - using value_type = typename Cell::value_type; - using Impl = ImplTable; using LookupResult = typename Impl::LookupResult; @@ -32,16 +28,16 @@ public: this->impls[i].forEachMapped(func); } - mapped_type & ALWAYS_INLINE operator[](Key x) + typename Cell::Mapped & ALWAYS_INLINE operator[](const Key & x) { - typename TwoLevelHashMapTable::LookupResult it; + LookupResult it; bool inserted; this->emplace(x, it, inserted); if (inserted) - new(lookupResultGetMapped(it)) mapped_type(); + new (&it->getMapped()) typename Cell::Mapped(); - return *lookupResultGetMapped(it); + return it->getMapped(); } }; diff --git a/dbms/src/Common/HashTable/TwoLevelHashTable.h b/dbms/src/Common/HashTable/TwoLevelHashTable.h index 988fa139caa..e6e2ce45639 100644 --- a/dbms/src/Common/HashTable/TwoLevelHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelHashTable.h @@ -82,7 +82,9 @@ protected: public: using key_type = typename Impl::key_type; + using mapped_type = typename Impl::mapped_type; using value_type = typename Impl::value_type; + using cell_type = typename Impl::cell_type; using LookupResult = typename Impl::LookupResult; using ConstLookupResult = typename Impl::ConstLookupResult; @@ -217,7 +219,7 @@ public: emplace(Cell::getKey(x), res.first, res.second, hash_value); if (res.second) - insertSetMapped(lookupResultGetMapped(res.first), x); + insertSetMapped(res.first->getMapped(), x); return res; } diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashMap.h b/dbms/src/Common/HashTable/TwoLevelStringHashMap.h index 29bc4b394a7..55d54e51b6a 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashMap.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashMap.h @@ -8,16 +8,12 @@ class TwoLevelStringHashMap : public TwoLevelStringHashTable, StringHashMap>; - using Base::Base; - using typename Base::Impl; - using mapped_type = TMapped; - using value_type = typename Base::value_type; - using LookupResult = typename Base::LookupResult; + using Base::Base; + template void ALWAYS_INLINE forEachMapped(Func && func) { @@ -25,13 +21,13 @@ public: return this->impls[i].forEachMapped(func); } - mapped_type & ALWAYS_INLINE operator[](Key x) + TMapped & ALWAYS_INLINE operator[](const Key & x) { bool inserted; LookupResult it; - emplace(x, it, inserted); + this->emplace(x, it, inserted); if (inserted) - new (lookupResultGetMapped(it)) mapped_type(); - return *lookupResultGetMapped(it); + new (&it->getMapped()) TMapped(); + return it->getMapped(); } }; diff --git a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h index 2aeb266c66b..88241c6c5fe 100644 --- a/dbms/src/Common/HashTable/TwoLevelStringHashTable.h +++ b/dbms/src/Common/HashTable/TwoLevelStringHashTable.h @@ -19,8 +19,7 @@ public: // TODO: currently hashing contains redundant computations when doing distributed or external aggregations size_t hash(const Key & x) const { - return const_cast(*this).dispatch(x, - [&](const auto &, const auto &, size_t hash) { return hash; }); + return const_cast(*this).dispatch(*this, x, [&](const auto &, const auto &, size_t hash) { return hash; }); } size_t operator()(const Key & x) const { return hash(x); } @@ -30,8 +29,12 @@ public: public: using key_type = typename Impl::key_type; + using mapped_type = typename Impl::mapped_type; using value_type = typename Impl::value_type; + using cell_type = typename Impl::cell_type; + using LookupResult = typename Impl::LookupResult; + using ConstLookupResult = typename Impl::ConstLookupResult; Impl impls[NUM_BUCKETS]; @@ -71,16 +74,15 @@ public: // This function is mostly the same as StringHashTable::dispatch, but with // added bucket computation. See the comments there. - template - decltype(auto) ALWAYS_INLINE dispatch(KeyHolder && key_holder, Func && func) + template + static auto ALWAYS_INLINE dispatch(Self & self, KeyHolder && key_holder, Func && func) { const StringRef & x = keyHolderGetKey(key_holder); const size_t sz = x.size; if (sz == 0) { - static constexpr StringKey0 key0{}; keyHolderDiscardKey(key_holder); - return func(impls[0].m0, key0, 0); + return func(self.impls[0].m0, VoidKey{}, 0); } const char * p = x.data; @@ -113,7 +115,7 @@ public: auto res = hash(k8); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); - return func(impls[buck].m1, k8, res); + return func(self.impls[buck].m1, k8, res); } case 1: { @@ -124,7 +126,7 @@ public: auto res = hash(k16); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); - return func(impls[buck].m2, k16, res); + return func(self.impls[buck].m2, k16, res); } case 2: { @@ -135,13 +137,13 @@ public: auto res = hash(k24); auto buck = getBucketFromHash(res); keyHolderDiscardKey(key_holder); - return func(impls[buck].m3, k24, res); + return func(self.impls[buck].m3, k24, res); } default: { auto res = hash(x); auto buck = getBucketFromHash(res); - return func(impls[buck].ms, std::forward(key_holder), res); + return func(self.impls[buck].ms, std::forward(key_holder), res); } } } @@ -149,12 +151,17 @@ public: template void ALWAYS_INLINE emplace(KeyHolder && key_holder, LookupResult & it, bool & inserted) { - dispatch(key_holder, typename Impl::EmplaceCallable{it, inserted}); + dispatch(*this, key_holder, typename Impl::EmplaceCallable{it, inserted}); } - LookupResult ALWAYS_INLINE find(Key x) + LookupResult ALWAYS_INLINE find(const Key x) { - return dispatch(x, typename Impl::FindCallable{}); + return dispatch(*this, x, typename Impl::FindCallable{}); + } + + ConstLookupResult ALWAYS_INLINE find(const Key x) const + { + return dispatch(*this, x, typename Impl::FindCallable{}); } void write(DB::WriteBuffer & wb) const diff --git a/dbms/src/Common/SpaceSaving.h b/dbms/src/Common/SpaceSaving.h index 93ddfee6b19..9ad7f6275d6 100644 --- a/dbms/src/Common/SpaceSaving.h +++ b/dbms/src/Common/SpaceSaving.h @@ -369,7 +369,7 @@ private: if (!it) return nullptr; - return *lookupResultGetMapped(it); + return it->getMapped(); } void rebuildCounterMap() diff --git a/dbms/src/Common/tests/auto_array.cpp b/dbms/src/Common/tests/auto_array.cpp index fd23afc0236..8cc332200f7 100644 --- a/dbms/src/Common/tests/auto_array.cpp +++ b/dbms/src/Common/tests/auto_array.cpp @@ -155,10 +155,10 @@ int main(int argc, char ** argv) map.emplace(rand(), it, inserted); if (inserted) { - new(lookupResultGetMapped(it)) Arr(n); + new (&it->getMapped()) Arr(n); for (size_t j = 0; j < n; ++j) - (*lookupResultGetMapped(it))[j] = field; + (it->getMapped())[j] = field; } } diff --git a/dbms/src/Common/tests/parallel_aggregation.cpp b/dbms/src/Common/tests/parallel_aggregation.cpp index 7ecb054b481..21fc9179d21 100644 --- a/dbms/src/Common/tests/parallel_aggregation.cpp +++ b/dbms/src/Common/tests/parallel_aggregation.cpp @@ -82,14 +82,14 @@ void aggregate12(Map & map, Source::const_iterator begin, Source::const_iterator { if (prev_it != end && *it == *prev_it) { - ++*lookupResultGetMapped(found); + ++found->getMapped(); continue; } prev_it = it; bool inserted; map.emplace(*it, found, inserted); - ++*lookupResultGetMapped(found); + ++found->getMapped(); } } @@ -107,14 +107,14 @@ void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source::const_ { if (*it == *prev_it) { - ++*lookupResultGetMapped(found); + ++found->getMapped(); continue; } prev_it = it; bool inserted; map.emplace(*it, found, inserted); - ++*lookupResultGetMapped(found); + ++found->getMapped(); } } @@ -126,7 +126,7 @@ void merge2(MapTwoLevel * maps, size_t num_threads, size_t bucket) { for (size_t i = 1; i < num_threads; ++i) for (auto it = maps[i].impls[bucket].begin(); it != maps[i].impls[bucket].end(); ++it) - maps[0].impls[bucket][it->getFirst()] += it->getSecond(); + maps[0].impls[bucket][it->getKey()] += it->getMapped(); } void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_iterator begin, Source::const_iterator end) @@ -138,7 +138,7 @@ void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_ auto found = local_map.find(*it); if (found) - ++*lookupResultGetMapped(found); + ++found->getMapped(); else if (local_map.size() < threshold) ++local_map[*it]; /// TODO You could do one lookup, not two. else @@ -163,13 +163,13 @@ void aggregate33(Map & local_map, Map & global_map, Mutex & mutex, Source::const Map::LookupResult found; bool inserted; local_map.emplace(*it, found, inserted); - ++*lookupResultGetMapped(found); + ++found->getMapped(); if (inserted && local_map.size() == threshold) { std::lock_guard lock(mutex); for (auto & value_type : local_map) - global_map[value_type.getFirst()] += value_type.getSecond(); + global_map[value_type.getKey()] += value_type.getMapped(); local_map.clear(); } @@ -198,7 +198,7 @@ void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexes, Sour auto found = local_map.find(*it); if (found) - ++*lookupResultGetMapped(found); + ++found->getMapped(); else { size_t hash_value = global_map.hash(*it); @@ -311,7 +311,7 @@ int main(int argc, char ** argv) for (size_t i = 1; i < num_threads; ++i) for (auto it = maps[i].begin(); it != maps[i].end(); ++it) - maps[0][it->getFirst()] += it->getSecond(); + maps[0][it->getKey()] += it->getMapped(); watch.stop(); double time_merged = watch.elapsedSeconds(); @@ -365,7 +365,7 @@ int main(int argc, char ** argv) for (size_t i = 1; i < num_threads; ++i) for (auto it = maps[i].begin(); it != maps[i].end(); ++it) - maps[0][it->getFirst()] += it->getSecond(); + maps[0][it->getKey()] += it->getMapped(); watch.stop(); @@ -435,7 +435,7 @@ int main(int argc, char ** argv) continue; finish = false; - maps[0][iterators[i]->getFirst()] += iterators[i]->getSecond(); + maps[0][iterators[i]->getKey()] += iterators[i]->getMapped(); ++iterators[i]; } @@ -623,7 +623,7 @@ int main(int argc, char ** argv) for (size_t i = 0; i < num_threads; ++i) for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it) - global_map[it->getFirst()] += it->getSecond(); + global_map[it->getKey()] += it->getMapped(); pool.wait(); @@ -689,7 +689,7 @@ int main(int argc, char ** argv) for (size_t i = 0; i < num_threads; ++i) for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it) - global_map[it->getFirst()] += it->getSecond(); + global_map[it->getKey()] += it->getMapped(); pool.wait(); @@ -760,7 +760,7 @@ int main(int argc, char ** argv) for (size_t i = 0; i < num_threads; ++i) for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it) - global_map[it->getFirst()] += it->getSecond(); + global_map[it->getKey()] += it->getMapped(); pool.wait(); diff --git a/dbms/src/Common/tests/parallel_aggregation2.cpp b/dbms/src/Common/tests/parallel_aggregation2.cpp index 56eb34bbf0c..e46c9c7c4fc 100644 --- a/dbms/src/Common/tests/parallel_aggregation2.cpp +++ b/dbms/src/Common/tests/parallel_aggregation2.cpp @@ -51,9 +51,9 @@ struct AggregateIndependent map.emplace(*it, place, inserted); if (inserted) - creator(*lookupResultGetMapped(place)); + creator(place->getMapped()); else - updater(*lookupResultGetMapped(place)); + updater(place->getMapped()); } }); } @@ -93,7 +93,7 @@ struct AggregateIndependentWithSequentialKeysOptimization { if (it != begin && *it == prev_key) { - updater(*lookupResultGetMapped(place)); + updater(place->getMapped()); continue; } prev_key = *it; @@ -102,9 +102,9 @@ struct AggregateIndependentWithSequentialKeysOptimization map.emplace(*it, place, inserted); if (inserted) - creator(*lookupResultGetMapped(place)); + creator(place->getMapped()); else - updater(*lookupResultGetMapped(place)); + updater(place->getMapped()); } }); } @@ -131,7 +131,7 @@ struct MergeSequential auto begin = source_maps[i]->begin(); auto end = source_maps[i]->end(); for (auto it = begin; it != end; ++it) - merger((*source_maps[0])[it->getFirst()], it->getSecond()); + merger((*source_maps[0])[it->getKey()], it->getMapped()); } result_map = source_maps[0]; @@ -161,7 +161,7 @@ struct MergeSequentialTransposed /// In practice not better than usual. continue; finish = false; - merger((*result_map)[iterators[i]->getFirst()], iterators[i]->getSecond()); + merger((*result_map)[iterators[i]->getKey()], iterators[i]->getMapped()); ++iterators[i]; } diff --git a/dbms/src/Common/tests/small_table.cpp b/dbms/src/Common/tests/small_table.cpp index 32b4e8c48fe..9266e928d09 100644 --- a/dbms/src/Common/tests/small_table.cpp +++ b/dbms/src/Common/tests/small_table.cpp @@ -42,7 +42,7 @@ int main(int, char **) cont[1] = "Goodbye."; for (auto x : cont) - std::cerr << x.getFirst() << " -> " << x.getSecond() << std::endl; + std::cerr << x.getKey() << " -> " << x.getMapped() << std::endl; DB::WriteBufferFromOwnString wb; cont.writeText(wb); diff --git a/dbms/src/Core/tests/string_pool.cpp b/dbms/src/Core/tests/string_pool.cpp index 2db1233e8fe..a389fb01a5e 100644 --- a/dbms/src/Core/tests/string_pool.cpp +++ b/dbms/src/Core/tests/string_pool.cpp @@ -211,7 +211,7 @@ int main(int argc, char ** argv) { RefsHashMap::LookupResult inserted_it; bool inserted; - set.emplace(StringRef(*lookupResultGetMapped(it)), inserted_it, inserted); + set.emplace(StringRef(*it), inserted_it, inserted); } std::cerr << "Inserted refs into HashMap in " << watch.elapsedSeconds() << " sec, " @@ -222,7 +222,7 @@ int main(int argc, char ** argv) size_t i = 0; for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) { - devnull.write(it->getFirst().data, it->getFirst().size); + devnull.write(it->getKey().data, it->getKey().size); devnull << std::endl; } @@ -249,7 +249,7 @@ int main(int argc, char ** argv) size_t i = 0; for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i) { - devnull.write(it->getFirst().data, it->getFirst().size); + devnull.write(it->getKey().data, it->getKey().size); devnull << std::endl; } } diff --git a/dbms/src/DataTypes/DataTypeEnum.cpp b/dbms/src/DataTypes/DataTypeEnum.cpp index 416636f30db..5365b99fed3 100644 --- a/dbms/src/DataTypes/DataTypeEnum.cpp +++ b/dbms/src/DataTypes/DataTypeEnum.cpp @@ -75,7 +75,7 @@ void DataTypeEnum::fillMaps() if (!inserted_value.second) throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second) - + " and " + toString(*lookupResultGetMapped(inserted_value.first)), + + " and " + toString(inserted_value.first->getMapped()), ErrorCodes::SYNTAX_ERROR}; const auto inserted_name = value_to_name_map.insert( diff --git a/dbms/src/DataTypes/DataTypeEnum.h b/dbms/src/DataTypes/DataTypeEnum.h index ac96c08dc75..2e5ba74b3e6 100644 --- a/dbms/src/DataTypes/DataTypeEnum.h +++ b/dbms/src/DataTypes/DataTypeEnum.h @@ -81,7 +81,7 @@ public: if (!it) throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::LOGICAL_ERROR}; - return *lookupResultGetMapped(it); + return it->getMapped(); } Field castToName(const Field & value_or_name) const override; diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp index b27adc20636..8ed917e8f89 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp @@ -216,7 +216,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes std::vector required_rows(outdated_keys.size()); std::transform( - std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); }); + std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); }); /// request new values update( diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h index 6837bd9eab4..d8146548c2b 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h @@ -311,7 +311,7 @@ private: std::vector required_rows(outdated_keys.size()); std::transform( - std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); }); + std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getMapped().front(); }); /// request new values update( @@ -437,7 +437,7 @@ private: std::vector required_rows(outdated_keys.size()); std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { - return pair.getSecond().front(); + return pair.getMapped().front(); }); update( @@ -469,7 +469,7 @@ private: { const StringRef key = keys_array[row]; const auto it = map.find(key); - const auto string_ref = it ? *lookupResultGetMapped(it) : get_default(row); + const auto string_ref = it ? it->getMapped() : get_default(row); out->insertData(string_ref.data, string_ref.size); } } @@ -576,7 +576,7 @@ private: /// Check which ids have not been found and require setting null_value for (const auto & key_found_pair : remaining_keys) { - if (key_found_pair.getSecond()) + if (key_found_pair.getMapped()) { ++found_num; continue; @@ -584,7 +584,7 @@ private: ++not_found_num; - auto key = key_found_pair.getFirst(); + auto key = key_found_pair.getKey(); const auto hash = StringRefHash{}(key); const auto find_result = findCellIdx(key, now, hash); const auto & cell_idx = find_result.cell_idx; diff --git a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp index 41a5caaa768..1dafde39a24 100644 --- a/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyHashedDictionary.cpp @@ -561,7 +561,7 @@ void ComplexKeyHashedDictionary::getItemsImpl( const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool); const auto it = attr.find(key); - set_value(i, it ? static_cast(*lookupResultGetMapped(it)) : get_default(i)); + set_value(i, it ? static_cast(it->getMapped()) : get_default(i)); /// free memory allocated for the key temporary_keys_pool.rollback(key.size); @@ -729,7 +729,7 @@ std::vector ComplexKeyHashedDictionary::getKeys(const Attribute & att std::vector keys; keys.reserve(attr.size()); for (const auto & key : attr) - keys.push_back(key.getFirst()); + keys.push_back(key.getKey()); return keys; } diff --git a/dbms/src/Dictionaries/HashedDictionary.cpp b/dbms/src/Dictionaries/HashedDictionary.cpp index 1c6fd602ba9..78c871bebc4 100644 --- a/dbms/src/Dictionaries/HashedDictionary.cpp +++ b/dbms/src/Dictionaries/HashedDictionary.cpp @@ -13,8 +13,8 @@ template auto first(const T & value) -> decltype(value.first) { ret template auto second(const T & value) -> decltype(value.second) { return value.second; } /// HashMap -template auto first(const T & value) -> decltype(value.getFirst()) { return value.getFirst(); } -template auto second(const T & value) -> decltype(value.getSecond()) { return value.getSecond(); } +template auto first(const T & value) -> decltype(value.getKey()) { return value.getKey(); } +template auto second(const T & value) -> decltype(value.getMapped()) { return value.getMapped(); } } diff --git a/dbms/src/Dictionaries/RangeHashedDictionary.cpp b/dbms/src/Dictionaries/RangeHashedDictionary.cpp index ae67027c210..b1412d98f75 100644 --- a/dbms/src/Dictionaries/RangeHashedDictionary.cpp +++ b/dbms/src/Dictionaries/RangeHashedDictionary.cpp @@ -127,7 +127,7 @@ void RangeHashedDictionary::getString( if (it) { const auto date = dates[i]; - const auto & ranges_and_values = *lookupResultGetMapped(it); + const auto & ranges_and_values = it->getMapped(); const auto val_it = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value & v) { @@ -398,7 +398,7 @@ void RangeHashedDictionary::getItemsImpl( if (it) { const auto date = dates[i]; - const auto & ranges_and_values = *lookupResultGetMapped(it); + const auto & ranges_and_values = it->getMapped(); const auto val_it = std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value & v) { @@ -425,7 +425,7 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K if (it) { - auto & values = *lookupResultGetMapped(it); + auto & values = it->getMapped(); const auto insert_it = std::lower_bound(std::begin(values), std::end(values), range, [](const Value & lhs, const Range & rhs_range) @@ -498,7 +498,7 @@ void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key i if (it) { - auto & values = *lookupResultGetMapped(it); + auto & values = it->getMapped(); const auto insert_it = std::lower_bound( std::begin(values), std::end(values), range, [](const Value & lhs, const Range & rhs_range) @@ -610,9 +610,9 @@ void RangeHashedDictionary::getIdsAndDates( for (const auto & key : attr) { - for (const auto & value : key.getSecond()) + for (const auto & value : key.getMapped()) { - ids.push_back(key.getFirst()); + ids.push_back(key.getKey()); start_dates.push_back(value.range.left); end_dates.push_back(value.range.right); diff --git a/dbms/src/Functions/addressToLine.cpp b/dbms/src/Functions/addressToLine.cpp index e2ab7576eef..c3f3153b80e 100644 --- a/dbms/src/Functions/addressToLine.cpp +++ b/dbms/src/Functions/addressToLine.cpp @@ -140,8 +140,8 @@ private: std::lock_guard lock(mutex); map.emplace(addr, it, inserted); if (inserted) - *lookupResultGetMapped(it) = impl(addr); - return *lookupResultGetMapped(it); + it->getMapped() = impl(addr); + return it->getMapped(); } }; diff --git a/dbms/src/Functions/array/arrayIntersect.cpp b/dbms/src/Functions/array/arrayIntersect.cpp index 6aec4f94dc8..8881abb1552 100644 --- a/dbms/src/Functions/array/arrayIntersect.cpp +++ b/dbms/src/Functions/array/arrayIntersect.cpp @@ -467,15 +467,15 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable for (const auto & pair : map) { - if (pair.getSecond() == args) + if (pair.getMapped() == args) { ++result_offset; if constexpr (is_numeric_column) - result_data.insertValue(pair.getFirst()); + result_data.insertValue(pair.getKey()); else if constexpr (std::is_same::value || std::is_same::value) - result_data.insertData(pair.getFirst().data, pair.getFirst().size); + result_data.insertData(pair.getKey().data, pair.getKey().size); else - result_data.deserializeAndInsertFromArena(pair.getFirst().data); + result_data.deserializeAndInsertFromArena(pair.getKey().data); if (all_nullable) null_map.push_back(0); diff --git a/dbms/src/Functions/transform.cpp b/dbms/src/Functions/transform.cpp index df3daa62f95..583430c2ef7 100644 --- a/dbms/src/Functions/transform.cpp +++ b/dbms/src/Functions/transform.cpp @@ -508,7 +508,7 @@ private: { auto it = table.find(src[i]); if (it) - memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); /// little endian. + memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian. else dst[i] = dst_default; } @@ -524,7 +524,7 @@ private: { auto it = table.find(src[i]); if (it) - memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); /// little endian. + memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian. else dst[i] = dst_default[i]; } @@ -540,7 +540,7 @@ private: { auto it = table.find(src[i]); if (it) - memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); + memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); else dst[i] = src[i]; } @@ -557,7 +557,7 @@ private: for (size_t i = 0; i < size; ++i) { auto it = table.find(src[i]); - StringRef ref = it ? *lookupResultGetMapped(it) : dst_default; + StringRef ref = it ? it->getMapped() : dst_default; dst_data.resize(current_dst_offset + ref.size); memcpy(&dst_data[current_dst_offset], ref.data, ref.size); current_dst_offset += ref.size; @@ -581,7 +581,7 @@ private: StringRef ref; if (it) - ref = *lookupResultGetMapped(it); + ref = it->getMapped(); else { ref.data = reinterpret_cast(&dst_default_data[current_dst_default_offset]); @@ -611,7 +611,7 @@ private: current_src_offset = src_offsets[i]; auto it = table.find(ref); if (it) - memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); + memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); else dst[i] = dst_default; } @@ -632,7 +632,7 @@ private: current_src_offset = src_offsets[i]; auto it = table.find(ref); if (it) - memcpy(&dst[i], lookupResultGetMapped(it), sizeof(dst[i])); + memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); else dst[i] = dst_default[i]; } @@ -655,7 +655,7 @@ private: auto it = table.find(src_ref); - StringRef dst_ref = it ? *lookupResultGetMapped(it) : (with_default ? dst_default : src_ref); + StringRef dst_ref = it ? it->getMapped() : (with_default ? dst_default : src_ref); dst_data.resize(current_dst_offset + dst_ref.size); memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size); current_dst_offset += dst_ref.size; @@ -697,7 +697,7 @@ private: StringRef dst_ref; if (it) - dst_ref = *lookupResultGetMapped(it); + dst_ref = it->getMapped(); else { dst_ref.data = reinterpret_cast(&dst_default_data[current_dst_default_offset]); diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 0f0faaecb44..359b9f14c2a 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -180,8 +180,6 @@ struct AggregationMethodOneNumber using Data = TData; using Key = typename Data::key_type; using Mapped = typename Data::mapped_type; - using iterator = typename Data::iterator; - using const_iterator = typename Data::const_iterator; Data data; @@ -356,8 +354,6 @@ struct AggregationMethodKeysFixed using Data = TData; using Key = typename Data::key_type; using Mapped = typename Data::mapped_type; - using iterator = typename Data::iterator; - using const_iterator = typename Data::const_iterator; static constexpr bool has_nullable_keys = has_nullable_keys_; static constexpr bool has_low_cardinality = has_low_cardinality_; diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 3267e6a779b..afc056933ce 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -1237,7 +1237,7 @@ private: for (; it != end; ++it) { - const Mapped & mapped = it->getSecond(); + const Mapped & mapped = it->getMapped(); if (mapped.getUsed()) continue; diff --git a/dbms/src/Interpreters/tests/hash_map.cpp b/dbms/src/Interpreters/tests/hash_map.cpp index 910bf2c0649..7c52953fa9f 100644 --- a/dbms/src/Interpreters/tests/hash_map.cpp +++ b/dbms/src/Interpreters/tests/hash_map.cpp @@ -162,8 +162,8 @@ int main(int argc, char ** argv) map.emplace(data[i], it, inserted); if (inserted) { - new(lookupResultGetMapped(it)) Value; - std::swap(*lookupResultGetMapped(it), value); + new (&it->getMapped()) Value; + std::swap(it->getMapped(), value); INIT } } @@ -193,8 +193,8 @@ int main(int argc, char ** argv) map.emplace(data[i], it, inserted); if (inserted) { - new(lookupResultGetMapped(it)) Value; - std::swap(*lookupResultGetMapped(it), value); + new (&it->getMapped()) Value; + std::swap(it->getMapped(), value); INIT } } @@ -225,8 +225,8 @@ int main(int argc, char ** argv) map.emplace(data[i], it, inserted); if (inserted) { - new(lookupResultGetMapped(it)) Value; - std::swap(*lookupResultGetMapped(it), value); + new (&it->getMapped()) Value; + std::swap(it->getMapped(), value); INIT } } diff --git a/dbms/src/Interpreters/tests/hash_map3.cpp b/dbms/src/Interpreters/tests/hash_map3.cpp index 4b076152b07..2207edc6cc1 100644 --- a/dbms/src/Interpreters/tests/hash_map3.cpp +++ b/dbms/src/Interpreters/tests/hash_map3.cpp @@ -85,7 +85,7 @@ int main(int, char **) std::cerr << "Collisions: " << map.getCollisions() << std::endl; for (auto x : map) - std::cerr << x.getFirst().toString() << " -> " << x.getSecond() << std::endl; + std::cerr << x.getKey().toString() << " -> " << x.getMapped() << std::endl; return 0; } diff --git a/dbms/src/Interpreters/tests/hash_map_lookup.cpp b/dbms/src/Interpreters/tests/hash_map_lookup.cpp index b34c23e6c41..9ec191840e2 100644 --- a/dbms/src/Interpreters/tests/hash_map_lookup.cpp +++ b/dbms/src/Interpreters/tests/hash_map_lookup.cpp @@ -55,15 +55,15 @@ void NO_INLINE bench(const std::vector & data, const char * name) map.emplace(data[i], it, inserted); if (inserted) - *lookupResultGetMapped(it) = 1; + it->getMapped() = 1; else - ++*lookupResultGetMapped(it); + ++it->getMapped(); } for (size_t i = 0, size = data.size(); i < size; ++i) { auto it = map.find(data[i]); - ++*lookupResultGetMapped(it); + ++it->getMapped(); } watch.stop(); std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Size: " << map.size() @@ -81,10 +81,10 @@ void insert(Map & map, StringRef & k) typename Map::LookupResult it; map.emplace(k, it, inserted, nullptr); if (inserted) - *lookupResultGetMapped(it) = 1; + it->getMapped() = 1; else - ++*lookupResultGetMapped(it); - std::cout << *lookupResultGetMapped(map.find(k))<< std::endl; + ++it->getMapped(); + std::cout << map.find(k)->getMapped() << std::endl; } int main(int argc, char ** argv) diff --git a/dbms/src/Interpreters/tests/hash_map_string.cpp b/dbms/src/Interpreters/tests/hash_map_string.cpp index 61980a614ab..aad3b0a6442 100644 --- a/dbms/src/Interpreters/tests/hash_map_string.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string.cpp @@ -337,8 +337,8 @@ int main(int argc, char ** argv) { map.emplace(data[i], it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); @@ -366,8 +366,8 @@ int main(int argc, char ** argv) { map.emplace(data[i], it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); @@ -396,8 +396,8 @@ int main(int argc, char ** argv) { map.emplace(data[i], it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); @@ -426,8 +426,8 @@ int main(int argc, char ** argv) { map.emplace(data[i], it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); diff --git a/dbms/src/Interpreters/tests/hash_map_string_2.cpp b/dbms/src/Interpreters/tests/hash_map_string_2.cpp index 66a087d6824..85176906436 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_2.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_2.cpp @@ -595,8 +595,8 @@ void NO_INLINE bench(const std::vector & data, const char * name) { map.emplace(static_cast(data[i]), it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); diff --git a/dbms/src/Interpreters/tests/hash_map_string_3.cpp b/dbms/src/Interpreters/tests/hash_map_string_3.cpp index 7bd27bc6785..3c58771d87b 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_3.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_3.cpp @@ -442,8 +442,8 @@ void NO_INLINE bench(const std::vector & data, const char * name) { map.emplace(static_cast(data[i]), it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); diff --git a/dbms/src/Interpreters/tests/hash_map_string_small.cpp b/dbms/src/Interpreters/tests/hash_map_string_small.cpp index 529cef13c11..cbfc6bcdc67 100644 --- a/dbms/src/Interpreters/tests/hash_map_string_small.cpp +++ b/dbms/src/Interpreters/tests/hash_map_string_small.cpp @@ -144,8 +144,8 @@ int main(int argc, char ** argv) { map.emplace(data[i], it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); @@ -173,8 +173,8 @@ int main(int argc, char ** argv) { map.emplace(SmallStringRef(data[i].data, data[i].size), it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); diff --git a/dbms/src/Interpreters/tests/string_hash_map.cpp b/dbms/src/Interpreters/tests/string_hash_map.cpp index b16e1a91aa5..2191ad84705 100644 --- a/dbms/src/Interpreters/tests/string_hash_map.cpp +++ b/dbms/src/Interpreters/tests/string_hash_map.cpp @@ -151,8 +151,8 @@ void NO_INLINE bench(const std::vector & data, DB::Arena &, const cha { map.emplace(DB::ArenaKeyHolder{data[i], pool}, it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); diff --git a/dbms/src/Interpreters/tests/two_level_hash_map.cpp b/dbms/src/Interpreters/tests/two_level_hash_map.cpp index ed9df82d0b1..e1370cd3932 100644 --- a/dbms/src/Interpreters/tests/two_level_hash_map.cpp +++ b/dbms/src/Interpreters/tests/two_level_hash_map.cpp @@ -67,8 +67,8 @@ int main(int argc, char ** argv) { map.emplace(data[i], it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); @@ -82,7 +82,7 @@ int main(int argc, char ** argv) size_t elems = 0; for (const auto & kv : map) { - sum_counts += kv.getSecond(); + sum_counts += kv.getMapped(); ++elems; } @@ -103,8 +103,8 @@ int main(int argc, char ** argv) { map.emplace(i, it, inserted); if (inserted) - *lookupResultGetMapped(it) = 0; - ++*lookupResultGetMapped(it); + it->getMapped() = 0; + ++it->getMapped(); } watch.stop(); @@ -118,11 +118,11 @@ int main(int argc, char ** argv) size_t elems = 0; for (const auto & kv : map) { - sum_counts += kv.getSecond(); + sum_counts += kv.getMapped(); ++elems; - if (kv.getFirst() > n) - std::cerr << kv.getFirst() << std::endl; + if (kv.getKey() > n) + std::cerr << kv.getKey() << std::endl; } std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl; diff --git a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index e49f9315887..a4a6911f8eb 100644 --- a/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -65,9 +65,9 @@ inline size_t JSONEachRowRowInputFormat::columnIndex(const StringRef & name, siz if (prev_positions.size() > key_index && prev_positions[key_index] - && name == *lookupResultGetKey(prev_positions[key_index])) + && name == prev_positions[key_index]->getKey()) { - return *lookupResultGetMapped(prev_positions[key_index]); + return prev_positions[key_index]->getMapped(); } else { @@ -78,7 +78,7 @@ inline size_t JSONEachRowRowInputFormat::columnIndex(const StringRef & name, siz if (key_index < prev_positions.size()) prev_positions[key_index] = it; - return *lookupResultGetMapped(it); + return it->getMapped(); } else return UNKNOWN_FIELD; diff --git a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 60df642836c..0b1b7e3e44b 100644 --- a/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/dbms/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -131,7 +131,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex } else { - index = *lookupResultGetMapped(it); + index = it->getMapped(); if (seen_columns[index]) throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); diff --git a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp index e29ae01b3b3..ecad8b05600 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -57,7 +57,7 @@ void buildScatterSelector( throw Exception("Too many partitions for single INSERT block (more than " + toString(max_parts) + "). The limit is controlled by 'max_partitions_per_insert_block' setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).", ErrorCodes::TOO_MANY_PARTS); partition_num_to_first_row.push_back(i); - *lookupResultGetMapped(it) = partitions_count; + it->getMapped() = partitions_count; ++partitions_count; @@ -70,7 +70,7 @@ void buildScatterSelector( } if (partitions_count > 1) - selector[i] = *lookupResultGetMapped(it); + selector[i] = it->getMapped(); } } diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index d3f41f3b0b5..33127f97874 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -333,9 +333,9 @@ private: { for (size_t j = 0; j < columns.size(); ++j) if (j == key_pos) - columns[j]->insertData(rawData(it->getFirst()), rawSize(it->getFirst())); + columns[j]->insertData(rawData(it->getKey()), rawSize(it->getKey())); else - columns[j]->insertFrom(*it->getSecond().block->getByPosition(column_indices[j]).column.get(), it->getSecond().row_num); + columns[j]->insertFrom(*it->getMapped().block->getByPosition(column_indices[j]).column.get(), it->getMapped().row_num); ++rows_added; } else if constexpr (STRICTNESS == ASTTableJoin::Strictness::Asof) @@ -343,11 +343,11 @@ private: throw Exception("ASOF join storage is not implemented yet", ErrorCodes::NOT_IMPLEMENTED); } else - for (auto ref_it = it->getSecond().begin(); ref_it.ok(); ++ref_it) + for (auto ref_it = it->getMapped().begin(); ref_it.ok(); ++ref_it) { for (size_t j = 0; j < columns.size(); ++j) if (j == key_pos) - columns[j]->insertData(rawData(it->getFirst()), rawSize(it->getFirst())); + columns[j]->insertData(rawData(it->getKey()), rawSize(it->getKey())); else columns[j]->insertFrom(*ref_it->block->getByPosition(column_indices[j]).column.get(), ref_it->row_num); ++rows_added; diff --git a/utils/test-data-generator/MarkovModel.h b/utils/test-data-generator/MarkovModel.h index 7ef69b2a1f0..338aee2e61f 100644 --- a/utils/test-data-generator/MarkovModel.h +++ b/utils/test-data-generator/MarkovModel.h @@ -105,7 +105,7 @@ public: if (table.end() == it) return pos - data; - *pos = it->getSecond().sample(random()); + *pos = it->getMapped().sample(random()); /// Zero byte marks end of string. if (0 == *pos) @@ -125,12 +125,12 @@ public: for (auto & elem : table) { UInt32 new_total = 0; - for (auto & frequency : elem.getSecond().data) + for (auto & frequency : elem.getMapped().data) { frequency.count = transform(frequency.count); new_total += frequency.count; } - elem.getSecond().total = new_total; + elem.getMapped().total = new_total; } } @@ -142,10 +142,10 @@ public: for (const auto & elem : table) { - writeBinary(elem.getFirst(), out); - writeBinary(UInt8(elem.getSecond().data.size()), out); + writeBinary(elem.getKey(), out); + writeBinary(UInt8(elem.getMapped().data.size()), out); - for (const auto & frequency : elem.getSecond().data) + for (const auto & frequency : elem.getMapped().data) { writeBinary(frequency.byte, out); writeVarUInt(frequency.count, out);