#pragma once #include #include #include #include #include namespace DB { namespace { template struct ReverseIndexHashTableState; template struct ReverseIndexHashTableState { constexpr static bool with_saved_hash = false; constexpr static bool has_base_index = false; ColumnType * index_column; }; template struct ReverseIndexHashTableState { constexpr static bool with_saved_hash = false; constexpr static bool has_base_index = true; ColumnType * index_column; size_t base_index; }; template struct ReverseIndexHashTableState { constexpr static bool with_saved_hash = true; constexpr static bool has_base_index = false; ColumnType * index_column; typename ColumnVector::Container * saved_hash_column; }; template struct ReverseIndexHashTableState { constexpr static bool with_saved_hash = true; constexpr static bool has_base_index = true; ColumnType * index_column; typename ColumnVector::Container * saved_hash_column; size_t base_index; }; template struct ReverseIndexHash : public Hash { template size_t operator()(T) const { throw Exception("operator()(key) is not implemented for ReverseIndexHash.", ErrorCodes::LOGICAL_ERROR); } template size_t operator()(const State & state, T key) const { auto index = key; if constexpr (State::has_base_index) index -= state.base_index; return Hash::operator()(state.index_column->getElement(index)); } }; using ReverseIndexStringHash = ReverseIndexHash; template using ReverseIndexNumberHash = ReverseIndexHash>; template struct ReverseIndexHashTableCell : public HashTableCell> { using Base = HashTableCell>; using State = typename Base::State; using Base::Base; using Base::key; using Base::keyEquals; using Base::isZero; template static bool isZero(const T &, const State & /*state*/) { static_assert(!std::is_same_v::type, typename std::decay::type>); return false; } /// Special case when we want to compare with something not in index_column. /// When we compare something inside column default keyEquals checks only that row numbers are equal. bool keyEquals(const StringRef & object, size_t hash_ [[maybe_unused]], const State & state) const { auto index = key; if constexpr (has_base_index) index -= state.base_index; if constexpr (string_hash) return hash_ == (*state.saved_hash_column)[index] && object == state.index_column->getDataAt(index); else return object == state.index_column->getDataAt(index); } size_t getHash(const Hash & hash) const { auto index = key; /// Hack. HashTable is Hash itself. const auto & state = static_cast(static_cast(hash)); if constexpr (has_base_index) index -= state.base_index; if constexpr (string_hash) return (*state.saved_hash_column)[index]; else return hash(state, key); } }; template class HashTableWithPublicState : public HashTable, HashTableAllocator> { using State = typename Cell::State; using Base = HashTable, HashTableAllocator>; public: using Base::Base; State & getState() { return *this; } }; template class ReverseIndexStringHashTable : public HashTableWithPublicState< IndexType, ReverseIndexHashTableCell< IndexType, ReverseIndexStringHash, ReverseIndexStringHashTable, ColumnType, true, has_base_index>, ReverseIndexStringHash> { using Base = HashTableWithPublicState< IndexType, ReverseIndexHashTableCell< IndexType, ReverseIndexStringHash, ReverseIndexStringHashTable, ColumnType, true, has_base_index>, ReverseIndexStringHash>; public: using Base::Base; friend struct ReverseIndexHashTableCell< IndexType, ReverseIndexStringHash, ReverseIndexStringHashTable, ColumnType, true, has_base_index>; }; template class ReverseIndexNumberHashTable : public HashTableWithPublicState< IndexType, ReverseIndexHashTableCell< IndexType, ReverseIndexNumberHash, ReverseIndexNumberHashTable, ColumnType, false, has_base_index>, ReverseIndexNumberHash> { using Base = HashTableWithPublicState< IndexType, ReverseIndexHashTableCell< IndexType, ReverseIndexNumberHash, ReverseIndexNumberHashTable, ColumnType, false, has_base_index>, ReverseIndexNumberHash>; public: using Base::Base; friend struct ReverseIndexHashTableCell< IndexType, ReverseIndexNumberHash, ReverseIndexNumberHashTable, ColumnType, false, has_base_index>; }; template struct SelectReverseIndexHashTable; template struct SelectReverseIndexHashTable { using Type = ReverseIndexNumberHashTable; }; template struct SelectReverseIndexHashTable { using Type = ReverseIndexStringHashTable; }; template constexpr bool isNumericColumn(const T *) { return false; } template constexpr bool isNumericColumn(const ColumnVector *) { return true; } static_assert(isNumericColumn(static_cast *>(nullptr))); static_assert(!isNumericColumn(static_cast(nullptr))); template using ReverseIndexHashTable = typename SelectReverseIndexHashTable(nullptr))>::Type; } template class ReverseIndex { public: explicit ReverseIndex(UInt64 num_prefix_rows_to_skip, UInt64 base_index) : num_prefix_rows_to_skip(num_prefix_rows_to_skip), base_index(base_index), saved_hash_ptr(nullptr) {} void setColumn(ColumnType * column_); static constexpr bool is_numeric_column = isNumericColumn(static_cast(nullptr)); static constexpr bool use_saved_hash = !is_numeric_column; UInt64 insert(UInt64 from_position); /// Insert into index column[from_position]; UInt64 insertFromLastRow(); UInt64 getInsertionPoint(const StringRef & data); UInt64 lastInsertionPoint() const { return size() + base_index; } ColumnType * getColumn() const { return column; } size_t size() const; const UInt64 * tryGetSavedHash() const { if (!use_saved_hash) return nullptr; UInt64 * ptr = saved_hash_ptr.load(); if (!ptr) { auto hash = calcHashes(); ptr = &hash->getData()[0]; UInt64 * expected = nullptr; if(saved_hash_ptr.compare_exchange_strong(expected, ptr)) saved_hash = std::move(hash); else ptr = expected; } return ptr; } size_t allocatedBytes() const { return index ? index->getBufferSizeInBytes() : 0; } private: ColumnType * column = nullptr; UInt64 num_prefix_rows_to_skip; /// The number prefix tows in column which won't be sored at index. UInt64 base_index; /// This values will be added to row number which is inserted into index. using IndexMapType = ReverseIndexHashTable; /// Lazy initialized. std::unique_ptr index; mutable ColumnUInt64::MutablePtr saved_hash; mutable std::atomic saved_hash_ptr; void buildIndex(); UInt64 getHash(const StringRef & ref) const { if constexpr (is_numeric_column) { using ValueType = typename ColumnType::value_type; ValueType value = *reinterpret_cast(ref.data); return DefaultHash()(value); } else return StringRefHash()(ref); } ColumnUInt64::MutablePtr calcHashes() const; }; template void ReverseIndex:: setColumn(ColumnType * column_) { if (column != column_) { index = nullptr; saved_hash = nullptr; } column = column_; } template size_t ReverseIndex::size() const { if (!column) throw Exception("ReverseIndex has not size because index column wasn't set.", ErrorCodes::LOGICAL_ERROR); return column->size(); } template void ReverseIndex::buildIndex() { if (index) return; if (!column) throw Exception("ReverseIndex can't build index because index column wasn't set.", ErrorCodes::LOGICAL_ERROR); auto size = column->size(); index = std::make_unique(size); if constexpr (use_saved_hash) saved_hash = calcHashes(); auto & state = index->getState(); state.index_column = column; state.base_index = base_index; if constexpr (use_saved_hash) state.saved_hash_column = &saved_hash->getData(); using IteratorType = typename IndexMapType::iterator; IteratorType iterator; bool inserted; for (auto row : ext::range(num_prefix_rows_to_skip, size)) { UInt64 hash; if constexpr (use_saved_hash) hash = saved_hash->getElement(row); else hash = getHash(column->getDataAt(row)); index->emplace(row + base_index, iterator, inserted, hash); if (!inserted) throw Exception("Duplicating keys found in ReverseIndex.", ErrorCodes::LOGICAL_ERROR); } } template ColumnUInt64::MutablePtr ReverseIndex::calcHashes() const { if (!column) throw Exception("ReverseIndex can't build index because index column wasn't set.", ErrorCodes::LOGICAL_ERROR); auto size = column->size(); auto hash = ColumnUInt64::create(size); for (auto row : ext::range(0, size)) hash->getElement(row) = getHash(column->getDataAt(row)); return std::move(hash); }; template UInt64 ReverseIndex::insert(UInt64 from_position) { if (!index) buildIndex(); using IteratorType = typename IndexMapType::iterator; IteratorType iterator; bool inserted; auto hash = getHash(column->getDataAt(from_position)); if constexpr (use_saved_hash) { auto & data = saved_hash->getData(); if (data.size() <= from_position) data.resize(from_position + 1); data[from_position] = hash; } index->emplace(from_position + base_index, iterator, inserted, hash); return *iterator; } template UInt64 ReverseIndex::insertFromLastRow() { if (!column) throw Exception("ReverseIndex can't insert row from column because index column wasn't set.", ErrorCodes::LOGICAL_ERROR); UInt64 num_rows = size(); if (num_rows == 0) throw Exception("ReverseIndex can't insert row from column because it is empty.", ErrorCodes::LOGICAL_ERROR); UInt64 position = num_rows - 1; UInt64 inserted_pos = insert(position); if (position + base_index != inserted_pos) throw Exception("Can't insert into reverse index from last row (" + toString(position + base_index) + ") because the same row is in position " + toString(inserted_pos), ErrorCodes::LOGICAL_ERROR); return inserted_pos; } template UInt64 ReverseIndex::getInsertionPoint(const StringRef & data) { if (!index) buildIndex(); using IteratorType = typename IndexMapType::iterator; IteratorType iterator; auto hash = getHash(data); iterator = index->find(data, hash); return iterator == index->end() ? size() + base_index : *iterator; } }