#pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace DB { class ComplexKeyCacheDictionary final : public IDictionaryBase { public: ComplexKeyCacheDictionary(const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime, const std::size_t size) : name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime), size{round_up_to_power_of_two(size)} { if (!this->source_ptr->supportsSelectiveLoad()) throw Exception{ name + ": source cannot be used with ComplexKeyCacheDictionary", ErrorCodes::UNSUPPORTED_METHOD }; createAttributes(); } ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other) : ComplexKeyCacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size} {} std::string getKeyDescription() const { return key_description; }; std::exception_ptr getCreationException() const override { return {}; } std::string getName() const override { return name; } std::string getTypeName() const override { return "ComplexKeyCache"; } std::size_t getBytesAllocated() const override { return bytes_allocated + key_size_is_fixed ? fixed_size_keys_pool->size() : keys_pool->size(); } std::size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); } double getHitRate() const override { return static_cast(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed); } std::size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); } double getLoadFactor() const override { return static_cast(element_count.load(std::memory_order_relaxed)) / size; } bool isCached() const override { return true; } DictionaryPtr clone() const override { return std::make_unique(*this); } const IDictionarySource * getSource() const override { return source_ptr.get(); } const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } const DictionaryStructure & getStructure() const override { return dict_struct; } std::chrono::time_point getCreationTime() const override { return creation_time; } bool isInjective(const std::string & attribute_name) const override { return dict_struct.attributes[&getAttribute(attribute_name) - attributes.data()].injective; } #define DECLARE_MULTIPLE_GETTER(TYPE)\ void get##TYPE(\ const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,\ PODArray & out) const\ {\ validateKeyTypes(key_types);\ \ auto & attribute = getAttribute(attribute_name);\ if (attribute.type != AttributeUnderlyingType::TYPE)\ throw Exception{\ name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ ErrorCodes::TYPE_MISMATCH\ };\ \ getItems(attribute, key_columns, out);\ } DECLARE_MULTIPLE_GETTER(UInt8) DECLARE_MULTIPLE_GETTER(UInt16) DECLARE_MULTIPLE_GETTER(UInt32) DECLARE_MULTIPLE_GETTER(UInt64) DECLARE_MULTIPLE_GETTER(Int8) DECLARE_MULTIPLE_GETTER(Int16) DECLARE_MULTIPLE_GETTER(Int32) DECLARE_MULTIPLE_GETTER(Int64) DECLARE_MULTIPLE_GETTER(Float32) DECLARE_MULTIPLE_GETTER(Float64) #undef DECLARE_MULTIPLE_GETTER void getString( const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types, ColumnString * out) const { validateKeyTypes(key_types); auto & attribute = getAttribute(attribute_name); if (attribute.type != AttributeUnderlyingType::String) throw Exception{ name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH }; getItems(attribute, key_columns, out); } #define DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(TYPE)\ void get##TYPE(\ const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types,\ const PODArray & def, PODArray & out) const\ {\ validateKeyTypes(key_types);\ \ auto & attribute = getAttribute(attribute_name);\ if (attribute.type != AttributeUnderlyingType::TYPE)\ throw Exception{\ name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),\ ErrorCodes::TYPE_MISMATCH\ };\ \ getItems(attribute, key_columns, out, &def);\ } DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(UInt8) DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(UInt16) DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(UInt32) DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(UInt64) DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(Int8) DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(Int16) DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(Int32) DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(Int64) DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(Float32) DECLARE_MULTIPLE_GETTER_WITH_DEFAULT(Float64) #undef DECLARE_MULTIPLE_GETTER_WITH_DEFAULT void getString( const std::string & attribute_name, const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types, const ColumnString * const def, ColumnString * const out) const { validateKeyTypes(key_types); auto & attribute = getAttribute(attribute_name); if (attribute.type != AttributeUnderlyingType::String) throw Exception{ name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH }; getItems(attribute, key_columns, out, def); } void has(const ConstColumnPlainPtrs & key_columns, const DataTypes & key_types, PODArray & out) const { validateKeyTypes(key_types); /// Mapping: -> { all indices `i` of `key_columns` such that `key_columns[i]` = } MapType> outdated_keys; const auto rows = key_columns.front()->size(); const auto keys_size = dict_struct.key->size(); StringRefs keys(keys_size); Arena temporary_keys_pool; PODArray keys_array(rows); { const Poco::ScopedReadRWLock read_lock{rw_lock}; const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows)) { const auto key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool); keys_array[row] = key; const auto hash = StringRefHash{}(key); const auto cell_idx = hash & (size - 1); const auto & cell = cells[cell_idx]; /** cell should be updated if either: * 1. keys (or hash) do not match, * 2. cell has expired, * 3. explicit defaults were specified and cell was set default. */ if (cell.hash != hash || cell.key != key || cell.expiresAt() < now) outdated_keys[key].push_back(row); else out[row] = !cell.isDefault(); } } query_count.fetch_add(rows, std::memory_order_relaxed); hit_count.fetch_add(rows - outdated_keys.size(), std::memory_order_release); if (outdated_keys.empty()) return; std::vector required_rows(outdated_keys.size()); std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [] (auto & pair) { return pair.second.front(); }); /// request new values update(key_columns, keys_array, required_rows, [&] (const auto key, const auto) { for (const auto out_idx : outdated_keys[key]) out[out_idx] = true; }, [&] (const auto key, const auto) { for (const auto out_idx : outdated_keys[key]) out[out_idx] = false; }); } private: template using MapType = HashMapWithSavedHash; template using ContainerType = Value[]; template using ContainerPtrType = std::unique_ptr>; struct cell_metadata_t final { using time_point_t = std::chrono::system_clock::time_point; using time_point_rep_t = time_point_t::rep; using time_point_urep_t = std::make_unsigned_t; static constexpr std::uint64_t EXPIRES_AT_MASK = std::numeric_limits::max(); static constexpr std::uint64_t IS_DEFAULT_MASK = ~EXPIRES_AT_MASK; StringRef key; decltype(StringRefHash{}(key)) hash; /// Stores both expiration time and `is_default` flag in the most significant bit time_point_urep_t data; /// Sets expiration time, resets `is_default` flag to false time_point_t expiresAt() const { return ext::safe_bit_cast(data & EXPIRES_AT_MASK); } void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast(t); } bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; } void setDefault() { data |= IS_DEFAULT_MASK; } }; struct attribute_t final { AttributeUnderlyingType type; std::tuple< UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64, String> null_values; std::tuple< ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType, ContainerPtrType> arrays; }; void createAttributes() { const auto size = dict_struct.attributes.size(); attributes.reserve(size); bytes_allocated += size * sizeof(cell_metadata_t); bytes_allocated += size * sizeof(attributes.front()); for (const auto & attribute : dict_struct.attributes) { attribute_index_by_name.emplace(attribute.name, attributes.size()); attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value)); if (attribute.hierarchical) throw Exception{ name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH }; } } attribute_t createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value) { attribute_t attr{type}; switch (type) { case AttributeUnderlyingType::UInt8: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(UInt8); break; case AttributeUnderlyingType::UInt16: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(UInt16); break; case AttributeUnderlyingType::UInt32: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(UInt32); break; case AttributeUnderlyingType::UInt64: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(UInt64); break; case AttributeUnderlyingType::Int8: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(Int8); break; case AttributeUnderlyingType::Int16: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(Int16); break; case AttributeUnderlyingType::Int32: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(Int32); break; case AttributeUnderlyingType::Int64: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(Int64); break; case AttributeUnderlyingType::Float32: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(Float32); break; case AttributeUnderlyingType::Float64: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(Float64); break; case AttributeUnderlyingType::String: std::get(attr.null_values) = null_value.get(); std::get>(attr.arrays) = std::make_unique>(size); bytes_allocated += size * sizeof(StringRef); break; } return attr; } static std::string createKeyDescription(const DictionaryStructure & dict_struct) { std::ostringstream out; out << '('; auto first = true; for (const auto & key : *dict_struct.key) { if (!first) out << ", "; first = false; out << key.type->getName(); } out << ')'; return out.str(); } void validateKeyTypes(const DataTypes & key_types) const { if (key_types.size() != dict_struct.key->size()) throw Exception{ "Key structure does not match, expected " + key_description, ErrorCodes::TYPE_MISMATCH }; for (const auto i : ext::range(0, key_types.size())) { const auto & expected_type = (*dict_struct.key)[i].type->getName(); const auto & actual_type = key_types[i]->getName(); if (expected_type != actual_type) throw Exception{ "Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found " + actual_type, ErrorCodes::TYPE_MISMATCH }; } } template void getItems( attribute_t & attribute, const ConstColumnPlainPtrs & key_columns, PODArray & out, const PODArray * const def = nullptr) const { /// Mapping: -> { all indices `i` of `key_columns` such that `key_columns[i]` = } MapType> outdated_keys; auto & attribute_array = std::get>(attribute.arrays); const auto rows = key_columns.front()->size(); const auto keys_size = dict_struct.key->size(); StringRefs keys(keys_size); Arena temporary_keys_pool; PODArray keys_array(rows); { const Poco::ScopedReadRWLock read_lock{rw_lock}; const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows)) { const auto key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool); keys_array[row] = key; const auto hash = StringRefHash{}(key); const auto cell_idx = hash & (size - 1); const auto & cell = cells[cell_idx]; /** cell should be updated if either: * 1. keys (or hash) do not match, * 2. cell has expired, * 3. explicit defaults were specified and cell was set default. */ if (cell.hash != hash || cell.key != key || cell.expiresAt() < now) outdated_keys[key].push_back(row); else out[row] = def && cell.isDefault() ? (*def)[row] : attribute_array[cell_idx]; } } query_count.fetch_add(rows, std::memory_order_relaxed); hit_count.fetch_add(rows - outdated_keys.size(), std::memory_order_release); if (outdated_keys.empty()) return; std::vector required_rows(outdated_keys.size()); std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [] (auto & pair) { return pair.second.front(); }); /// request new values update(key_columns, keys_array, required_rows, [&] (const auto key, const auto cell_idx) { const auto attribute_value = attribute_array[cell_idx]; for (const auto out_idx : outdated_keys[key]) out[out_idx] = attribute_value; }, [&] (const auto key, const auto cell_idx) { const auto attribute_value = !def ? attribute_array[cell_idx] : (*def)[outdated_keys[key].front()]; for (const auto out_idx : outdated_keys[key]) out[out_idx] = attribute_value; }); } void getItems( attribute_t & attribute, const ConstColumnPlainPtrs & key_columns, ColumnString * out, const ColumnString * const def = nullptr) const { const auto rows = key_columns.front()->size(); /// save on some allocations out->getOffsets().reserve(rows); const auto keys_size = dict_struct.key->size(); StringRefs keys(keys_size); Arena temporary_keys_pool; auto & attribute_array = std::get>(attribute.arrays); auto found_outdated_values = false; /// perform optimistic version, fallback to pessimistic if failed { const Poco::ScopedReadRWLock read_lock{rw_lock}; const auto now = std::chrono::system_clock::now(); /// fetch up-to-date values, discard on fail for (const auto row : ext::range(0, rows)) { const auto key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool); SCOPE_EXIT(temporary_keys_pool.rollback(key.size)); const auto hash = StringRefHash{}(key); const auto cell_idx = hash & (size - 1); const auto & cell = cells[cell_idx]; if (cell.hash != hash || cell.key != key || cell.expiresAt() < now) { found_outdated_values = true; break; } else { const auto string_ref = def && cell.isDefault() ? def->getDataAt(row) : attribute_array[cell_idx]; out->insertData(string_ref.data, string_ref.size); } } } /// optimistic code completed successfully if (!found_outdated_values) { query_count.fetch_add(rows, std::memory_order_relaxed); hit_count.fetch_add(rows, std::memory_order_release); return; } /// now onto the pessimistic one, discard possible partial results from the optimistic path out->getChars().resize_assume_reserved(0); out->getOffsets().resize_assume_reserved(0); /// Mapping: -> { all indices `i` of `key_columns` such that `key_columns[i]` = } MapType> outdated_keys; /// we are going to store every string separately MapType map; PODArray keys_array(rows); std::size_t total_length = 0; { const Poco::ScopedReadRWLock read_lock{rw_lock}; const auto now = std::chrono::system_clock::now(); for (const auto row : ext::range(0, rows)) { const auto key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool); keys_array[row] = key; const auto hash = StringRefHash{}(key); const auto cell_idx = hash & (size - 1); const auto & cell = cells[cell_idx]; if (cell.hash != hash || cell.key != key || cell.expiresAt() < now) outdated_keys[key].push_back(row); else { const auto string_ref = def && cell.isDefault() ? def->getDataAt(row) : attribute_array[cell_idx]; map[key] = String{string_ref}; total_length += string_ref.size + 1; } } } query_count.fetch_add(rows, std::memory_order_relaxed); hit_count.fetch_add(rows - outdated_keys.size(), std::memory_order_release); /// request new values if (!outdated_keys.empty()) { std::vector required_rows(outdated_keys.size()); std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [] (auto & pair) { return pair.second.front(); }); update(key_columns, keys_array, required_rows, [&] (const auto key, const auto cell_idx) { const auto attribute_value = attribute_array[cell_idx]; map[key] = String{attribute_value}; total_length += (attribute_value.size + 1) * outdated_keys[key].size(); }, [&] (const auto key, const auto cell_idx) { auto attribute_value = def ? def->getDataAt(outdated_keys[key].front()) : attribute_array[cell_idx]; map[key] = String{attribute_value}; total_length += (attribute_value.size + 1) * outdated_keys[key].size(); }); } out->getChars().reserve(total_length); const auto & null_value = std::get(attribute.null_values); for (const auto key : keys_array) { const auto it = map.find(key); /// @note check seems redundant, null_values are explicitly stored in the `map` const auto & string = it != map.end() ? it->second : null_value; out->insertData(string.data(), string.size()); } } template void update( const ConstColumnPlainPtrs & in_key_columns, const PODArray & in_keys, const std::vector & in_requested_rows, PresentKeyHandler && on_cell_updated, AbsentKeyHandler && on_key_not_found) const { auto stream = source_ptr->loadKeys(in_key_columns, in_requested_rows); stream->readPrefix(); MapType remaining_keys{in_requested_rows.size()}; for (const auto row : in_requested_rows) remaining_keys.insert({ in_keys[row], 0 }); std::uniform_int_distribution distribution{ dict_lifetime.min_sec, dict_lifetime.max_sec }; const Poco::ScopedWriteRWLock write_lock{rw_lock}; const auto keys_size = dict_struct.key->size(); StringRefs keys(keys_size); const auto attributes_size = attributes.size(); while (const auto block = stream->read()) { /// cache column pointers const auto key_columns = ext::map(ext::range(0, keys_size), [&] (const std::size_t attribute_idx) { return block.getByPosition(attribute_idx).column.get(); }); const auto attribute_columns = ext::map(ext::range(0, attributes_size), [&] (const std::size_t attribute_idx) { return block.getByPosition(keys_size + attribute_idx).column.get(); }); const auto rows = block.rowsInFirstColumn(); for (const auto row : ext::range(0, rows)) { auto key = allocKey(row, key_columns, keys); const auto hash = StringRefHash{}(key); const auto cell_idx = hash & (size - 1); auto & cell = cells[cell_idx]; for (const auto attribute_idx : ext::range(0, attributes.size())) { const auto & attribute_column = *attribute_columns[attribute_idx]; auto & attribute = attributes[attribute_idx]; setAttributeValue(attribute, cell_idx, attribute_column[row]); } /// if cell id is zero and zero does not map to this cell, then the cell is unused if (cell.key == StringRef{} && cell_idx != zero_cell_idx) element_count.fetch_add(1, std::memory_order_relaxed); /// handle memory allocated for old key if (key == cell.key) { freeKey(key); key = cell.key; } else { /// new key is different from the old one if (cell.key.data) freeKey(cell.key); cell.key = key; } cell.hash = hash; if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0) cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}); else cell.setExpiresAt(std::chrono::time_point::max()); /// inform caller on_cell_updated(key, cell_idx); /// mark corresponding id as found remaining_keys[key] = 1; } } stream->readSuffix(); /// Check which ids have not been found and require setting null_value for (const auto key_found_pair : remaining_keys) { if (key_found_pair.second) continue; auto key = key_found_pair.first; const auto hash = StringRefHash{}(key); const auto cell_idx = hash & (size - 1); auto & cell = cells[cell_idx]; /// Set null_value for each attribute for (auto & attribute : attributes) setDefaultAttributeValue(attribute, cell_idx); /// Check if cell had not been occupied before and increment element counter if it hadn't if (cell.key == StringRef{} && cell_idx != zero_cell_idx) element_count.fetch_add(1, std::memory_order_relaxed); if (key == cell.key) key = cell.key; else { if (cell.key.data) freeKey(cell.key); /// copy key from temporary pool key = copyKey(key); cell.key = key; } cell.hash = hash; if (dict_lifetime.min_sec != 0 && dict_lifetime.max_sec != 0) cell.setExpiresAt(std::chrono::system_clock::now() + std::chrono::seconds{distribution(rnd_engine)}); else cell.setExpiresAt(std::chrono::time_point::max()); cell.setDefault(); /// inform caller that the cell has not been found on_key_not_found(key, cell_idx); } } std::uint64_t getCellIdx(const StringRef key) const { const auto hash = StringRefHash{}(key); const auto idx = hash & (size - 1); return idx; } void setDefaultAttributeValue(attribute_t & attribute, const std::size_t idx) const { switch (attribute.type) { case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = std::get(attribute.null_values); break; case AttributeUnderlyingType::String: { const auto & null_value_ref = std::get(attribute.null_values); auto & string_ref = std::get>(attribute.arrays)[idx]; if (string_ref.data == null_value_ref.data()) return; if (string_ref.size != 0) bytes_allocated -= string_ref.size + 1; const std::unique_ptr deleter{string_ref.data}; string_ref = StringRef{null_value_ref}; break; } } } void setAttributeValue(attribute_t & attribute, const std::size_t idx, const Field & value) const { switch (attribute.type) { case AttributeUnderlyingType::UInt8: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingType::UInt16: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingType::UInt32: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingType::UInt64: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingType::Int8: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingType::Int16: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingType::Int32: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingType::Int64: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingType::Float32: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingType::Float64: std::get>(attribute.arrays)[idx] = value.get(); break; case AttributeUnderlyingType::String: { const auto & string = value.get(); auto & string_ref = std::get>(attribute.arrays)[idx]; const auto & null_value_ref = std::get(attribute.null_values); if (string_ref.data != null_value_ref.data()) { if (string_ref.size != 0) bytes_allocated -= string_ref.size + 1; /// avoid explicit delete, let unique_ptr handle it const std::unique_ptr deleter{string_ref.data}; } const auto size = string.size(); if (size != 0) { auto string_ptr = std::make_unique(size + 1); std::copy(string.data(), string.data() + size + 1, string_ptr.get()); string_ref = StringRef{string_ptr.release(), size}; bytes_allocated += size + 1; } else string_ref = {}; break; } } } attribute_t & getAttribute(const std::string & attribute_name) const { const auto it = attribute_index_by_name.find(attribute_name); if (it == std::end(attribute_index_by_name)) throw Exception{ name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS }; return attributes[it->second]; } StringRef allocKey(const std::size_t row, const ConstColumnPlainPtrs & key_columns, StringRefs & keys) const { if (key_size_is_fixed) return placeKeysInFixedSizePool(row, key_columns); return placeKeysInPool(row, key_columns, keys, *keys_pool); } void freeKey(const StringRef key) const { if (key_size_is_fixed) fixed_size_keys_pool->free(key.data); else keys_pool->free(key.data, key.size); } static std::size_t round_up_to_power_of_two(std::size_t n) { --n; n |= n >> 1; n |= n >> 2; n |= n >> 4; n |= n >> 8; n |= n >> 16; n |= n >> 32; ++n; return n; } static std::uint64_t getSeed() { timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_nsec ^ getpid(); } template static StringRef placeKeysInPool( const std::size_t row, const ConstColumnPlainPtrs & key_columns, StringRefs & keys, Arena & pool) { const auto keys_size = key_columns.size(); size_t sum_keys_size{}; for (const auto i : ext::range(0, keys_size)) { keys[i] = key_columns[i]->getDataAtWithTerminatingZero(row); sum_keys_size += keys[i].size; } const auto res = pool.alloc(sum_keys_size); auto place = res; for (size_t j = 0; j < keys_size; ++j) { memcpy(place, keys[j].data, keys[j].size); place += keys[j].size; } return { res, sum_keys_size }; } StringRef placeKeysInFixedSizePool( const std::size_t row, const ConstColumnPlainPtrs & key_columns) const { const auto res = fixed_size_keys_pool->alloc(); auto place = res; for (const auto & key_column : key_columns) { const auto key = key_column->getDataAt(row); memcpy(place, key.data, key.size); place += key.size; } return { res, key_size }; } StringRef copyKey(const StringRef key) const { const auto res = key_size_is_fixed ? fixed_size_keys_pool->alloc() : keys_pool->alloc(key.size); memcpy(res, key.data, key.size); return { res, key.size }; } const std::string name; const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; const DictionaryLifetime dict_lifetime; const std::string key_description{createKeyDescription(dict_struct)}; mutable Poco::RWLock rw_lock; const std::size_t size; const std::uint64_t zero_cell_idx{getCellIdx(StringRef{})}; std::map attribute_index_by_name; mutable std::vector attributes; mutable std::vector cells{size}; const bool key_size_is_fixed{dict_struct.isKeySizeFixed()}; std::size_t key_size{key_size_is_fixed ? dict_struct.getKeySize() : 0}; std::unique_ptr keys_pool = key_size_is_fixed ? nullptr : std::make_unique(); std::unique_ptr fixed_size_keys_pool = key_size_is_fixed ? std::make_unique(key_size) : nullptr; mutable std::mt19937_64 rnd_engine{getSeed()}; mutable std::size_t bytes_allocated = 0; mutable std::atomic element_count{0}; mutable std::atomic hit_count{0}; mutable std::atomic query_count{0}; const std::chrono::time_point creation_time = std::chrono::system_clock::now(); }; }