diff --git a/src/Functions/EntropyLearnedHash.cpp b/src/Functions/EntropyLearnedHash.cpp index ed4ea8ba701..af5b2c3d232 100644 --- a/src/Functions/EntropyLearnedHash.cpp +++ b/src/Functions/EntropyLearnedHash.cpp @@ -30,11 +30,10 @@ namespace ErrorCodes namespace { -using Key = String; using PartialKeyPositions = std::vector; using Entropies = std::vector; -void getPartialKey(std::string_view key, const PartialKeyPositions & partial_key_positions, Key & result) +void getPartialKey(std::string_view key, const PartialKeyPositions & partial_key_positions, String & result) { result.clear(); result.reserve(partial_key_positions.size()); @@ -44,11 +43,11 @@ void getPartialKey(std::string_view key, const PartialKeyPositions & partial_key result.push_back(key[partial_key_position]); } -bool allPartialKeysAreUnique(const std::vector & keys, const PartialKeyPositions & partial_key_positions) +bool allPartialKeysAreUnique(const std::vector & keys, const PartialKeyPositions & partial_key_positions) { - std::unordered_set unique_partial_keys; + std::unordered_set unique_partial_keys; unique_partial_keys.reserve(keys.size()); - Key partial_key; + String partial_key; for (const auto & key : keys) { @@ -61,15 +60,15 @@ bool allPartialKeysAreUnique(const std::vector & keys, const PartialKeyPosi } // NextByte returns position of byte which adds the most entropy and the new entropy -std::pair nextByte(const std::vector & keys, size_t max_len, PartialKeyPositions & partial_key_positions) +std::pair nextByte(const std::vector & keys, size_t max_len, PartialKeyPositions & partial_key_positions) { size_t min_collisions = std::numeric_limits::max(); size_t best_position = 0; - std::unordered_map count_table; + std::unordered_map count_table; count_table.reserve(keys.size()); - Key partial_key; + String partial_key; for (size_t i = 0; i < max_len; ++i) { @@ -94,7 +93,7 @@ std::pair nextByte(const std::vector & keys, size_t max_len return {best_position, min_collisions}; } -std::pair chooseBytes(const std::vector & train_data) +std::pair chooseBytes(const std::vector & train_data) { if (train_data.size() <= 1) return {}; @@ -189,11 +188,11 @@ public: const size_t num_rows = col_data_string->size(); /// TODO this does some needless copying ... chooseBytes() should ideally understand the native ColumnString representation - std::vector training_data; + std::vector training_data; for (size_t i = 0; i < num_rows; ++i) { std::string_view string_view = col_data_string->getDataAt(i).toView(); - training_data.emplace_back(string_view.data(), string_view.size()); + training_data.emplace_back(string_view); } PartialKeyPositions partial_key_positions = chooseBytes(training_data).first; @@ -254,7 +253,7 @@ public: auto col_res = ColumnUInt64::create(num_rows); auto & col_res_vec = col_res->getData(); - Key partial_key; + String partial_key; for (size_t i = 0; i < num_rows; ++i) { std::string_view string_ref = col_data_string->getDataAt(i).toView();