diff --git a/dbms/src/Columns/ColumnFixedString.cpp b/dbms/src/Columns/ColumnFixedString.cpp index 12c43b2e0e2..6aeee31e18e 100644 --- a/dbms/src/Columns/ColumnFixedString.cpp +++ b/dbms/src/Columns/ColumnFixedString.cpp @@ -76,16 +76,6 @@ void ColumnFixedString::insertData(const char * pos, size_t length) memcpy(&chars[old_size], pos, length); } -size_t ColumnFixedString::getSerializedSize(size_t index) const -{ - return n; -} - -void ColumnFixedString::serializeValue(size_t index, char * buffer) const -{ - memcpy(buffer, &chars[n * index], n); -} - StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const { auto pos = arena.allocContinue(n, begin); diff --git a/dbms/src/Columns/ColumnFixedString.h b/dbms/src/Columns/ColumnFixedString.h index 04f61d9d9c5..8331544432a 100644 --- a/dbms/src/Columns/ColumnFixedString.h +++ b/dbms/src/Columns/ColumnFixedString.h @@ -93,10 +93,6 @@ public: chars.resize_assume_reserved(chars.size() - n * elems); } - size_t getSerializedSize(size_t n) const override; - - void serializeValue(size_t n, char * buffer) const override; - StringRef serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const override; const char * deserializeAndInsertFromArena(const char * pos) override; diff --git a/dbms/src/Columns/ColumnString.h b/dbms/src/Columns/ColumnString.h index 41974ca8a48..ecc19c4a35c 100644 --- a/dbms/src/Columns/ColumnString.h +++ b/dbms/src/Columns/ColumnString.h @@ -158,20 +158,6 @@ public: offsets.resize_assume_reserved(offsets.size() - n); } - size_t getSerializedSize(size_t n) const override - { - return sizeof(size_t) + sizeAt(n); - } - - void serializeValue(size_t n, char * buffer) const override - { - size_t string_size = sizeAt(n); - size_t offset = offsetAt(n); - - memcpy(buffer, &string_size, sizeof(string_size)); - memcpy(buffer + sizeof(string_size), &chars[offset], string_size); - } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override { size_t string_size = sizeAt(n); diff --git a/dbms/src/Columns/ColumnVector.cpp b/dbms/src/Columns/ColumnVector.cpp index 8465614fc2b..21f89514daf 100644 --- a/dbms/src/Columns/ColumnVector.cpp +++ b/dbms/src/Columns/ColumnVector.cpp @@ -27,17 +27,6 @@ namespace ErrorCodes extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } -template -size_t ColumnVector::getSerializedSize(size_t n) const -{ - return sizeof(T); -} - -template -void ColumnVector::serializeValue(size_t n, char * buffer) const -{ - memcpy(buffer, &data[n], sizeof(T)); -} template StringRef ColumnVector::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const diff --git a/dbms/src/Columns/ColumnVector.h b/dbms/src/Columns/ColumnVector.h index 3dab3c98b42..dbddd50b7ad 100644 --- a/dbms/src/Columns/ColumnVector.h +++ b/dbms/src/Columns/ColumnVector.h @@ -171,9 +171,6 @@ public: data.resize_assume_reserved(data.size() - n); } - size_t getSerializedSize(size_t n) const override; - void serializeValue(size_t n, char * buffer) const override; - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; const char * deserializeAndInsertFromArena(const char * pos) override; diff --git a/dbms/src/Columns/IColumn.h b/dbms/src/Columns/IColumn.h index 406db277213..6166338501e 100644 --- a/dbms/src/Columns/IColumn.h +++ b/dbms/src/Columns/IColumn.h @@ -157,11 +157,6 @@ public: */ virtual void popBack(size_t n) = 0; - // Returns number of bytes needed to serializes n-th element. - virtual size_t getSerializedSize(size_t n) const { throw Exception("Cannot getSerializedSize() for column " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - // Serializes n-th element. Writes getSerializedSize(n) bytes into buffer - virtual void serializeValue(size_t n, char * buffer) const { throw Exception("Cannot serializeValue() for column " + getName(), ErrorCodes::NOT_IMPLEMENTED); } - /** Serializes n-th element. Serialized element should be placed continuously inside Arena's memory. * Serialized value can be deserialized to reconstruct original object. Is used in aggregation. * The method is similar to getDataAt(), but can work when element's value cannot be mapped to existing continuous memory chunk, diff --git a/dbms/src/Dictionaries/CacheDictionary.cpp b/dbms/src/Dictionaries/CacheDictionary.cpp index 8f33921ac44..d10b84897fa 100644 --- a/dbms/src/Dictionaries/CacheDictionary.cpp +++ b/dbms/src/Dictionaries/CacheDictionary.cpp @@ -977,7 +977,7 @@ PaddedPODArray CacheDictionary::getCachedIds() const for (size_t idx = 0; idx < cells.size(); ++idx) { auto & cell = cells[idx]; - if (!isEmptyCell(idx)) + if (!isEmptyCell(idx) && !cells[idx].isDefault()) { array.push_back(cell.id); } diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp index 4a0fb3fe450..c178a6d482b 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.cpp @@ -266,7 +266,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows_num)) { - const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool); + const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool); keys_array[row] = key; const auto find_result = findCellIdx(key, now); const auto & cell_idx = find_result.cell_idx; @@ -458,7 +458,7 @@ void ComplexKeyCacheDictionary::getItemsNumberImpl( /// fetch up-to-date values, decide which ones require update for (const auto row : ext::range(0, rows_num)) { - const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool); + const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool); keys_array[row] = key; const auto find_result = findCellIdx(key, now); @@ -537,7 +537,7 @@ void ComplexKeyCacheDictionary::getItemsString( /// fetch up-to-date values, discard on fail for (const auto row : ext::range(0, rows_num)) { - const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool); + const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool); SCOPE_EXIT(temporary_keys_pool.rollback(key.size)); const auto find_result = findCellIdx(key, now); @@ -582,7 +582,7 @@ void ComplexKeyCacheDictionary::getItemsString( const auto now = std::chrono::system_clock::now(); for (const auto row : ext::range(0, rows_num)) { - const StringRef key = placeKeysInPool(row, key_columns, keys, temporary_keys_pool); + const StringRef key = placeKeysInPool(row, key_columns, keys, *dict_struct.key, temporary_keys_pool); keys_array[row] = key; const auto find_result = findCellIdx(key, now); @@ -900,7 +900,7 @@ StringRef ComplexKeyCacheDictionary::allocKey(const size_t row, const Columns & if (key_size_is_fixed) return placeKeysInFixedSizePool(row, key_columns); - return placeKeysInPool(row, key_columns, keys, *keys_pool); + return placeKeysInPool(row, key_columns, keys, *dict_struct.key, *keys_pool); } void ComplexKeyCacheDictionary::freeKey(const StringRef key) const @@ -913,15 +913,18 @@ void ComplexKeyCacheDictionary::freeKey(const StringRef key) const template StringRef ComplexKeyCacheDictionary::placeKeysInPool( - const size_t row, const Columns & key_columns, StringRefs & keys, Pool & pool) + const size_t row, const Columns & key_columns, StringRefs & keys, + const std::vector & key_attributes, Pool & pool) { const auto keys_size = key_columns.size(); size_t sum_keys_size{}; for (size_t j = 0; j < keys_size; ++j) { - keys[j].size = key_columns[j]->getSerializedSize(row); + keys[j] = key_columns[j]->getDataAt(row); sum_keys_size += keys[j].size; + if (key_attributes[j].underlying_type == AttributeUnderlyingType::String) + sum_keys_size += sizeof(size_t) + 1; } auto place = pool.alloc(sum_keys_size); @@ -929,9 +932,25 @@ StringRef ComplexKeyCacheDictionary::placeKeysInPool( auto key_start = place; for (size_t j = 0; j < keys_size; ++j) { - key_columns[j]->serializeValue(row, key_start); - keys[j].data = key_start; - key_start += keys[j].size; + if (key_attributes[j].underlying_type == AttributeUnderlyingType::String) + { + auto start = key_start; + auto key_size = keys[j].size + 1; + memcpy(key_start, &key_size, sizeof(size_t)); + key_start += sizeof(size_t); + memcpy(key_start, keys[j].data, keys[j].size); + key_start += keys[j].size; + *key_start = '\0'; + ++key_start; + keys[j].data = start; + keys[j].size += sizeof(size_t) + 1; + } + else + { + memcpy(key_start, keys[j].data, keys[j].size); + keys[j].data = key_start; + key_start += keys[j].size; + } } return { place, sum_keys_size }; @@ -981,7 +1000,8 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs}; for (auto idx : ext::range(0, cells.size())) - if (!isEmptyCell(idx)) + if (!isEmptyCell(idx) + && !cells[idx].isDefault()) keys.push_back(cells[idx].key); } @@ -989,5 +1009,4 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & return std::make_shared(shared_from_this(), max_block_size, keys, column_names); } - } diff --git a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h index bc098719c87..f4f1c9a5c9b 100644 --- a/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h +++ b/dbms/src/Dictionaries/ComplexKeyCacheDictionary.h @@ -235,7 +235,8 @@ private: template static StringRef placeKeysInPool( - const std::size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool); + const std::size_t row, const Columns & key_columns, StringRefs & keys, + const std::vector & key_attributes, Arena & pool); StringRef placeKeysInFixedSizePool( const std::size_t row, const Columns & key_columns) const;