From 2d069acc220347942ac3716168ded3dc7f9ded12 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 8 Oct 2021 16:13:56 +0300 Subject: [PATCH 1/5] System table data skipping indices added size --- .../system-tables/data_skipping_indices.md | 9 +++ docs/en/operations/system-tables/parts.md | 9 +++ src/Storages/IStorage.h | 7 ++ src/Storages/MergeTree/IMergeTreeDataPart.cpp | 50 +++++++++++++- src/Storages/MergeTree/IMergeTreeDataPart.h | 21 +++++- src/Storages/MergeTree/MergeTreeData.cpp | 68 ++++++++++++++----- src/Storages/MergeTree/MergeTreeData.h | 20 ++++-- .../MergeTree/MergedBlockOutputStream.cpp | 3 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- .../StorageSystemDataSkippingIndices.cpp | 19 ++++++ src/Storages/System/StorageSystemParts.cpp | 10 +++ 11 files changed, 192 insertions(+), 26 deletions(-) diff --git a/docs/en/operations/system-tables/data_skipping_indices.md b/docs/en/operations/system-tables/data_skipping_indices.md index 683666e1f77..add89ae9144 100644 --- a/docs/en/operations/system-tables/data_skipping_indices.md +++ b/docs/en/operations/system-tables/data_skipping_indices.md @@ -10,6 +10,9 @@ Columns: - `type` ([String](../../sql-reference/data-types/string.md)) — Index type. - `expr` ([String](../../sql-reference/data-types/string.md)) — Expression for the index calculation. - `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of granules in the block. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes. +- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes. 
**Example** @@ -26,6 +29,9 @@ name: clicks_idx type: minmax expr: clicks granularity: 1 +data_compressed_bytes: 58 +data_uncompressed_bytes: 6 +marks: 48 Row 2: ────── @@ -35,4 +41,7 @@ name: contacts_null_idx type: minmax expr: assumeNotNull(contacts_null) granularity: 1 +data_compressed_bytes: 58 +data_uncompressed_bytes: 6 +marks: 48 ``` diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index 51a0a1180f3..45fdcc40451 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -38,6 +38,12 @@ Columns: - `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks. +- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included. + +- `secondary_indices_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included. + +- `secondary_indices_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks for secondary indices. + - `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The time the directory with the data part was modified. This usually corresponds to the time of data part creation. - `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The time when the data part became inactive. 
@@ -119,6 +125,9 @@ rows: 6 bytes_on_disk: 310 data_compressed_bytes: 157 data_uncompressed_bytes: 91 +secondary_indices_compressed_bytes: 58 +secondary_indices_uncompressed_bytes: 6 +secondary_indices_marks_bytes: 48 marks_bytes: 144 modification_time: 2020-06-18 13:01:49 remove_time: 1970-01-01 00:00:00 diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 6ce17552ba1..0a9d1113601 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -87,6 +87,8 @@ struct ColumnSize } }; +using IndexSize = ColumnSize; + /** Storage. Describes the table. Responsible for * - storage of the table data; * - the definition in which files (or not in files) the data is stored; @@ -163,6 +165,11 @@ public: using ColumnSizeByName = std::unordered_map; virtual ColumnSizeByName getColumnSizes() const { return {}; } + /// Optional size information of each secondary index. + /// Valid only for MergeTree family. + using IndexSizeByName = std::unordered_map; + virtual IndexSizeByName getSecondaryIndexSizes() const { return {}; } + /// Get mutable version (snapshot) of storage metadata. Metadata object is /// multiversion, so it can be concurrently changed, but returned copy can be /// used without any locks. diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index dc2c5f8185d..1a6290580a0 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -584,7 +584,7 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks loadColumns(require_columns_checksums); loadChecksums(require_columns_checksums); loadIndexGranularity(); - calculateColumnsSizesOnDisk(); + calculateColumnsAndSecondaryIndicesSizesOnDisk(); loadIndex(); /// Must be called after loadIndexGranularity as it uses the value of `index_granularity` loadRowsCount(); /// Must be called after loadIndexGranularity() as it uses the value of `index_granularity`. 
loadPartitionAndMinMaxIndex(); @@ -1420,6 +1420,11 @@ void IMergeTreeDataPart::checkConsistency(bool /* require_part_metadata */) cons throw Exception("Method 'checkConsistency' is not implemented for part with type " + getType().toString(), ErrorCodes::NOT_IMPLEMENTED); } +void IMergeTreeDataPart::calculateColumnsAndSecondaryIndicesSizesOnDisk() +{ + calculateColumnsSizesOnDisk(); + calculateSecondaryIndicesSizesOnDisk(); +} void IMergeTreeDataPart::calculateColumnsSizesOnDisk() { @@ -1429,6 +1434,40 @@ void IMergeTreeDataPart::calculateColumnsSizesOnDisk() calculateEachColumnSizes(columns_sizes, total_columns_size); } +void IMergeTreeDataPart::calculateSecondaryIndicesSizesOnDisk() +{ + if (checksums.empty()) + throw Exception("Cannot calculate secondary indexes sizes when columns or checksums are not initialized", ErrorCodes::LOGICAL_ERROR); + + auto secondary_indices_descriptions = storage.getInMemoryMetadataPtr()->secondary_indices; + + for (auto & index_description : secondary_indices_descriptions) + { + ColumnSize index_size; + + auto index_ptr = MergeTreeIndexFactory::instance().get(index_description); + auto index_name = index_ptr->getFileName(); + auto index_name_escaped = escapeForFileName(index_name); + + auto index_file_name = index_name_escaped + index_ptr->getSerializedFileExtension(); + auto index_marks_file_name = index_name_escaped + index_granularity_info.marks_file_extension; + + auto bin_checksum = checksums.files.find(index_file_name); + if (bin_checksum != checksums.files.end()) + { + index_size.data_compressed = bin_checksum->second.file_size; + index_size.data_uncompressed = bin_checksum->second.uncompressed_size; + } + + auto mrk_checksum = checksums.files.find(index_marks_file_name); + if (mrk_checksum != checksums.files.end()) + index_size.marks = mrk_checksum->second.file_size; + + total_secondary_indices_size.add(index_size); + secondary_index_sizes[index_description.name] = index_size; + } +} + ColumnSize 
IMergeTreeDataPart::getColumnSize(const String & column_name, const IDataType & /* type */) const { /// For some types of parts columns_size maybe not calculated @@ -1439,6 +1478,15 @@ ColumnSize IMergeTreeDataPart::getColumnSize(const String & column_name, const I return ColumnSize{}; } +IndexSize IMergeTreeDataPart::getSecondaryIndexSize(const String & secondary_index_name) const +{ + auto it = secondary_index_sizes.find(secondary_index_name); + if (it != secondary_index_sizes.end()) + return it->second; + + return ColumnSize{}; +} + void IMergeTreeDataPart::accumulateColumnSizes(ColumnToSize & column_to_size) const { for (const auto & [column_name, size] : columns_sizes) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index be48aed5c8b..ceb3ed64170 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -55,6 +55,8 @@ public: using ColumnSizeByName = std::unordered_map; using NameToNumber = std::unordered_map; + using IndexSizeByName = std::unordered_map; + using Type = MergeTreeDataPartType; @@ -101,9 +103,16 @@ public: /// Otherwise return information about column size on disk. ColumnSize getColumnSize(const String & column_name, const IDataType & /* type */) const; + /// NOTE: Returns zeros if secondary indexes are not found in checksums. + /// Otherwise return information about secondary index size on disk. 
+ IndexSize getSecondaryIndexSize(const String & secondary_index_name) const; + /// Return information about column size on disk for all columns in part ColumnSize getTotalColumnsSize() const { return total_columns_size; } + /// Return information about secondary indexes size on disk for all indexes in part + IndexSize getTotalSeconaryIndicesSize() const { return total_secondary_indices_size; } + virtual String getFileNameForColumn(const NameAndTypePair & column) const = 0; virtual ~IMergeTreeDataPart(); @@ -341,7 +350,9 @@ public: /// Calculate the total size of the entire directory with all the files static UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk_, const String & from); - void calculateColumnsSizesOnDisk(); + + /// Calculate column and secondary indices sizes on disk. + void calculateColumnsAndSecondaryIndicesSizesOnDisk(); String getRelativePathForPrefix(const String & prefix) const; @@ -396,6 +407,10 @@ protected: /// Size for each column, calculated once in calcuateColumnSizesOnDisk ColumnSizeByName columns_sizes; + ColumnSize total_secondary_indices_size; + + IndexSizeByName secondary_index_sizes; + /// Total size on disk, not only columns. May not contain size of /// checksums.txt and columns.txt. 0 - if not counted; UInt64 bytes_on_disk{0}; @@ -450,6 +465,10 @@ private: void loadPartitionAndMinMaxIndex(); + void calculateColumnsSizesOnDisk(); + + void calculateSecondaryIndicesSizesOnDisk(); + /// Load default compression codec from file default_compression_codec.txt /// if it not exists tries to deduce codec from compressed column without /// any specifial compression. 
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index c04e0d2e38f..10e5fe9e71f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1167,7 +1167,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } } - calculateColumnSizesImpl(); + calculateColumnAndSecondaryIndexSizesImpl(); LOG_DEBUG(log, "Loaded data parts ({} items)", data_parts_indexes.size()); @@ -2352,7 +2352,7 @@ bool MergeTreeData::renameTempPartAndReplace( { covered_part->remove_time.store(current_time, std::memory_order_relaxed); modifyPartState(covered_part, DataPartState::Outdated); - removePartContributionToColumnSizes(covered_part); + removePartContributionToColumnAndSecondaryIndexSizes(covered_part); reduce_bytes += covered_part->getBytesOnDisk(); reduce_rows += covered_part->rows_count; ++reduce_parts; @@ -2361,7 +2361,7 @@ bool MergeTreeData::renameTempPartAndReplace( decreaseDataVolume(reduce_bytes, reduce_rows, reduce_parts); modifyPartState(part_it, DataPartState::Committed); - addPartContributionToColumnSizes(part); + addPartContributionToColumnAndSecondaryIndexSizes(part); addPartContributionToDataVolume(part); } @@ -2404,7 +2404,7 @@ void MergeTreeData::removePartsFromWorkingSet(const MergeTreeData::DataPartsVect { if (part->getState() == IMergeTreeDataPart::State::Committed) { - removePartContributionToColumnSizes(part); + removePartContributionToColumnAndSecondaryIndexSizes(part); removePartContributionToDataVolume(part); } @@ -2542,7 +2542,7 @@ restore_covered) if (part->getState() == DataPartState::Committed) { removePartContributionToDataVolume(part); - removePartContributionToColumnSizes(part); + removePartContributionToColumnAndSecondaryIndexSizes(part); } modifyPartState(it_part, DataPartState::Deleting); @@ -2590,7 +2590,7 @@ restore_covered) if ((*it)->getState() != DataPartState::Committed) { - addPartContributionToColumnSizes(*it); + 
addPartContributionToColumnAndSecondaryIndexSizes(*it); addPartContributionToDataVolume(*it); modifyPartState(it, DataPartState::Committed); // iterator is not invalidated here } @@ -2621,7 +2621,7 @@ restore_covered) if ((*it)->getState() != DataPartState::Committed) { - addPartContributionToColumnSizes(*it); + addPartContributionToColumnAndSecondaryIndexSizes(*it); addPartContributionToDataVolume(*it); modifyPartState(it, DataPartState::Committed); } @@ -2973,32 +2973,46 @@ static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part) part->modification_time = disk->getLastModified(full_part_path).epochTime(); } -void MergeTreeData::calculateColumnSizesImpl() +void MergeTreeData::calculateColumnAndSecondaryIndexSizesImpl() { + std::cerr << "MergeTreeData::calculateColumnAndSecondaryIndexSizesImpl" << std::endl; + column_sizes.clear(); /// Take into account only committed parts auto committed_parts_range = getDataPartsStateRange(DataPartState::Committed); for (const auto & part : committed_parts_range) - addPartContributionToColumnSizes(part); + addPartContributionToColumnAndSecondaryIndexSizes(part); } -void MergeTreeData::addPartContributionToColumnSizes(const DataPartPtr & part) +void MergeTreeData::addPartContributionToColumnAndSecondaryIndexSizes(const DataPartPtr & part) { + std::cerr << "MergeTreeData::addPartContributionToColumnAndSecondaryIndexSizes " << part->name << std::endl; + for (const auto & column : part->getColumns()) { + std::cerr << "Column name " << column.name << std::endl; ColumnSize & total_column_size = column_sizes[column.name]; + std::cerr << "Total column size compressed " << total_column_size.data_compressed << " uncompressed size " << total_column_size.data_uncompressed << std::endl; ColumnSize part_column_size = part->getColumnSize(column.name, *column.type); total_column_size.add(part_column_size); } + + auto indexes_descriptions = getInMemoryMetadataPtr()->secondary_indices; + for (const auto & index : 
indexes_descriptions) + { + IndexSize & total_secondary_index_size = secondary_index_sizes[index.name]; + IndexSize part_index_size = part->getSecondaryIndexSize(index.name); + total_secondary_index_size.add(part_index_size); + } } -void MergeTreeData::removePartContributionToColumnSizes(const DataPartPtr & part) +void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const DataPartPtr & part) { for (const auto & column : part->getColumns()) { ColumnSize & total_column_size = column_sizes[column.name]; - ColumnSize part_column_size = part->getColumnSize(column.name, *column.type); + ColumnSize part_secondary_index_size = part->getColumnSize(column.name, *column.type); auto log_subtract = [&](size_t & from, size_t value, const char * field) { @@ -3009,9 +3023,29 @@ void MergeTreeData::removePartContributionToColumnSizes(const DataPartPtr & part from -= value; }; - log_subtract(total_column_size.data_compressed, part_column_size.data_compressed, ".data_compressed"); - log_subtract(total_column_size.data_uncompressed, part_column_size.data_uncompressed, ".data_uncompressed"); - log_subtract(total_column_size.marks, part_column_size.marks, ".marks"); + log_subtract(total_column_size.data_compressed, part_secondary_index_size.data_compressed, ".data_compressed"); + log_subtract(total_column_size.data_uncompressed, part_secondary_index_size.data_uncompressed, ".data_uncompressed"); + log_subtract(total_column_size.marks, part_secondary_index_size.marks, ".marks"); + } + + auto indexes_descriptions = getInMemoryMetadataPtr()->secondary_indices; + for (const auto & index : indexes_descriptions) + { + IndexSize & total_secondary_index_size = secondary_index_sizes[index.name]; + IndexSize part_secondary_index_size = part->getSecondaryIndexSize(index.name); + + auto log_subtract = [&](size_t & from, size_t value, const char * field) + { + if (value > from) + LOG_ERROR(log, "Possibly incorrect index size subtraction: {} - {} = {}, index: {}, field: {}", + 
from, value, from - value, index.name, field); + + from -= value; + }; + + log_subtract(total_secondary_index_size.data_compressed, part_secondary_index_size.data_compressed, ".data_compressed"); + log_subtract(total_secondary_index_size.data_uncompressed, part_secondary_index_size.data_uncompressed, ".data_uncompressed"); + log_subtract(total_secondary_index_size.marks, part_secondary_index_size.marks, ".marks"); } } @@ -4043,7 +4077,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: reduce_rows += covered_part->rows_count; data.modifyPartState(covered_part, DataPartState::Outdated); - data.removePartContributionToColumnSizes(covered_part); + data.removePartContributionToColumnAndSecondaryIndexSizes(covered_part); } reduce_parts += covered_parts.size(); @@ -4052,7 +4086,7 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: ++add_parts; data.modifyPartState(part, DataPartState::Committed); - data.addPartContributionToColumnSizes(part); + data.addPartContributionToColumnAndSecondaryIndexSizes(part); } } data.decreaseDataVolume(reduce_bytes, reduce_rows, reduce_parts); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index bdebd5e9187..0e0e84d011b 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -654,6 +654,12 @@ public: return column_sizes; } + IndexSizeByName getSecondaryIndexSizes() const override + { + auto lock = lockParts(); + return secondary_index_sizes; + } + /// For ATTACH/DETACH/DROP PARTITION. String getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr context) const; std::unordered_set getPartitionIDsFromQuery(const ASTs & asts, ContextPtr context) const; @@ -873,6 +879,9 @@ protected: /// Current column sizes in compressed and uncompressed form. ColumnSizeByName column_sizes; + /// Current secondary index sizes in compressed and uncompressed form. 
+ IndexSizeByName secondary_index_sizes; + /// Engine-specific methods BrokenPartCallback broken_part_callback; @@ -1005,11 +1014,12 @@ protected: void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const; - /// Calculates column sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked. - void calculateColumnSizesImpl(); - /// Adds or subtracts the contribution of the part to compressed column sizes. - void addPartContributionToColumnSizes(const DataPartPtr & part); - void removePartContributionToColumnSizes(const DataPartPtr & part); + /// Calculates column and secondary indexes sizes in compressed form for the current state of data_parts. Call with data_parts mutex locked. + void calculateColumnAndSecondaryIndexSizesImpl(); + + /// Adds or subtracts the contribution of the part to compressed column and secondary indexes sizes. + void addPartContributionToColumnAndSecondaryIndexSizes(const DataPartPtr & part); + void removePartContributionToColumnAndSecondaryIndexSizes(const DataPartPtr & part); /// If there is no part in the partition with ID `partition_id`, returns empty ptr. Should be called under the lock. 
DataPartPtr getAnyPartInPartition(const String & partition_id, DataPartsLock & data_parts_lock) const; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 5206f77290b..43146709686 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -87,7 +87,8 @@ void MergedBlockOutputStream::writeSuffixAndFinalizePart( new_part->checksums = checksums; new_part->setBytesOnDisk(checksums.getTotalSizeOnDisk()); new_part->index_granularity = writer->getIndexGranularity(); - new_part->calculateColumnsSizesOnDisk(); + new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(); + if (default_codec != nullptr) new_part->default_codec = default_codec; new_part->storage.lockSharedData(*new_part); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index b8941fc9d84..115de043cd2 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -475,7 +475,7 @@ void finalizeMutatedPart( new_data_part->setBytesOnDisk( MergeTreeData::DataPart::calculateTotalSizeOnDisk(new_data_part->volume->getDisk(), new_data_part->getFullRelativePath())); new_data_part->default_codec = codec; - new_data_part->calculateColumnsSizesOnDisk(); + new_data_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(); new_data_part->storage.lockSharedData(*new_data_part); } diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index 7a6ce4ec519..d7fc06da953 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -25,6 +25,9 @@ StorageSystemDataSkippingIndices::StorageSystemDataSkippingIndices(const Storage { "type", std::make_shared() }, { "expr", std::make_shared() }, { "granularity", std::make_shared() }, + { "data_compressed_bytes", std::make_shared() }, + 
{ "data_uncompressed_bytes", std::make_shared() }, + { "marks", std::make_shared()} })); setInMemoryMetadata(storage_metadata); } @@ -97,6 +100,7 @@ protected: continue; const auto indices = metadata_snapshot->getSecondaryIndices(); + auto secondary_index_sizes = table->getSecondaryIndexSizes(); for (const auto & index : indices) { ++rows_count; @@ -127,6 +131,21 @@ protected: // 'granularity' column if (column_mask[src_index++]) res_columns[res_index++]->insert(index.granularity); + + auto & secondary_index_size = secondary_index_sizes[index.name]; + + // 'compressed bytes' column + if (column_mask[src_index++]) + res_columns[res_index++]->insert(secondary_index_size.data_compressed); + + // 'uncompressed bytes' column + + if (column_mask[src_index++]) + res_columns[res_index++]->insert(secondary_index_size.data_uncompressed); + + /// 'marks' column + if (column_mask[src_index++]) + res_columns[res_index++]->insert(secondary_index_size.marks); } } } diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index e79978463dd..6826082ef1d 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -30,6 +30,9 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"data_compressed_bytes", std::make_shared()}, {"data_uncompressed_bytes", std::make_shared()}, {"marks_bytes", std::make_shared()}, + {"secondary_indices_compressed_bytes", std::make_shared()}, + {"secondary_indices_uncompressed_bytes", std::make_shared()}, + {"secondary_indices_marks_bytes", std::make_shared()}, {"modification_time", std::make_shared()}, {"remove_time", std::make_shared()}, {"refcount", std::make_shared()}, @@ -98,6 +101,7 @@ void StorageSystemParts::processNextStorage( auto part_state = all_parts_state[part_number]; ColumnSize columns_size = part->getTotalColumnsSize(); + ColumnSize secondary_indexes_size = part->getTotalSeconaryIndicesSize(); size_t src_index = 0, res_index = 0; if 
(columns_mask[src_index++]) @@ -126,6 +130,12 @@ void StorageSystemParts::processNextStorage( columns[res_index++]->insert(columns_size.data_uncompressed); if (columns_mask[src_index++]) columns[res_index++]->insert(columns_size.marks); + if (columns_mask[src_index++]) + columns[res_index++]->insert(secondary_indexes_size.data_compressed); + if (columns_mask[src_index++]) + columns[res_index++]->insert(secondary_indexes_size.data_uncompressed); + if (columns_mask[src_index++]) + columns[res_index++]->insert(secondary_indexes_size.marks); if (columns_mask[src_index++]) columns[res_index++]->insert(static_cast(part->modification_time)); From ce0c41e1ad6a025a0117e7486ff9fd9a511c5be5 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Fri, 8 Oct 2021 23:43:16 +0300 Subject: [PATCH 2/5] Fixed tests --- src/Storages/MergeTree/MergeTreeData.cpp | 6 ------ ...1917_system_data_skipping_indices.reference | 10 +++++----- .../01932_alter_index_with_order.reference | 18 +++++++++--------- 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 10e5fe9e71f..1ede7669832 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2975,8 +2975,6 @@ static void loadPartAndFixMetadataImpl(MergeTreeData::MutableDataPartPtr part) void MergeTreeData::calculateColumnAndSecondaryIndexSizesImpl() { - std::cerr << "MergeTreeData::calculateColumnAndSecondaryIndexSizesImpl" << std::endl; - column_sizes.clear(); /// Take into account only committed parts @@ -2987,13 +2985,9 @@ void MergeTreeData::calculateColumnAndSecondaryIndexSizesImpl() void MergeTreeData::addPartContributionToColumnAndSecondaryIndexSizes(const DataPartPtr & part) { - std::cerr << "MergeTreeData::addPartContributionToColumnAndSecondaryIndexSizes " << part->name << std::endl; - for (const auto & column : part->getColumns()) { - std::cerr << "Column name " << column.name << std::endl; 
ColumnSize & total_column_size = column_sizes[column.name]; - std::cerr << "Total column size compressed " << total_column_size.data_compressed << " uncompressed size " << total_column_size.data_uncompressed << std::endl; ColumnSize part_column_size = part->getColumnSize(column.name, *column.type); total_column_size.add(part_column_size); } diff --git a/tests/queries/0_stateless/01917_system_data_skipping_indices.reference b/tests/queries/0_stateless/01917_system_data_skipping_indices.reference index b5a4b596a97..ca7e87e017b 100644 --- a/tests/queries/0_stateless/01917_system_data_skipping_indices.reference +++ b/tests/queries/0_stateless/01917_system_data_skipping_indices.reference @@ -1,8 +1,8 @@ -default data_01917 d1_idx minmax d1 1 -default data_01917 d1_null_idx minmax assumeNotNull(d1_null) 1 -default data_01917_2 memory set frequency * length(name) 5 -default data_01917_2 sample_index1 minmax length(name), name 4 -default data_01917_2 sample_index2 ngrambf_v1 lower(name), name 4 +test data_01917 d1_idx minmax d1 1 0 0 0 +test data_01917 d1_null_idx minmax assumeNotNull(d1_null) 1 0 0 0 +test data_01917_2 memory set frequency * length(name) 5 0 0 0 +test data_01917_2 sample_index1 minmax length(name), name 4 0 0 0 +test data_01917_2 sample_index2 ngrambf_v1 lower(name), name 4 0 0 0 2 3 d1_idx diff --git a/tests/queries/0_stateless/01932_alter_index_with_order.reference b/tests/queries/0_stateless/01932_alter_index_with_order.reference index 07e1aab3df9..eff9ea7da0e 100644 --- a/tests/queries/0_stateless/01932_alter_index_with_order.reference +++ b/tests/queries/0_stateless/01932_alter_index_with_order.reference @@ -1,9 +1,9 @@ -default alter_index_test index_a set a 1 -default alter_index_test index_b minmax b 1 -default alter_index_test index_c set c 2 -default alter_index_test index_a set a 1 -default alter_index_test index_d set d 1 -default alter_index_test index_b minmax b 1 -default alter_index_test index_c set c 2 -default alter_index_test index_a 
set a 1 -default alter_index_test index_d set d 1 +default alter_index_test index_a set a 1 0 0 0 +default alter_index_test index_b minmax b 1 0 0 0 +default alter_index_test index_c set c 2 0 0 0 +default alter_index_test index_a set a 1 0 0 0 +default alter_index_test index_d set d 1 0 0 0 +default alter_index_test index_b minmax b 1 0 0 0 +default alter_index_test index_c set c 2 0 0 0 +default alter_index_test index_a set a 1 0 0 0 +default alter_index_test index_d set d 1 0 0 0 From 61a725f53199697451200a2d24e0173347f8b9e2 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 10 Oct 2021 23:53:31 +0300 Subject: [PATCH 3/5] Fixed tests --- .../01917_system_data_skipping_indices.reference | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01917_system_data_skipping_indices.reference b/tests/queries/0_stateless/01917_system_data_skipping_indices.reference index ca7e87e017b..115d60f60cc 100644 --- a/tests/queries/0_stateless/01917_system_data_skipping_indices.reference +++ b/tests/queries/0_stateless/01917_system_data_skipping_indices.reference @@ -1,8 +1,8 @@ -test data_01917 d1_idx minmax d1 1 0 0 0 -test data_01917 d1_null_idx minmax assumeNotNull(d1_null) 1 0 0 0 -test data_01917_2 memory set frequency * length(name) 5 0 0 0 -test data_01917_2 sample_index1 minmax length(name), name 4 0 0 0 -test data_01917_2 sample_index2 ngrambf_v1 lower(name), name 4 0 0 0 +default data_01917 d1_idx minmax d1 1 0 0 0 +default data_01917 d1_null_idx minmax assumeNotNull(d1_null) 1 0 0 0 +default data_01917_2 memory set frequency * length(name) 5 0 0 0 +default data_01917_2 sample_index1 minmax length(name), name 4 0 0 0 +default data_01917_2 sample_index2 ngrambf_v1 lower(name), name 4 0 0 0 2 3 d1_idx From b0d887a0fef89fb529cff4f7c02cfab8cf75c280 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 11 Oct 2021 14:00:10 +0300 Subject: [PATCH 4/5] Added tests --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 1 + 
...28_system_data_skipping_indices_size.reference | 1 + .../2028_system_data_skipping_indices_size.sql | 15 +++++++++++++++ 3 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/2028_system_data_skipping_indices_size.reference create mode 100644 tests/queries/0_stateless/2028_system_data_skipping_indices_size.sql diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 1a6290580a0..0f701cc4adf 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1452,6 +1452,7 @@ void IMergeTreeDataPart::calculateSecondaryIndicesSizesOnDisk() auto index_file_name = index_name_escaped + index_ptr->getSerializedFileExtension(); auto index_marks_file_name = index_name_escaped + index_granularity_info.marks_file_extension; + /// The part may not contain this index: files missing from the checksums leave the corresponding sizes untouched. auto bin_checksum = checksums.files.find(index_file_name); if (bin_checksum != checksums.files.end()) { diff --git a/tests/queries/0_stateless/2028_system_data_skipping_indices_size.reference b/tests/queries/0_stateless/2028_system_data_skipping_indices_size.reference new file mode 100644 index 00000000000..d0378511850 --- /dev/null +++ b/tests/queries/0_stateless/2028_system_data_skipping_indices_size.reference @@ -0,0 +1 @@ +default test_table value_index minmax value 1 38 12 24 diff --git a/tests/queries/0_stateless/2028_system_data_skipping_indices_size.sql b/tests/queries/0_stateless/2028_system_data_skipping_indices_size.sql new file mode 100644 index 00000000000..e77f88aa36f --- /dev/null +++ b/tests/queries/0_stateless/2028_system_data_skipping_indices_size.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test_table; + +CREATE TABLE test_table +( + key UInt64, + value String, + INDEX value_index value TYPE minmax GRANULARITY 1 +) +Engine=MergeTree() +ORDER BY key; + +INSERT INTO test_table VALUES (0, 'Value'); +SELECT * FROM system.data_skipping_indices WHERE database = currentDatabase(); 
+ +DROP TABLE test_table; From c15b67c18264919df7c8048ea36ea5058a185d36 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 12 Oct 2021 11:42:24 +0300 Subject: [PATCH 5/5] Fix naming --- src/Storages/MergeTree/MergeTreeData.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 1ede7669832..f9c26225440 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3006,7 +3006,7 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D for (const auto & column : part->getColumns()) { ColumnSize & total_column_size = column_sizes[column.name]; - ColumnSize part_secondary_index_size = part->getColumnSize(column.name, *column.type); + ColumnSize part_column_size = part->getColumnSize(column.name, *column.type); auto log_subtract = [&](size_t & from, size_t value, const char * field) { @@ -3017,9 +3017,9 @@ void MergeTreeData::removePartContributionToColumnAndSecondaryIndexSizes(const D from -= value; }; - log_subtract(total_column_size.data_compressed, part_secondary_index_size.data_compressed, ".data_compressed"); - log_subtract(total_column_size.data_uncompressed, part_secondary_index_size.data_uncompressed, ".data_uncompressed"); - log_subtract(total_column_size.marks, part_secondary_index_size.marks, ".marks"); + log_subtract(total_column_size.data_compressed, part_column_size.data_compressed, ".data_compressed"); + log_subtract(total_column_size.data_uncompressed, part_column_size.data_uncompressed, ".data_uncompressed"); + log_subtract(total_column_size.marks, part_column_size.marks, ".marks"); } auto indexes_descriptions = getInMemoryMetadataPtr()->secondary_indices;