From 3ea19832b9114f9367209fac70ec872f66b48fb0 Mon Sep 17 00:00:00 2001 From: Jordi Date: Sun, 29 Sep 2024 13:06:56 +0200 Subject: [PATCH 1/2] Add projections size to system.projections --- .../operations/system-tables/projections.md | 33 ++++++++++++------- .../System/StorageSystemProjections.cpp | 33 ++++++++++++++++++- .../03230_system_projections.reference | 6 ++-- .../0_stateless/03230_system_projections.sql | 17 ++++++++-- 4 files changed, 71 insertions(+), 18 deletions(-) diff --git a/docs/en/operations/system-tables/projections.md b/docs/en/operations/system-tables/projections.md index e9c88f731b2..278582d4812 100644 --- a/docs/en/operations/system-tables/projections.md +++ b/docs/en/operations/system-tables/projections.md @@ -11,6 +11,9 @@ Columns: - `table` ([String](../../sql-reference/data-types/string.md)) — Table name. - `name` ([String](../../sql-reference/data-types/string.md)) — Projection name. - `type` ([Enum](../../sql-reference/data-types/enum.md)) — Projection type ('Normal' = 0, 'Aggregate' = 1). +- `total_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total number of rows. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes. - `sorting_key` ([Array(String)](../../sql-reference/data-types/array.md)) — Projection sorting key. - `query` ([String](../../sql-reference/data-types/string.md)) — Projection query. 
@@ -23,19 +26,25 @@ SELECT * FROM system.projections LIMIT 2 FORMAT Vertical; ```text Row 1: ────── -database: default -table: landing -name: improved_sorting_key -type: Normal -sorting_key: ['user_id','date'] -query: SELECT * ORDER BY user_id, date +database: default +table: landing +name: improved_sorting_key +type: Normal +total_rows: 1000 +data_compressed_bytes: 8081 +data_uncompressed_bytes: 12890 +sorting_key: ['user_id','date'] +query: SELECT * ORDER BY user_id, date Row 2: ────── -database: default -table: landing -name: agg_no_key -type: Aggregate -sorting_key: [] -query: SELECT count() +database: default +table: landing +name: agg +type: Aggregate +total_rows: 2 +data_compressed_bytes: 82 +data_uncompressed_bytes: 32 +sorting_key: ['user_id'] +query: SELECT user_id, max(date) AS max_date GROUP BY user_id ``` diff --git a/src/Storages/System/StorageSystemProjections.cpp b/src/Storages/System/StorageSystemProjections.cpp index ae76f11f7cf..3c171384f70 100644 --- a/src/Storages/System/StorageSystemProjections.cpp +++ b/src/Storages/System/StorageSystemProjections.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,9 @@ StorageSystemProjections::StorageSystemProjections(const StorageID & table_id_) { "table", std::make_shared(), "Table name."}, { "name", std::make_shared(), "Projection name."}, { "type", std::move(projection_type_datatype), "Projection type."}, + { "total_rows", std::make_shared(), "Total number of rows."}, + { "data_compressed_bytes", std::make_shared(), "The size of compressed data, in bytes."}, + { "data_uncompressed_bytes", std::make_shared(), "The size of decompressed data, in bytes."}, { "sorting_key", std::make_shared(std::make_shared()), "Projection sorting key."}, { "query", std::make_shared(), "Projection query."}, })); @@ -108,13 +112,15 @@ protected: continue; const auto table = tables_it->table(); - if (!table) + if (!table || !table->isMergeTree()) continue; StorageMetadataPtr 
metadata_snapshot = table->getInMemoryMetadataPtr(); if (!metadata_snapshot) continue; const auto & projections = metadata_snapshot->getProjections(); + auto parts = dynamic_cast(table.get())->getDataPartsVectorForInternalUsage({MergeTreeData::DataPartState::Active}, nullptr); + for (const auto & projection : projections) { ++rows_count; @@ -134,6 +140,31 @@ protected: // 'type' column if (column_mask[src_index++]) res_columns[res_index++]->insert(projection.type); + + size_t total_rows = 0; + size_t data_compressed = 0; + size_t data_uncompressed = 0; + for (const auto & part : parts) + { + auto projection_parts = part->getProjectionParts(); + auto projection_part = projection_parts[projection.name]; + auto column_size = projection_part->getTotalColumnsSize(); + + total_rows += projection_part->rows_count; + data_compressed += column_size.data_compressed; + data_uncompressed += column_size.data_uncompressed; + } + + // 'total_rows' column + if (column_mask[src_index++]) + res_columns[res_index++]->insert(total_rows); + // 'data_compressed_bytes' column + if (column_mask[src_index++]) + res_columns[res_index++]->insert(data_compressed); + // 'data_uncompressed_bytes' column + if (column_mask[src_index++]) + res_columns[res_index++]->insert(data_uncompressed); + // 'sorting_key' column if (column_mask[src_index++]) { diff --git a/tests/queries/0_stateless/03230_system_projections.reference b/tests/queries/0_stateless/03230_system_projections.reference index 22ca272955c..68f1a6c0435 100644 --- a/tests/queries/0_stateless/03230_system_projections.reference +++ b/tests/queries/0_stateless/03230_system_projections.reference @@ -1,6 +1,6 @@ -default projections improved_sorting_key Normal ['d1','key'] SELECT * ORDER BY d1, key -default projections_2 agg Aggregate ['name'] SELECT name, max(frequency) AS max_frequency GROUP BY name -default projections_2 agg_no_key Aggregate [] SELECT max(frequency) AS max_frequency +default projections improved_sorting_key Normal 1000 1 1 
['d1','key'] SELECT * ORDER BY d1, key +default projections_2 agg Aggregate 2 1 1 ['name'] SELECT name, max(frequency) AS max_frequency GROUP BY name +default projections_2 agg_no_key Aggregate 1 1 1 [] SELECT max(frequency) AS max_frequency 1 2 improved_sorting_key diff --git a/tests/queries/0_stateless/03230_system_projections.sql b/tests/queries/0_stateless/03230_system_projections.sql index 37c1e5df8ef..578ac606c8a 100644 --- a/tests/queries/0_stateless/03230_system_projections.sql +++ b/tests/queries/0_stateless/03230_system_projections.sql @@ -28,7 +28,20 @@ CREATE TABLE projections_2 Engine=MergeTree() ORDER BY name; -SELECT * FROM system.projections WHERE database = currentDatabase(); +INSERT INTO projections SELECT 'name_' || number AS key, number AS d1 FROM numbers(1000); +INSERT INTO projections_2 SELECT 'name_' || number % 2 AS name, number AS frequency FROM numbers(1000); + +SELECT + database, + table, + name, + type, + total_rows, + data_compressed_bytes > 0, + data_uncompressed_bytes > 0, + sorting_key, + query +FROM system.projections WHERE database = currentDatabase(); SELECT count(*) FROM system.projections WHERE table = 'projections' AND database = currentDatabase(); SELECT count(*) FROM system.projections WHERE table = 'projections_2' AND database = currentDatabase(); @@ -36,4 +49,4 @@ SELECT count(*) FROM system.projections WHERE table = 'projections_2' AND databa SELECT name FROM system.projections WHERE type = 'Normal' AND database = currentDatabase(); DROP TABLE projections; -DROP TABLE projections_2; \ No newline at end of file +DROP TABLE projections_2; From 21892bdf0de08b4d222021e9f297e76760dda5b8 Mon Sep 17 00:00:00 2001 From: Jordi Date: Mon, 30 Sep 2024 09:25:17 +0200 Subject: [PATCH 2/2] Improve cast --- src/Storages/System/StorageSystemProjections.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemProjections.cpp b/src/Storages/System/StorageSystemProjections.cpp index 
3c171384f70..f1cdf110641 100644 --- a/src/Storages/System/StorageSystemProjections.cpp +++ b/src/Storages/System/StorageSystemProjections.cpp @@ -119,7 +119,7 @@ protected: continue; const auto & projections = metadata_snapshot->getProjections(); - auto parts = dynamic_cast(table.get())->getDataPartsVectorForInternalUsage({MergeTreeData::DataPartState::Active}, nullptr); + auto parts = static_cast(*table).getDataPartsVectorForInternalUsage({MergeTreeData::DataPartState::Active}, nullptr); for (const auto & projection : projections) {