mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
add sizes of subcolumns to system.parts_columns table
This commit is contained in:
parent
8a04ed72af
commit
0c210a831c
@ -7,6 +7,7 @@
|
||||
#include <DataTypes/DataTypeDateTime.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeNested.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
#include <Storages/VirtualColumnUtils.h>
|
||||
@ -64,7 +65,11 @@ StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_
|
||||
{"serialization_kind", std::make_shared<DataTypeString>()},
|
||||
{"subcolumns.names", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
|
||||
{"subcolumns.types", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
|
||||
{"subcolumns.serializations", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}
|
||||
{"subcolumns.serializations", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
|
||||
{"subcolumns.bytes_on_disk", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
||||
{"subcolumns.data_compressed_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
||||
{"subcolumns.data_uncompressed_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
||||
{"subcolumns.marks_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
||||
}
|
||||
)
|
||||
{
|
||||
@ -228,13 +233,43 @@ void StorageSystemPartsColumns::processNextStorage(
|
||||
|
||||
Array subcolumn_names;
|
||||
Array subcolumn_types;
|
||||
Array subcolumn_sers;
|
||||
Array subcolumn_serializations;
|
||||
Array subcolumn_bytes_on_disk;
|
||||
Array subcolumn_data_compressed_bytes;
|
||||
Array subcolumn_data_uncompressed_bytes;
|
||||
Array subcolumn_marks_bytes;
|
||||
|
||||
IDataType::forEachSubcolumn([&](const auto &, const auto & name, const auto & data)
|
||||
IDataType::forEachSubcolumn([&](const auto & subpath, const auto & name, const auto & data)
|
||||
{
|
||||
/// We count only final subcolumns, which are represented by files on disk
|
||||
/// and skip intermediate subcolumns of types Tuple and Nested.
|
||||
if (isTuple(data.type) || isNested(data.type))
|
||||
return;
|
||||
|
||||
subcolumn_names.push_back(name);
|
||||
subcolumn_types.push_back(data.type->getName());
|
||||
subcolumn_sers.push_back(ISerialization::kindToString(data.serialization->getKind()));
|
||||
subcolumn_serializations.push_back(ISerialization::kindToString(data.serialization->getKind()));
|
||||
|
||||
ColumnSize size;
|
||||
NameAndTypePair subcolumn(column.name, name, column.type, data.type);
|
||||
String file_name = ISerialization::getFileNameForStream(subcolumn, subpath);
|
||||
|
||||
auto bin_checksum = part->checksums.files.find(file_name + ".bin");
|
||||
if (bin_checksum != part->checksums.files.end())
|
||||
{
|
||||
size.data_compressed += bin_checksum->second.file_size;
|
||||
size.data_uncompressed += bin_checksum->second.uncompressed_size;
|
||||
}
|
||||
|
||||
auto mrk_checksum = part->checksums.files.find(file_name + part->index_granularity_info.marks_file_extension);
|
||||
if (mrk_checksum != part->checksums.files.end())
|
||||
size.marks += mrk_checksum->second.file_size;
|
||||
|
||||
subcolumn_bytes_on_disk.push_back(size.data_compressed + size.marks);
|
||||
subcolumn_data_compressed_bytes.push_back(size.data_compressed);
|
||||
subcolumn_data_uncompressed_bytes.push_back(size.data_uncompressed);
|
||||
subcolumn_marks_bytes.push_back(size.marks);
|
||||
|
||||
}, { serialization, column.type, nullptr, nullptr });
|
||||
|
||||
if (columns_mask[src_index++])
|
||||
@ -242,7 +277,15 @@ void StorageSystemPartsColumns::processNextStorage(
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_types);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_sers);
|
||||
columns[res_index++]->insert(subcolumn_serializations);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_bytes_on_disk);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_data_compressed_bytes);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_data_uncompressed_bytes);
|
||||
if (columns_mask[src_index++])
|
||||
columns[res_index++]->insert(subcolumn_marks_bytes);
|
||||
|
||||
if (has_state_column)
|
||||
columns[res_index++]->insert(part->stateString());
|
||||
|
@ -0,0 +1,8 @@
|
||||
arr size0 UInt64 1
|
||||
d k1 String 1
|
||||
d k2.k3 Array(String) 1
|
||||
d k2.k4 Array(String) 1
|
||||
d k2.k5 Array(Int8) 1
|
||||
d k2.size0 UInt64 1
|
||||
n null UInt8 1
|
||||
1 1 1 1
|
32
tests/queries/0_stateless/02242_subcolumns_sizes.sql
Normal file
32
tests/queries/0_stateless/02242_subcolumns_sizes.sql
Normal file
@ -0,0 +1,32 @@
|
||||
DROP TABLE IF EXISTS t_subcolumns_sizes;
|
||||
|
||||
SET allow_experimental_object_type = 1;
|
||||
|
||||
CREATE TABLE t_subcolumns_sizes (id UInt64, arr Array(UInt64), n Nullable(String), d JSON)
|
||||
ENGINE = MergeTree ORDER BY id
|
||||
SETTINGS min_bytes_for_wide_part = 0;
|
||||
|
||||
INSERT INTO t_subcolumns_sizes FORMAT JSONEachRow {"id": 1, "arr": [1, 2, 3], "n": null, "d": {"k1": "v1", "k2": [{"k3": 1, "k4": "v2"}, {"k3": 3}]}}
|
||||
INSERT INTO t_subcolumns_sizes FORMAT JSONEachRow {"id": 2, "arr": [0], "n": "foo", "d": {"k1": "v3", "k2": [{"k4": "v4"}, {"k3": "v5", "k5": 5}]}}
|
||||
|
||||
OPTIMIZE TABLE t_subcolumns_sizes FINAL;
|
||||
|
||||
SELECT
|
||||
column,
|
||||
subcolumns.names AS sname,
|
||||
subcolumns.types AS stype,
|
||||
subcolumns.bytes_on_disk > 0
|
||||
FROM system.parts_columns ARRAY JOIN subcolumns
|
||||
WHERE database = currentDatabase() AND table = 't_subcolumns_sizes' AND active
|
||||
ORDER BY column, sname, stype;
|
||||
|
||||
SELECT
|
||||
any(column_bytes_on_disk) = sum(subcolumns.bytes_on_disk),
|
||||
any(column_data_compressed_bytes) = sum(subcolumns.data_compressed_bytes),
|
||||
any(column_data_uncompressed_bytes) = sum(subcolumns.data_uncompressed_bytes),
|
||||
any(column_marks_bytes) = sum(subcolumns.marks_bytes)
|
||||
FROM system.parts_columns ARRAY JOIN subcolumns
|
||||
WHERE database = currentDatabase() AND table = 't_subcolumns_sizes'
|
||||
AND active AND column = 'd';
|
||||
|
||||
DROP TABLE IF EXISTS t_subcolumns_sizes;
|
Loading…
Reference in New Issue
Block a user