mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-16 11:22:12 +00:00
22e44ced8a
This can be useful to obtain at least some time for the part after mutations, since mutations will change the modification time of all parts. Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
316 lines
16 KiB
C++
316 lines
16 KiB
C++
#include "StorageSystemPartsColumns.h"
|
|
|
|
#include <Common/escapeForFileName.h>
|
|
#include <Columns/ColumnString.h>
|
|
#include <DataTypes/DataTypeString.h>
|
|
#include <DataTypes/DataTypesNumber.h>
|
|
#include <DataTypes/DataTypeDateTime.h>
|
|
#include <DataTypes/DataTypeDate.h>
|
|
#include <DataTypes/DataTypeArray.h>
|
|
#include <DataTypes/DataTypeNested.h>
|
|
#include <DataTypes/DataTypeNullable.h>
|
|
#include <DataTypes/NestedUtils.h>
|
|
#include <DataTypes/DataTypeUUID.h>
|
|
#include <Storages/VirtualColumnUtils.h>
|
|
#include <Databases/IDatabase.h>
|
|
#include <Parsers/queryToString.h>
|
|
|
|
namespace DB
|
|
{
|
|
|
|
|
|
StorageSystemPartsColumns::StorageSystemPartsColumns(const StorageID & table_id_)
|
|
: StorageSystemPartsBase(table_id_,
|
|
{
|
|
{"partition", std::make_shared<DataTypeString>()},
|
|
{"name", std::make_shared<DataTypeString>()},
|
|
{"uuid", std::make_shared<DataTypeUUID>()},
|
|
{"part_type", std::make_shared<DataTypeString>()},
|
|
{"active", std::make_shared<DataTypeUInt8>()},
|
|
{"marks", std::make_shared<DataTypeUInt64>()},
|
|
{"rows", std::make_shared<DataTypeUInt64>()},
|
|
{"bytes_on_disk", std::make_shared<DataTypeUInt64>()},
|
|
{"data_compressed_bytes", std::make_shared<DataTypeUInt64>()},
|
|
{"data_uncompressed_bytes", std::make_shared<DataTypeUInt64>()},
|
|
{"marks_bytes", std::make_shared<DataTypeUInt64>()},
|
|
{"modification_time", std::make_shared<DataTypeDateTime>()},
|
|
{"remove_time", std::make_shared<DataTypeDateTime>()},
|
|
{"refcount", std::make_shared<DataTypeUInt32>()},
|
|
{"min_date", std::make_shared<DataTypeDate>()},
|
|
{"max_date", std::make_shared<DataTypeDate>()},
|
|
{"min_time", std::make_shared<DataTypeDateTime>()},
|
|
{"max_time", std::make_shared<DataTypeDateTime>()},
|
|
{"partition_id", std::make_shared<DataTypeString>()},
|
|
{"min_block_number", std::make_shared<DataTypeInt64>()},
|
|
{"max_block_number", std::make_shared<DataTypeInt64>()},
|
|
{"level", std::make_shared<DataTypeUInt32>()},
|
|
{"data_version", std::make_shared<DataTypeUInt64>()},
|
|
{"primary_key_bytes_in_memory", std::make_shared<DataTypeUInt64>()},
|
|
{"primary_key_bytes_in_memory_allocated", std::make_shared<DataTypeUInt64>()},
|
|
|
|
{"database", std::make_shared<DataTypeString>()},
|
|
{"table", std::make_shared<DataTypeString>()},
|
|
{"engine", std::make_shared<DataTypeString>()},
|
|
{"disk_name", std::make_shared<DataTypeString>()},
|
|
{"path", std::make_shared<DataTypeString>()},
|
|
|
|
{"column", std::make_shared<DataTypeString>()},
|
|
{"type", std::make_shared<DataTypeString>()},
|
|
{"column_position", std::make_shared<DataTypeUInt64>()},
|
|
{"default_kind", std::make_shared<DataTypeString>()},
|
|
{"default_expression", std::make_shared<DataTypeString>()},
|
|
{"column_bytes_on_disk", std::make_shared<DataTypeUInt64>()},
|
|
{"column_data_compressed_bytes", std::make_shared<DataTypeUInt64>()},
|
|
{"column_data_uncompressed_bytes", std::make_shared<DataTypeUInt64>()},
|
|
{"column_marks_bytes", std::make_shared<DataTypeUInt64>()},
|
|
{"column_modification_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
|
|
|
|
{"serialization_kind", std::make_shared<DataTypeString>()},
|
|
{"subcolumns.names", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
|
|
{"subcolumns.types", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
|
|
{"subcolumns.serializations", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
|
|
{"subcolumns.bytes_on_disk", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
|
{"subcolumns.data_compressed_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
|
{"subcolumns.data_uncompressed_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
|
{"subcolumns.marks_bytes", std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>())},
|
|
}
|
|
)
|
|
{
|
|
}
|
|
|
|
void StorageSystemPartsColumns::processNextStorage(
|
|
ContextPtr, MutableColumns & columns, std::vector<UInt8> & columns_mask, const StoragesInfo & info, bool has_state_column)
|
|
{
|
|
/// Prepare information about columns in storage.
|
|
struct ColumnInfo
|
|
{
|
|
String default_kind;
|
|
String default_expression;
|
|
};
|
|
|
|
std::unordered_map<String, ColumnInfo> columns_info;
|
|
for (const auto & column : info.storage->getInMemoryMetadataPtr()->getColumns())
|
|
{
|
|
ColumnInfo column_info;
|
|
if (column.default_desc.expression)
|
|
{
|
|
column_info.default_kind = toString(column.default_desc.kind);
|
|
column_info.default_expression = queryToString(column.default_desc.expression);
|
|
}
|
|
|
|
columns_info[column.name] = column_info;
|
|
}
|
|
|
|
/// Go through the list of parts.
|
|
MergeTreeData::DataPartStateVector all_parts_state;
|
|
MergeTreeData::DataPartsVector all_parts;
|
|
all_parts = info.getParts(all_parts_state, has_state_column);
|
|
for (size_t part_number = 0; part_number < all_parts.size(); ++part_number)
|
|
{
|
|
const auto & part = all_parts[part_number];
|
|
auto part_state = all_parts_state[part_number];
|
|
auto columns_size = part->getTotalColumnsSize();
|
|
|
|
/// For convenience, in returned refcount, don't add references that was due to local variables in this method: all_parts, active_parts.
|
|
auto use_count = part.use_count() - 1;
|
|
|
|
auto min_max_date = part->getMinMaxDate();
|
|
auto min_max_time = part->getMinMaxTime();
|
|
|
|
auto index_size_in_bytes = part->getIndexSizeInBytes();
|
|
auto index_size_in_allocated_bytes = part->getIndexSizeInAllocatedBytes();
|
|
|
|
using State = MergeTreeDataPartState;
|
|
|
|
size_t column_position = 0;
|
|
for (const auto & column : part->getColumns())
|
|
{
|
|
++column_position;
|
|
size_t src_index = 0, res_index = 0;
|
|
if (columns_mask[src_index++])
|
|
{
|
|
WriteBufferFromOwnString out;
|
|
part->partition.serializeText(*info.data, out, format_settings);
|
|
columns[res_index++]->insert(out.str());
|
|
}
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->name);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->uuid);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->getTypeName());
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part_state == State::Active);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->getMarksCount());
|
|
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->rows_count);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->getBytesOnDisk());
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(columns_size.data_compressed);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(columns_size.data_uncompressed);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(columns_size.marks);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(UInt64(part->modification_time));
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(UInt64(part->remove_time.load(std::memory_order_relaxed)));
|
|
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(UInt64(use_count));
|
|
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(min_max_date.first);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(min_max_date.second);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(static_cast<UInt32>(min_max_time.first));
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(static_cast<UInt32>(min_max_time.second));
|
|
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->info.partition_id);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->info.min_block);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->info.max_block);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->info.level);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(UInt64(part->info.getDataVersion()));
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(index_size_in_bytes);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(index_size_in_allocated_bytes);
|
|
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(info.database);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(info.table);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(info.engine);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(part->getDataPartStorage().getDiskName());
|
|
if (columns_mask[src_index++])
|
|
{
|
|
/// The full path changes at clean up thread, so do not read it if parts can be deleted, avoid the race.
|
|
if (part->isStoredOnDisk()
|
|
&& part_state != State::Deleting && part_state != State::DeleteOnDestroy && part_state != State::Temporary)
|
|
{
|
|
columns[res_index++]->insert(part->getDataPartStorage().getFullPath());
|
|
}
|
|
else
|
|
columns[res_index++]->insertDefault();
|
|
}
|
|
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(column.name);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(column.type->getName());
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(column_position);
|
|
|
|
auto column_info_it = columns_info.find(column.name);
|
|
if (column_info_it != columns_info.end())
|
|
{
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(column_info_it->second.default_kind);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(column_info_it->second.default_expression);
|
|
}
|
|
else
|
|
{
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insertDefault();
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insertDefault();
|
|
}
|
|
|
|
ColumnSize column_size = part->getColumnSize(column.name);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(column_size.data_compressed + column_size.marks);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(column_size.data_compressed);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(column_size.data_uncompressed);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(column_size.marks);
|
|
if (columns_mask[src_index++])
|
|
{
|
|
if (auto column_modification_time = part->getColumnModificationTime(column.name))
|
|
columns[res_index++]->insert(UInt64(column_modification_time.value()));
|
|
else
|
|
columns[res_index++]->insertDefault();
|
|
}
|
|
|
|
auto serialization = part->getSerialization(column.name);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(ISerialization::kindToString(serialization->getKind()));
|
|
|
|
Array subcolumn_names;
|
|
Array subcolumn_types;
|
|
Array subcolumn_serializations;
|
|
Array subcolumn_bytes_on_disk;
|
|
Array subcolumn_data_compressed_bytes;
|
|
Array subcolumn_data_uncompressed_bytes;
|
|
Array subcolumn_marks_bytes;
|
|
|
|
IDataType::forEachSubcolumn([&](const auto & subpath, const auto & name, const auto & data)
|
|
{
|
|
/// We count only final subcolumns, which are represented by files on disk
|
|
/// and skip intermediate subcolumns of types Tuple and Nested.
|
|
if (isTuple(data.type) || isNested(data.type))
|
|
return;
|
|
|
|
subcolumn_names.push_back(name);
|
|
subcolumn_types.push_back(data.type->getName());
|
|
subcolumn_serializations.push_back(ISerialization::kindToString(data.serialization->getKind()));
|
|
|
|
ColumnSize size;
|
|
NameAndTypePair subcolumn(column.name, name, column.type, data.type);
|
|
String file_name = ISerialization::getFileNameForStream(subcolumn, subpath);
|
|
|
|
auto bin_checksum = part->checksums.files.find(file_name + ".bin");
|
|
if (bin_checksum != part->checksums.files.end())
|
|
{
|
|
size.data_compressed += bin_checksum->second.file_size;
|
|
size.data_uncompressed += bin_checksum->second.uncompressed_size;
|
|
}
|
|
|
|
auto mrk_checksum = part->checksums.files.find(file_name + part->index_granularity_info.mark_type.getFileExtension());
|
|
if (mrk_checksum != part->checksums.files.end())
|
|
size.marks += mrk_checksum->second.file_size;
|
|
|
|
subcolumn_bytes_on_disk.push_back(size.data_compressed + size.marks);
|
|
subcolumn_data_compressed_bytes.push_back(size.data_compressed);
|
|
subcolumn_data_uncompressed_bytes.push_back(size.data_uncompressed);
|
|
subcolumn_marks_bytes.push_back(size.marks);
|
|
|
|
}, ISerialization::SubstreamData(serialization).withType(column.type));
|
|
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(subcolumn_names);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(subcolumn_types);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(subcolumn_serializations);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(subcolumn_bytes_on_disk);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(subcolumn_data_compressed_bytes);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(subcolumn_data_uncompressed_bytes);
|
|
if (columns_mask[src_index++])
|
|
columns[res_index++]->insert(subcolumn_marks_bytes);
|
|
|
|
if (has_state_column)
|
|
columns[res_index++]->insert(part->stateString());
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|