Merge pull request #52518 from CurtizJ/add-refcount-to-system-table

Added field `refcount` to `system.remote_data_paths` table
This commit is contained in:
Alexey Milovidov 2023-07-27 12:13:24 +03:00 committed by GitHub
commit 17b647f6b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 86 additions and 4 deletions

View File

@ -303,10 +303,11 @@ public:
std::string local_path; std::string local_path;
std::string common_prefix_for_objects; std::string common_prefix_for_objects;
StoredObjects objects; StoredObjects objects;
size_t refcount;
LocalPathWithObjectStoragePaths( LocalPathWithObjectStoragePaths(
const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_) const std::string & local_path_, const std::string & common_prefix_for_objects_, StoredObjects && objects_, size_t refcount_)
: local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)) {} : local_path(local_path_), common_prefix_for_objects(common_prefix_for_objects_), objects(std::move(objects_)), refcount(refcount_) {}
}; };
virtual void getRemotePathsRecursive(const String &, std::vector<LocalPathWithObjectStoragePaths> &) virtual void getRemotePathsRecursive(const String &, std::vector<LocalPathWithObjectStoragePaths> &)

View File

@ -82,7 +82,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::
{ {
try try
{ {
paths_map.emplace_back(local_path, metadata_storage->getObjectStorageRootPath(), getStorageObjects(local_path)); paths_map.emplace_back(local_path, metadata_storage->getObjectStorageRootPath(), getStorageObjects(local_path), metadata_storage->getHardlinkCount(local_path));
} }
catch (const Exception & e) catch (const Exception & e)
{ {

View File

@ -25,6 +25,7 @@ StorageSystemRemoteDataPaths::StorageSystemRemoteDataPaths(const StorageID & tab
{"local_path", std::make_shared<DataTypeString>()}, {"local_path", std::make_shared<DataTypeString>()},
{"remote_path", std::make_shared<DataTypeString>()}, {"remote_path", std::make_shared<DataTypeString>()},
{"size", std::make_shared<DataTypeUInt64>()}, {"size", std::make_shared<DataTypeUInt64>()},
{"refcount", std::make_shared<DataTypeUInt64>()},
{"common_prefix_for_blobs", std::make_shared<DataTypeString>()}, {"common_prefix_for_blobs", std::make_shared<DataTypeString>()},
{"cache_paths", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}, {"cache_paths", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
})); }));
@ -48,6 +49,7 @@ Pipe StorageSystemRemoteDataPaths::read(
MutableColumnPtr col_local_path = ColumnString::create(); MutableColumnPtr col_local_path = ColumnString::create();
MutableColumnPtr col_remote_path = ColumnString::create(); MutableColumnPtr col_remote_path = ColumnString::create();
MutableColumnPtr col_size = ColumnUInt64::create(); MutableColumnPtr col_size = ColumnUInt64::create();
MutableColumnPtr col_refcount = ColumnUInt64::create();
MutableColumnPtr col_namespace = ColumnString::create(); MutableColumnPtr col_namespace = ColumnString::create();
MutableColumnPtr col_cache_paths = ColumnArray::create(ColumnString::create()); MutableColumnPtr col_cache_paths = ColumnArray::create(ColumnString::create());
@ -65,19 +67,22 @@ Pipe StorageSystemRemoteDataPaths::read(
if (disk->supportsCache()) if (disk->supportsCache())
cache = FileCacheFactory::instance().getByName(disk->getCacheName()).cache; cache = FileCacheFactory::instance().getByName(disk->getCacheName()).cache;
for (const auto & [local_path, common_prefox_for_objects, storage_objects] : remote_paths_by_local_path) for (const auto & [local_path, common_prefox_for_objects, storage_objects, refcount] : remote_paths_by_local_path)
{ {
for (const auto & object : storage_objects) for (const auto & object : storage_objects)
{ {
col_disk_name->insert(disk_name); col_disk_name->insert(disk_name);
col_base_path->insert(disk->getPath()); col_base_path->insert(disk->getPath());
if (cache) if (cache)
col_cache_base_path->insert(cache->getBasePath()); col_cache_base_path->insert(cache->getBasePath());
else else
col_cache_base_path->insertDefault(); col_cache_base_path->insertDefault();
col_local_path->insert(local_path); col_local_path->insert(local_path);
col_remote_path->insert(object.remote_path); col_remote_path->insert(object.remote_path);
col_size->insert(object.bytes_size); col_size->insert(object.bytes_size);
col_refcount->insert(refcount);
col_namespace->insert(common_prefox_for_objects); col_namespace->insert(common_prefox_for_objects);
if (cache) if (cache)
@ -101,6 +106,7 @@ Pipe StorageSystemRemoteDataPaths::read(
res_columns.emplace_back(std::move(col_local_path)); res_columns.emplace_back(std::move(col_local_path));
res_columns.emplace_back(std::move(col_remote_path)); res_columns.emplace_back(std::move(col_remote_path));
res_columns.emplace_back(std::move(col_size)); res_columns.emplace_back(std::move(col_size));
res_columns.emplace_back(std::move(col_refcount));
res_columns.emplace_back(std::move(col_namespace)); res_columns.emplace_back(std::move(col_namespace));
res_columns.emplace_back(std::move(col_cache_paths)); res_columns.emplace_back(std::move(col_cache_paths));

View File

@ -0,0 +1,28 @@
0_0_0_0 0
0_0_0_0_1 1
1_0_0_0 0
1_0_0_0_1 1
0_0_0_0_1 checksums.txt 0
0_0_0_0_1 columns.txt 1
0_0_0_0_1 count.txt 1
0_0_0_0_1 default_compression_codec.txt 1
0_0_0_0_1 id.bin 1
0_0_0_0_1 id.cmrk2 1
0_0_0_0_1 metadata_version.txt 1
0_0_0_0_1 minmax_id.idx 1
0_0_0_0_1 partition.dat 1
0_0_0_0_1 primary.cidx 1
0_0_0_0_1 v.bin 1
0_0_0_0_1 v.cmrk2 1
1_0_0_0_1 checksums.txt 0
1_0_0_0_1 columns.txt 0
1_0_0_0_1 count.txt 1
1_0_0_0_1 default_compression_codec.txt 0
1_0_0_0_1 id.bin 1
1_0_0_0_1 id.cmrk2 1
1_0_0_0_1 metadata_version.txt 0
1_0_0_0_1 minmax_id.idx 1
1_0_0_0_1 partition.dat 1
1_0_0_0_1 primary.cidx 1
1_0_0_0_1 v.bin 0
1_0_0_0_1 v.cmrk2 0

View File

@ -0,0 +1,47 @@
-- Tags: no-fasttest
-- Purpose: print the `refcount` column of system.remote_data_paths for every
-- file that belongs to an active part of t_refcount after a mutation has run.
-- Start from a clean state in case a previous run left the table behind.
DROP TABLE IF EXISTS t_refcount SYNC;
-- Names of parts (on which this test depends)
-- can differ in case of fault injection.
SET insert_keeper_fault_injection_probability = 0.0;
-- Replicated table stored on the 's3_cache' policy with zero-copy replication
-- enabled. PARTITION BY id % 2 splits rows into partitions 0 and 1.
-- NOTE(review): min_bytes_for_wide_part = 0 presumably forces the wide part
-- format so that each column gets its own .bin/.cmrk2 files - confirm.
CREATE TABLE t_refcount (id UInt64, v UInt64)
ENGINE = ReplicatedMergeTree('/clickhouse/test/{database}/t_refcount', '1')
ORDER BY id PARTITION BY id % 2
SETTINGS
storage_policy = 's3_cache',
allow_remote_fs_zero_copy_replication = 1,
min_bytes_for_wide_part = 0,
compress_marks = 1,
compress_primary_key = 1,
ratio_of_defaults_for_sparse_serialization = 1.0;
-- One row per partition: id = 1 goes to partition 1, id = 2 to partition 0.
INSERT INTO t_refcount VALUES (1, 10), (2, 20);
-- NOTE(review): mutations_sync = 2 presumably makes the ALTER below block until
-- the mutation has finished, so the following SELECTs see its result - confirm.
SET mutations_sync = 2;
-- The UPDATE predicate matches only rows with odd id (the row in partition 1).
ALTER TABLE t_refcount UPDATE v = v * 10 WHERE id % 2 = 1;
-- List all parts of the table (active and inactive) together with their state.
SELECT name, active FROM system.parts WHERE database = currentDatabase() AND table = 't_refcount' ORDER BY name;
-- For every remote file under an active part directory, output
-- (part directory name, file name, refcount).
-- path_parts[-2] / path_parts[-1] are the second-to-last and last components
-- of the '/'-separated full path.
WITH splitByChar('/', full_path) AS path_parts
SELECT path_parts[-2] AS part_name, path_parts[-1] AS file_name, refcount
FROM
(
SELECT
-- Full path of the file: the `path` column followed by `local_path`.
path || local_path AS full_path,
-- Prefix of full_path up to and including its last '/': the position of the
-- first '/' in the reversed string gives the distance of the last '/' from the end.
-- NOTE(review): the join below only matches if system.parts.path ends with '/' - confirm.
substring(full_path, 1, length(full_path) - position(reverse(full_path), '/') + 1) AS part_path,
refcount
FROM system.remote_data_paths
WHERE disk_name = 's3_cache'
) AS paths
INNER JOIN
(
-- Directories of the currently active parts of this test's table only.
SELECT path
FROM system.parts
WHERE database = currentDatabase() AND table = 't_refcount' AND active
) AS parts
ON paths.part_path = parts.path
ORDER BY part_name, file_name;
-- Clean up.
DROP TABLE IF EXISTS t_refcount SYNC;