mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Merge pull request #60585 from aalexfvk/traverse_shadow_remote_data_paths
Traverse shadow directory for system.remote_data_paths
This commit is contained in:
commit
0c824dab17
@ -859,6 +859,7 @@ class IColumn;
|
||||
M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \
|
||||
M(Bool, use_variant_as_common_type, false, "Use Variant as a result type for if/multiIf in case when there is no common type for arguments", 0) \
|
||||
M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0) \
|
||||
M(Bool, traverse_shadow_remote_data_paths, false, "Traverse shadow directory when query system.remote_data_paths", 0) \
|
||||
\
|
||||
/** Experimental functions */ \
|
||||
M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \
|
||||
|
@ -92,6 +92,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
|
||||
{"page_cache_inject_eviction", false, false, "Added userspace page cache"},
|
||||
{"default_table_engine", "None", "MergeTree", "Set default table engine to MergeTree for better usability"},
|
||||
{"input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects", false, false, "Allow to use String type for ambiguous paths during named tuple inference from JSON objects"},
|
||||
{"traverse_shadow_remote_data_paths", false, false, "Traverse shadow directory when query system.remote_data_paths."},
|
||||
{"throw_if_deduplication_in_dependent_materialized_views_enabled_with_async_insert", false, true, "Deduplication is dependent materialized view cannot work together with async inserts."},
|
||||
{"parallel_replicas_allow_in_with_subquery", false, true, "If true, subquery for IN will be executed on every follower replica"},
|
||||
{"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."},
|
||||
|
@ -320,9 +320,11 @@ public:
|
||||
{}
|
||||
};
|
||||
|
||||
virtual void getRemotePathsRecursive(const String &, std::vector<LocalPathWithObjectStoragePaths> &)
|
||||
virtual void getRemotePathsRecursive(
|
||||
const String &, std::vector<LocalPathWithObjectStoragePaths> &, const std::function<bool(const String &)> & /* skip_predicate */)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
|
||||
throw Exception(
|
||||
ErrorCodes::NOT_IMPLEMENTED,
|
||||
"Method `getRemotePathsRecursive() not implemented for disk: {}`",
|
||||
getDataSourceDescription().toString());
|
||||
}
|
||||
|
@ -91,11 +91,17 @@ StoredObjects DiskObjectStorage::getStorageObjects(const String & local_path) co
|
||||
return metadata_storage->getStorageObjects(local_path);
|
||||
}
|
||||
|
||||
void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::vector<LocalPathWithObjectStoragePaths> & paths_map)
|
||||
void DiskObjectStorage::getRemotePathsRecursive(
|
||||
const String & local_path,
|
||||
std::vector<LocalPathWithObjectStoragePaths> & paths_map,
|
||||
const std::function<bool(const String &)> & skip_predicate)
|
||||
{
|
||||
if (!metadata_storage->exists(local_path))
|
||||
return;
|
||||
|
||||
if (skip_predicate && skip_predicate(local_path))
|
||||
return;
|
||||
|
||||
/// Protect against concurrent deletion of files (for example because of a merge).
|
||||
if (metadata_storage->isFile(local_path))
|
||||
{
|
||||
@ -143,7 +149,7 @@ void DiskObjectStorage::getRemotePathsRecursive(const String & local_path, std::
|
||||
}
|
||||
|
||||
for (; it->isValid(); it->next())
|
||||
DiskObjectStorage::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map);
|
||||
DiskObjectStorage::getRemotePathsRecursive(fs::path(local_path) / it->name(), paths_map, skip_predicate);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -48,7 +48,10 @@ public:
|
||||
|
||||
StoredObjects getStorageObjects(const String & local_path) const override;
|
||||
|
||||
void getRemotePathsRecursive(const String & local_path, std::vector<LocalPathWithObjectStoragePaths> & paths_map) override;
|
||||
void getRemotePathsRecursive(
|
||||
const String & local_path,
|
||||
std::vector<LocalPathWithObjectStoragePaths> & paths_map,
|
||||
const std::function<bool(const String &)> & skip_predicate) override;
|
||||
|
||||
const std::string & getCacheName() const override { return object_storage->getCacheName(); }
|
||||
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Disks/IDisk.h>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -58,8 +59,20 @@ Pipe StorageSystemRemoteDataPaths::read(
|
||||
if (disk->isRemote())
|
||||
{
|
||||
std::vector<IDisk::LocalPathWithObjectStoragePaths> remote_paths_by_local_path;
|
||||
disk->getRemotePathsRecursive("store", remote_paths_by_local_path);
|
||||
disk->getRemotePathsRecursive("data", remote_paths_by_local_path);
|
||||
disk->getRemotePathsRecursive("store", remote_paths_by_local_path, /* skip_predicate = */ {});
|
||||
disk->getRemotePathsRecursive("data", remote_paths_by_local_path, /* skip_predicate = */ {});
|
||||
if (context->getSettingsRef().traverse_shadow_remote_data_paths)
|
||||
disk->getRemotePathsRecursive(
|
||||
"shadow",
|
||||
remote_paths_by_local_path,
|
||||
[](const String & local_path)
|
||||
{
|
||||
// `shadow/{backup_name}/revision.txt` is not an object metadata file
|
||||
const auto path = fs::path(local_path);
|
||||
return path.filename() == "revision.txt" &&
|
||||
path.parent_path().has_parent_path() &&
|
||||
path.parent_path().parent_path().filename() == "shadow";
|
||||
});
|
||||
|
||||
FileCachePtr cache;
|
||||
|
||||
|
@ -0,0 +1,2 @@
|
||||
1
|
||||
1
|
29
tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh
Executable file
29
tests/queries/0_stateless/03000_traverse_shadow_system_data_paths.sh
Executable file
@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
|
||||
# Resolve the directory that contains this script, then source the shared
# shell_config.sh from its parent directory.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
# Names of the table and of the FREEZE backup used by the queries below.
TABLE="03000_traverse_shadow_system_data_path_table"
|
||||
BACKUP="03000_traverse_shadow_system_data_path_backup"
|
||||
|
||||
# Create a MergeTree table that uses the 's3_cache' storage policy.
${CLICKHOUSE_CLIENT} --query="CREATE TABLE ${TABLE} (
|
||||
id Int64,
|
||||
data String
|
||||
) ENGINE=MergeTree()
|
||||
ORDER BY id
|
||||
SETTINGS storage_policy='s3_cache';"
|
||||
|
||||
# Insert one row, then check that system.remote_data_paths lists at least one
# entry for disk 's3_cache' (the comparison `count() > 0` is what gets printed).
${CLICKHOUSE_CLIENT} --query="INSERT INTO ${TABLE} VALUES (0, 'data');"
|
||||
${CLICKHOUSE_CLIENT} --query "SELECT count() > 0 FROM system.remote_data_paths WHERE disk_name = 's3_cache'"
|
||||
|
||||
# Freeze the table under the backup name ${BACKUP}, then drop the table itself.
${CLICKHOUSE_CLIENT} --query="ALTER TABLE ${TABLE} FREEZE WITH NAME '${BACKUP}';"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE ${TABLE} SYNC;"
|
||||
|
||||
# With traverse_shadow_remote_data_paths=1, expect at least one 's3_cache' entry
# whose local_path contains shadow/${BACKUP}.
${CLICKHOUSE_CLIENT} --query "
|
||||
SELECT count() > 0
|
||||
FROM system.remote_data_paths
|
||||
WHERE disk_name = 's3_cache' AND local_path LIKE '%shadow/${BACKUP}%'
|
||||
SETTINGS traverse_shadow_remote_data_paths=1;"
|
||||
# Cleanup of the frozen backup: all output is discarded and a failure of this
# command does not change the script's exit status (`|| true`).
${CLICKHOUSE_CLIENT} --query "SYSTEM UNFREEZE WITH NAME '${BACKUP}';" &>/dev/null || true
|
Loading…
Reference in New Issue
Block a user