mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 00:30:49 +00:00
Merge pull request #59170 from azat/disks/s3-plain-improvements
Small improvements for tables on write-once (s3_plain)/read-only (web) disks
This commit is contained in:
commit
a29ab8f29c
@ -98,6 +98,8 @@ DirectoryIteratorPtr MetadataStorageFromPlainObjectStorage::iterateDirectory(con
|
||||
{
|
||||
/// Required for MergeTree
|
||||
auto paths = listDirectory(path);
|
||||
// Prepend path, since iterateDirectory() includes path, unlike listDirectory()
|
||||
std::for_each(paths.begin(), paths.end(), [&](auto & child) { child = fs::path(path) / child; });
|
||||
std::vector<std::filesystem::path> fs_paths(paths.begin(), paths.end());
|
||||
return std::make_unique<StaticDirectoryIterator>(std::move(fs_paths));
|
||||
}
|
||||
@ -121,6 +123,12 @@ void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::str
|
||||
metadata_storage.object_storage->removeObject(object);
|
||||
}
|
||||
|
||||
void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std::string & path)
|
||||
{
|
||||
for (auto it = metadata_storage.iterateDirectory(path); it->isValid(); it->next())
|
||||
metadata_storage.object_storage->removeObject(StoredObject(it->path()));
|
||||
}
|
||||
|
||||
void MetadataStorageFromPlainObjectStorageTransaction::createDirectory(const std::string &)
|
||||
{
|
||||
/// Noop. It is an Object Storage not a filesystem.
|
||||
|
@ -101,12 +101,13 @@ public:
|
||||
void createDirectoryRecursive(const std::string & path) override;
|
||||
|
||||
void unlinkFile(const std::string & path) override;
|
||||
void removeDirectory(const std::string & path) override;
|
||||
|
||||
UnlinkMetadataFileOperationOutcomePtr unlinkMetadata(const std::string & path) override;
|
||||
|
||||
void commit() override
|
||||
{
|
||||
/// Nothing to commit.
|
||||
/// TODO: rewrite with transactions
|
||||
}
|
||||
|
||||
bool supportsChmod() const override { return false; }
|
||||
|
@ -90,6 +90,7 @@
|
||||
|
||||
#include <base/insertAtEnd.h>
|
||||
#include <base/interpolate.h>
|
||||
#include <base/defines.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
@ -300,7 +301,11 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re
|
||||
if (disk->isBroken())
|
||||
continue;
|
||||
|
||||
if (!disk->isReadOnly())
|
||||
/// Write once disk is almost the same as read-only for MergeTree,
|
||||
/// since it does not support move, that is required for any
|
||||
/// operation over MergeTree, so avoid writing format_version.txt
|
||||
/// into it as well, to avoid leaving it after DROP.
|
||||
if (!disk->isReadOnly() && !disk->isWriteOnce())
|
||||
{
|
||||
auto buf = disk->writeFile(format_version_path, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite, getContext()->getWriteSettings());
|
||||
writeIntText(format_version.toUnderType(), *buf);
|
||||
@ -2740,6 +2745,15 @@ void MergeTreeData::renameInMemory(const StorageID & new_table_id)
|
||||
|
||||
void MergeTreeData::dropAllData()
|
||||
{
|
||||
/// In case there is read-only/write-once disk we cannot allow to call dropAllData(), but dropping tables is allowed.
|
||||
///
|
||||
/// Note, that one may think that drop on write-once disk should be
|
||||
/// supported, since it is pretty trivial to implement
|
||||
/// MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(),
|
||||
/// however removing part requires moveDirectory() as well.
|
||||
if (isStaticStorage())
|
||||
return;
|
||||
|
||||
LOG_TRACE(log, "dropAllData: waiting for locks.");
|
||||
auto settings_ptr = getSettings();
|
||||
|
||||
@ -7071,6 +7085,8 @@ std::pair<MergeTreeData::MutableDataPartPtr, scope_guard> MergeTreeData::cloneAn
|
||||
const ReadSettings & read_settings,
|
||||
const WriteSettings & write_settings)
|
||||
{
|
||||
chassert(!isStaticStorage());
|
||||
|
||||
/// Check that the storage policy contains the disk where the src_part is located.
|
||||
bool does_storage_policy_allow_same_disk = false;
|
||||
for (const DiskPtr & disk : getStoragePolicy()->getDisks())
|
||||
|
@ -65,6 +65,7 @@ namespace ErrorCodes
|
||||
extern const int NO_SUCH_DATA_PART;
|
||||
extern const int ABORTED;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
extern const int TABLE_IS_READ_ONLY;
|
||||
}
|
||||
|
||||
namespace ActionLocks
|
||||
@ -294,6 +295,8 @@ std::optional<UInt64> StorageMergeTree::totalBytesUncompressed(const Settings &)
|
||||
SinkToStoragePtr
|
||||
StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context, bool /*async_insert*/)
|
||||
{
|
||||
assertNotReadonly();
|
||||
|
||||
const auto & settings = local_context->getSettingsRef();
|
||||
return std::make_shared<MergeTreeSink>(
|
||||
*this, metadata_snapshot, settings.max_partitions_per_insert_block, local_context);
|
||||
@ -319,9 +322,6 @@ void StorageMergeTree::checkTableCanBeDropped(ContextPtr query_context) const
|
||||
void StorageMergeTree::drop()
|
||||
{
|
||||
shutdown(true);
|
||||
/// In case there is read-only disk we cannot allow to call dropAllData(), but dropping tables is allowed.
|
||||
if (isStaticStorage())
|
||||
return;
|
||||
dropAllData();
|
||||
}
|
||||
|
||||
@ -330,6 +330,8 @@ void StorageMergeTree::alter(
|
||||
ContextPtr local_context,
|
||||
AlterLockHolder & table_lock_holder)
|
||||
{
|
||||
assertNotReadonly();
|
||||
|
||||
if (local_context->getCurrentTransaction() && local_context->getSettingsRef().throw_on_unsupported_query_inside_transaction)
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ALTER METADATA is not supported inside transactions");
|
||||
|
||||
@ -620,6 +622,8 @@ void StorageMergeTree::setMutationCSN(const String & mutation_id, CSN csn)
|
||||
|
||||
void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context)
|
||||
{
|
||||
assertNotReadonly();
|
||||
|
||||
delayMutationOrThrowIfNeeded(nullptr, query_context);
|
||||
|
||||
/// Validate partition IDs (if any) before starting mutation
|
||||
@ -810,6 +814,8 @@ std::vector<MergeTreeMutationStatus> StorageMergeTree::getMutationsStatus() cons
|
||||
|
||||
CancellationCode StorageMergeTree::killMutation(const String & mutation_id)
|
||||
{
|
||||
assertNotReadonly();
|
||||
|
||||
LOG_TRACE(log, "Killing mutation {}", mutation_id);
|
||||
UInt64 mutation_version = MergeTreeMutationEntry::tryParseFileName(mutation_id);
|
||||
if (!mutation_version)
|
||||
@ -1520,6 +1526,8 @@ bool StorageMergeTree::optimize(
|
||||
bool cleanup,
|
||||
ContextPtr local_context)
|
||||
{
|
||||
assertNotReadonly();
|
||||
|
||||
if (deduplicate)
|
||||
{
|
||||
if (deduplicate_by_columns.empty())
|
||||
@ -1765,6 +1773,8 @@ void StorageMergeTree::renameAndCommitEmptyParts(MutableDataPartsVector & new_pa
|
||||
|
||||
void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr query_context, TableExclusiveLockHolder &)
|
||||
{
|
||||
assertNotReadonly();
|
||||
|
||||
{
|
||||
/// Asks to complete merges and does not allow them to start.
|
||||
/// This protects against "revival" of data for a removed partition after completion of merge.
|
||||
@ -2039,6 +2049,8 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition(
|
||||
|
||||
void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, ContextPtr local_context)
|
||||
{
|
||||
assertNotReadonly();
|
||||
|
||||
auto lock1 = lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout);
|
||||
auto lock2 = source_table->lockForShare(local_context->getCurrentQueryId(), local_context->getSettingsRef().lock_acquire_timeout);
|
||||
auto merges_blocker = stopMergesAndWait();
|
||||
@ -2437,6 +2449,12 @@ PreparedSetsCachePtr StorageMergeTree::getPreparedSetsCache(Int64 mutation_id)
|
||||
return cache;
|
||||
}
|
||||
|
||||
void StorageMergeTree::assertNotReadonly() const
|
||||
{
|
||||
if (isStaticStorage())
|
||||
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage");
|
||||
}
|
||||
|
||||
void StorageMergeTree::fillNewPartName(MutableDataPartPtr & part, DataPartsLock &)
|
||||
{
|
||||
part->info.min_block = part->info.max_block = increment.get();
|
||||
|
@ -273,6 +273,8 @@ private:
|
||||
|
||||
PreparedSetsCachePtr getPreparedSetsCache(Int64 mutation_id);
|
||||
|
||||
void assertNotReadonly() const;
|
||||
|
||||
friend class MergeTreeSink;
|
||||
friend class MergeTreeData;
|
||||
friend class MergePlainMergeTreeTask;
|
||||
|
@ -4661,6 +4661,9 @@ bool StorageReplicatedMergeTree::fetchPart(
|
||||
zkutil::ZooKeeper::Ptr zookeeper_,
|
||||
bool try_fetch_shared)
|
||||
{
|
||||
if (isStaticStorage())
|
||||
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage");
|
||||
|
||||
auto zookeeper = zookeeper_ ? zookeeper_ : getZooKeeper();
|
||||
const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version);
|
||||
|
||||
@ -5498,6 +5501,8 @@ void StorageReplicatedMergeTree::assertNotReadonly() const
|
||||
{
|
||||
if (is_readonly)
|
||||
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode (replica path: {})", replica_path);
|
||||
if (isStaticStorage())
|
||||
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage");
|
||||
}
|
||||
|
||||
|
||||
@ -5506,6 +5511,8 @@ SinkToStoragePtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, con
|
||||
if (!initialization_done)
|
||||
throw Exception(ErrorCodes::NOT_INITIALIZED, "Table is not initialized yet");
|
||||
|
||||
if (isStaticStorage())
|
||||
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to static storage");
|
||||
/// If table is read-only because it doesn't have metadata in zk yet, then it's not possible to insert into it
|
||||
/// Without this check, we'll write data parts on disk, and afterwards will remove them since we'll fail to commit them into zk
|
||||
/// In case of remote storage like s3, it'll generate unnecessary PUT requests
|
||||
|
@ -4,11 +4,17 @@
|
||||
<s3_disk>
|
||||
<type>s3</type>
|
||||
<path>s3_disk/</path>
|
||||
<endpoint>http://localhost:11111/test/common/</endpoint>
|
||||
<endpoint>http://localhost:11111/test/s3/</endpoint>
|
||||
<access_key_id>clickhouse</access_key_id>
|
||||
<secret_access_key>clickhouse</secret_access_key>
|
||||
<request_timeout_ms>20000</request_timeout_ms>
|
||||
</s3_disk>
|
||||
<s3_plain_disk>
|
||||
<type>s3_plain</type>
|
||||
<endpoint>http://localhost:11111/test/s3_plain/</endpoint>
|
||||
<access_key_id>clickhouse</access_key_id>
|
||||
<secret_access_key>clickhouse</secret_access_key>
|
||||
</s3_plain_disk>
|
||||
<s3_cache>
|
||||
<type>cache</type>
|
||||
<disk>s3_disk</disk>
|
||||
|
@ -172,7 +172,7 @@ def test_incorrect_usage(cluster):
|
||||
assert "Table is read-only" in result
|
||||
|
||||
result = node2.query_and_get_error("OPTIMIZE TABLE test0 FINAL")
|
||||
assert "Only read-only operations are supported" in result
|
||||
assert "Table is in readonly mode due to static storage" in result
|
||||
|
||||
node2.query("DROP TABLE test0 SYNC")
|
||||
|
||||
|
@ -0,0 +1,30 @@
|
||||
data after INSERT 1
|
||||
data after ATTACH 1
|
||||
Files before DETACH TABLE
|
||||
all_1_1_0
|
||||
|
||||
backups/ordinary_default/data/ordinary_default/data/all_1_1_0:
|
||||
primary.cidx
|
||||
serialization.json
|
||||
metadata_version.txt
|
||||
default_compression_codec.txt
|
||||
data.bin
|
||||
data.cmrk3
|
||||
count.txt
|
||||
columns.txt
|
||||
checksums.txt
|
||||
|
||||
Files after DETACH TABLE
|
||||
all_1_1_0
|
||||
|
||||
backups/ordinary_default/data/ordinary_default/data/all_1_1_0:
|
||||
primary.cidx
|
||||
serialization.json
|
||||
metadata_version.txt
|
||||
default_compression_codec.txt
|
||||
data.bin
|
||||
data.cmrk3
|
||||
count.txt
|
||||
columns.txt
|
||||
checksums.txt
|
||||
|
56
tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh
Executable file
56
tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_MergeTree.sh
Executable file
@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest, no-random-settings, no-random-merge-tree-settings
|
||||
# Tag no-fasttest: requires S3
|
||||
# Tag no-random-settings, no-random-merge-tree-settings: to avoid creating extra files like serialization.json, this test too exocit anyway
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
# config for clickhouse-disks (to check leftovers)
|
||||
config="${BASH_SOURCE[0]/.sh/.yml}"
|
||||
|
||||
# only in Atomic ATTACH from s3_plain works
|
||||
new_database="ordinary_$CLICKHOUSE_DATABASE"
|
||||
$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "create database $new_database engine=Ordinary"
|
||||
CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT/--database=$CLICKHOUSE_DATABASE/--database=$new_database}
|
||||
CLICKHOUSE_DATABASE="$new_database"
|
||||
|
||||
$CLICKHOUSE_CLIENT -nm -q "
|
||||
drop table if exists data;
|
||||
create table data (key Int) engine=MergeTree() order by key;
|
||||
insert into data values (1);
|
||||
select 'data after INSERT', count() from data;
|
||||
"
|
||||
|
||||
# suppress output
|
||||
$CLICKHOUSE_CLIENT -q "backup table data to S3('http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE', 'test', 'testtest')" > /dev/null
|
||||
|
||||
$CLICKHOUSE_CLIENT -nm -q "
|
||||
drop table data;
|
||||
attach table data (key Int) engine=MergeTree() order by key
|
||||
settings
|
||||
max_suspicious_broken_parts=0,
|
||||
disk=disk(type=s3_plain,
|
||||
endpoint='http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE',
|
||||
access_key_id='test',
|
||||
secret_access_key='testtest');
|
||||
select 'data after ATTACH', count() from data;
|
||||
|
||||
insert into data values (1); -- { serverError TABLE_IS_READ_ONLY }
|
||||
optimize table data final; -- { serverError TABLE_IS_READ_ONLY }
|
||||
"
|
||||
|
||||
path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM system.tables WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'data'")
|
||||
# trim / to fix "Unable to parse ExceptionName: XMinioInvalidObjectName Message: Object name contains unsupported characters."
|
||||
path=${path%/}
|
||||
|
||||
echo "Files before DETACH TABLE"
|
||||
clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "detach table data"
|
||||
echo "Files after DETACH TABLE"
|
||||
clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2
|
||||
|
||||
# metadata file is left
|
||||
$CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE"
|
@ -0,0 +1,7 @@
|
||||
storage_configuration:
|
||||
disks:
|
||||
s3_plain_disk:
|
||||
type: s3_plain
|
||||
endpoint: http://localhost:11111/test/s3_plain/
|
||||
access_key_id: clickhouse
|
||||
secret_access_key: clickhouse
|
@ -0,0 +1,30 @@
|
||||
data after INSERT 1
|
||||
data after ATTACH 1
|
||||
Files before DETACH TABLE
|
||||
all_X_X_X
|
||||
|
||||
backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X:
|
||||
primary.cidx
|
||||
serialization.json
|
||||
metadata_version.txt
|
||||
default_compression_codec.txt
|
||||
data.bin
|
||||
data.cmrk3
|
||||
count.txt
|
||||
columns.txt
|
||||
checksums.txt
|
||||
|
||||
Files after DETACH TABLE
|
||||
all_X_X_X
|
||||
|
||||
backups/ordinary_default/data/ordinary_default/data_read/all_X_X_X:
|
||||
primary.cidx
|
||||
serialization.json
|
||||
metadata_version.txt
|
||||
default_compression_codec.txt
|
||||
data.bin
|
||||
data.cmrk3
|
||||
count.txt
|
||||
columns.txt
|
||||
checksums.txt
|
||||
|
65
tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh
Executable file
65
tests/queries/0_stateless/02980_s3_plain_DROP_TABLE_ReplicatedMergeTree.sh
Executable file
@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest, no-random-settings, no-random-merge-tree-settings
|
||||
# Tag no-fasttest: requires S3
|
||||
# Tag no-random-settings, no-random-merge-tree-settings: to avoid creating extra files like serialization.json, this test too exocit anyway
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
config="${BASH_SOURCE[0]/.sh/.yml}"
|
||||
|
||||
# only in Atomic ATTACH from s3_plain works
|
||||
new_database="ordinary_$CLICKHOUSE_DATABASE"
|
||||
$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "create database $new_database engine=Ordinary"
|
||||
CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT/--database=$CLICKHOUSE_DATABASE/--database=$new_database}
|
||||
CLICKHOUSE_DATABASE="$new_database"
|
||||
|
||||
$CLICKHOUSE_CLIENT -nm -q "
|
||||
drop table if exists data_read;
|
||||
drop table if exists data_write;
|
||||
|
||||
create table data_write (key Int) engine=ReplicatedMergeTree('/tables/{database}/data', 'write') order by key;
|
||||
create table data_read (key Int) engine=ReplicatedMergeTree('/tables/{database}/data', 'read') order by key;
|
||||
|
||||
insert into data_write values (1);
|
||||
system sync replica data_read;
|
||||
select 'data after INSERT', count() from data_read;
|
||||
"
|
||||
|
||||
# suppress output
|
||||
$CLICKHOUSE_CLIENT -q "backup table data_read to S3('http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE', 'test', 'testtest')" > /dev/null
|
||||
|
||||
$CLICKHOUSE_CLIENT -nm -q "
|
||||
drop table data_read;
|
||||
attach table data_read (key Int) engine=ReplicatedMergeTree('/tables/{database}/data', 'read') order by key
|
||||
settings
|
||||
max_suspicious_broken_parts=0,
|
||||
disk=disk(type=s3_plain,
|
||||
endpoint='http://localhost:11111/test/s3_plain/backups/$CLICKHOUSE_DATABASE',
|
||||
access_key_id='test',
|
||||
secret_access_key='testtest');
|
||||
select 'data after ATTACH', count() from data_read;
|
||||
|
||||
insert into data_read values (1); -- { serverError TABLE_IS_READ_ONLY }
|
||||
optimize table data_read final; -- { serverError TABLE_IS_READ_ONLY }
|
||||
system sync replica data_read; -- { serverError TABLE_IS_READ_ONLY }
|
||||
"
|
||||
|
||||
path=$($CLICKHOUSE_CLIENT -q "SELECT replace(data_paths[1], 's3_plain', '') FROM system.tables WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'data_read'")
|
||||
# trim / to fix "Unable to parse ExceptionName: XMinioInvalidObjectName Message: Object name contains unsupported characters."
|
||||
path=${path%/}
|
||||
|
||||
echo "Files before DETACH TABLE"
|
||||
# sed to match any part, since in case of fault injection part name may not be all_0_0_0 but all_1_1_0
|
||||
clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "${path:?}" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g'
|
||||
|
||||
$CLICKHOUSE_CLIENT -nm -q "
|
||||
detach table data_read;
|
||||
detach table data_write;
|
||||
"
|
||||
echo "Files after DETACH TABLE"
|
||||
clickhouse-disks -C "$config" --disk s3_plain_disk list --recursive "$path" | tail -n+2 | sed 's/all_[^_]*_[^_]*_0/all_X_X_X/g'
|
||||
|
||||
# metadata file is left
|
||||
$CLICKHOUSE_CLIENT --force_remove_data_recursively_on_drop=1 -q "drop database if exists $CLICKHOUSE_DATABASE"
|
@ -0,0 +1,7 @@
|
||||
storage_configuration:
|
||||
disks:
|
||||
s3_plain_disk:
|
||||
type: s3_plain
|
||||
endpoint: http://localhost:11111/test/s3_plain/
|
||||
access_key_id: clickhouse
|
||||
secret_access_key: clickhouse
|
Loading…
Reference in New Issue
Block a user