From 109980eb275c064d08bc031bfdc14d95b9a7272b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 14 Jun 2022 16:42:30 +0300 Subject: [PATCH 01/17] Fix "Missing columns" for GLOBAL JOIN with CTE w/o alias Signed-off-by: Azat Khuzhin --- src/Parsers/ASTSubquery.cpp | 16 ++++++++++++++++ src/Parsers/ASTSubquery.h | 2 ++ .../0_stateless/02341_global_join_cte.reference | 12 ++++++++++++ .../0_stateless/02341_global_join_cte.sql | 4 ++++ 4 files changed, 34 insertions(+) create mode 100644 tests/queries/0_stateless/02341_global_join_cte.reference create mode 100644 tests/queries/0_stateless/02341_global_join_cte.sql diff --git a/src/Parsers/ASTSubquery.cpp b/src/Parsers/ASTSubquery.cpp index 84b7862c630..a3408f12330 100644 --- a/src/Parsers/ASTSubquery.cpp +++ b/src/Parsers/ASTSubquery.cpp @@ -60,5 +60,21 @@ void ASTSubquery::updateTreeHashImpl(SipHash & hash_state) const IAST::updateTreeHashImpl(hash_state); } +String ASTSubquery::getAliasOrColumnName() const +{ + if (!alias.empty()) + return alias; + if (!cte_name.empty()) + return cte_name; + return getColumnName(); +} + +String ASTSubquery::tryGetAlias() const +{ + if (!alias.empty()) + return alias; + return cte_name; +} + } diff --git a/src/Parsers/ASTSubquery.h b/src/Parsers/ASTSubquery.h index f88b257ddf7..7d0fabf3ed4 100644 --- a/src/Parsers/ASTSubquery.h +++ b/src/Parsers/ASTSubquery.h @@ -33,6 +33,8 @@ public: } void updateTreeHashImpl(SipHash & hash_state) const override; + String getAliasOrColumnName() const override; + String tryGetAlias() const override; protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/tests/queries/0_stateless/02341_global_join_cte.reference b/tests/queries/0_stateless/02341_global_join_cte.reference new file mode 100644 index 00000000000..8b3cd68232a --- /dev/null +++ b/tests/queries/0_stateless/02341_global_join_cte.reference @@ -0,0 +1,12 @@ +-- { echo } +with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2; -- { serverError ALIAS_REQUIRED } +with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2 settings joined_subquery_requires_alias=0; +0 +0 +0 +0 +with rhs_ as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs_ rhs using (d1) order by rhs.d2 settings joined_subquery_requires_alias=0; +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02341_global_join_cte.sql b/tests/queries/0_stateless/02341_global_join_cte.sql new file mode 100644 index 00000000000..b77e5b0b688 --- /dev/null +++ b/tests/queries/0_stateless/02341_global_join_cte.sql @@ -0,0 +1,4 @@ +-- { echo } +with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2; -- { serverError ALIAS_REQUIRED } +with rhs as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs using (d1) order by rhs.d2 settings joined_subquery_requires_alias=0; +with rhs_ as (select * from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one))) select lhs.d2 from remote('127.{1,2}', view(select dummy d1, dummy d2 from system.one)) lhs global join rhs_ rhs using (d1) order by rhs.d2 settings joined_subquery_requires_alias=0; From a59be0fd5d3d0d304f02ff1657df7518514bcb0e Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 15 Jun 2022 00:23:45 +0000 Subject: [PATCH 02/17] better support of GCP storage --- src/Disks/IDisk.h | 1 - .../ObjectStorages/S3/S3ObjectStorage.cpp | 95 +++++++------------ src/Disks/ObjectStorages/S3/S3ObjectStorage.h | 3 + src/IO/WriteBufferFromS3.cpp | 6 -- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 7 -- src/Storages/StorageMergeTree.cpp | 2 - 6 files changed, 35 insertions(+), 79 deletions(-) diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 735fd9a33f8..b7af4d3f861 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -208,7 +208,6 @@ public: /// Second bool param is a flag to remove (true) or keep (false) shared data on S3 virtual void removeSharedFileIfExists(const String & path, bool /* keep_shared_data */) { removeFileIfExists(path); } - virtual String getCacheBasePath() const { return ""; } /// Returns a list of paths because for Log family engines there might be diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index e7fb5e1e5ac..df22724a618 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -39,23 +39,13 @@ namespace ErrorCodes namespace { -template -void throwIfError(Aws::Utils::Outcome & response) -{ - if (!response.IsSuccess()) - { - const auto & err = response.GetError(); - throw Exception(std::to_string(static_cast(err.GetErrorType())) + ": " + err.GetMessage(), ErrorCodes::S3_ERROR); - } -} - template void throwIfError(const Aws::Utils::Outcome & response) { if (!response.IsSuccess()) { const auto & err = response.GetError(); - throw Exception(err.GetMessage(), static_cast(err.GetErrorType())); + throw Exception(ErrorCodes::S3_ERROR, "{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())); } } @@ -211,11 +201,23 @@ void S3ObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & chi } while (outcome.GetResult().GetIsTruncated()); } -void S3ObjectStorage::removeObject(const std::string & path) +static bool isNotFoundError(Aws::S3::S3Errors error) +{ + return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND + || error == Aws::S3::S3Errors::NO_SUCH_KEY; +} + +void S3ObjectStorage::removeObjectImpl(const std::string & path, bool if_exists) { auto client_ptr = client.get(); auto settings_ptr = s3_settings.get(); + auto throw_if_error = [&](auto & outcome) + { + if (!outcome.IsSuccess() && (!if_exists || !isNotFoundError(outcome.GetError().GetErrorType()))) + throwIfError(outcome); + }; + // If chunk size is 0, only use single delete request // This allows us to work with GCS, which doesn't support DeleteObjects if (!s3_capabilities.support_batch_delete) @@ -225,7 +227,7 @@ void S3ObjectStorage::removeObject(const std::string & path) request.SetKey(path); auto outcome = client_ptr->DeleteObject(request); - throwIfError(outcome); + throw_if_error(outcome); } else { @@ -240,11 +242,11 @@ void S3ObjectStorage::removeObject(const std::string & path) request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - throwIfError(outcome); + throw_if_error(outcome); } } -void S3ObjectStorage::removeObjects(const std::vector & paths) +void S3ObjectStorage::removeObjectsImpl(const std::vector & paths, bool if_exists) { if (paths.empty()) return; @@ -255,7 +257,7 @@ void S3ObjectStorage::removeObjects(const std::vector & paths) if (!s3_capabilities.support_batch_delete) { for (const auto & path : paths) - removeObject(path); + removeObjectImpl(path, if_exists); } else { @@ -283,64 +285,31 @@ void S3ObjectStorage::removeObjects(const std::vector & paths) request.SetBucket(bucket); request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - throwIfError(outcome); + + if (!outcome.IsSuccess() && (!if_exists || !isNotFoundError(outcome.GetError().GetErrorType()))) + throwIfError(outcome); } } } +void S3ObjectStorage::removeObject(const std::string & path) +{ + removeObjectImpl(path, false); +} + void S3ObjectStorage::removeObjectIfExists(const std::string & path) { - auto client_ptr = client.get(); - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(path); + removeObjectImpl(path, true); +} - Aws::S3::Model::Delete delkeys; - delkeys.SetObjects({obj}); - - Aws::S3::Model::DeleteObjectsRequest request; - request.SetBucket(bucket); - request.SetDelete(delkeys); - auto outcome = client_ptr->DeleteObjects(request); - if (!outcome.IsSuccess() && outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) - throwIfError(outcome); +void S3ObjectStorage::removeObjects(const std::vector & paths) +{ + removeObjectsImpl(paths, false); } void S3ObjectStorage::removeObjectsIfExist(const std::vector & paths) { - if (paths.empty()) - return; - - auto client_ptr = client.get(); - auto settings_ptr = s3_settings.get(); - - - size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete; - size_t current_position = 0; - - while (current_position < paths.size()) - { - std::vector current_chunk; - String keys; - for (; current_position < paths.size() && current_chunk.size() < chunk_size_limit; ++current_position) - { - Aws::S3::Model::ObjectIdentifier obj; - obj.SetKey(paths[current_position]); - current_chunk.push_back(obj); - - if (!keys.empty()) - keys += ", "; - keys += paths[current_position]; - } - - Aws::S3::Model::Delete delkeys; - delkeys.SetObjects(current_chunk); - Aws::S3::Model::DeleteObjectsRequest request; - request.SetBucket(bucket); - request.SetDelete(delkeys); - auto outcome = client_ptr->DeleteObjects(request); - if (!outcome.IsSuccess() && outcome.GetError().GetErrorType() != Aws::S3::S3Errors::RESOURCE_NOT_FOUND) - throwIfError(outcome); - } + removeObjectsImpl(paths, true); } ObjectMetadata S3ObjectStorage::getObjectMetadata(const std::string & path) const diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index 23ebed0b8b9..16e6c9c8cd5 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -126,6 +126,9 @@ private: std::optional head = std::nullopt, std::optional metadata = std::nullopt) const; + void removeObjectImpl(const std::string & path, bool if_exists); + void removeObjectsImpl(const std::vector & paths, bool if_exists); + Aws::S3::Model::HeadObjectOutcome requestObjectHeadData(const std::string & bucket_from, const std::string & key) const; std::string bucket; diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 1d3ec6095d5..a21c15820cb 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -389,12 +389,6 @@ void WriteBufferFromS3::makeSinglepartUpload() return; } - if (size == 0) - { - LOG_TRACE(log, "Skipping single part upload. Buffer is empty."); - return; - } - if (schedule) { put_object_task = std::make_unique(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index be3ff3ea614..12ea2943c8a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1670,18 +1670,11 @@ void IMergeTreeDataPart::remove() const /// Remove each expected file in directory, then remove directory itself. IDisk::RemoveBatchRequest request; - #if !defined(__clang__) - # pragma GCC diagnostic push - # pragma GCC diagnostic ignored "-Wunused-variable" - #endif for (const auto & [file, _] : checksums.files) { if (projection_directories.find(file) == projection_directories.end()) request.emplace_back(fs::path(to) / file); } - #if !defined(__clang__) - # pragma GCC diagnostic pop - #endif for (const auto & file : {"checksums.txt", "columns.txt"}) request.emplace_back(fs::path(to) / file); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9955858698f..d57e41ac94f 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1413,7 +1413,6 @@ ActionLock StorageMergeTree::stopMergesAndWait() MergeTreeDataPartPtr StorageMergeTree::outdatePart(MergeTreeTransaction * txn, const String & part_name, bool force) { - if (force) { /// Forcefully stop merges and make part outdated @@ -1426,7 +1425,6 @@ MergeTreeDataPartPtr StorageMergeTree::outdatePart(MergeTreeTransaction * txn, c } else { - /// Wait merges selector std::unique_lock lock(currently_processing_in_background_mutex); From eea7e4eced7e754c20c4c7b81f60f5972f695b00 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 15 Jun 2022 15:02:02 +0000 Subject: [PATCH 03/17] fix test --- tests/integration/test_storage_s3/test.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 58b9dac8cb0..ec7c746c549 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -324,7 +324,7 @@ def test_empty_put(started_cluster, auth): run_query(instance, put_query) - try: + assert ( run_query( instance, "select count(*) from s3('http://{}:{}/{}/{}', {}'CSV', '{}')".format( @@ -336,10 +336,8 @@ def test_empty_put(started_cluster, auth): table_format, ), ) - - assert False, "Query should be failed." - except helpers.client.QueryRuntimeException as e: - assert str(e).find("The specified key does not exist") != 0 + == "0\n" + ) # Test put values in CSV format. From 464cb5992088b01ab0a5b21f8cb0d6d8d0d7c70d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 20 Jun 2022 16:22:36 +0000 Subject: [PATCH 04/17] slightly better code --- .../ObjectStorages/S3/S3ObjectStorage.cpp | 46 +++++++++++-------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index df22724a618..3c3fe40ae37 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -39,6 +39,12 @@ namespace ErrorCodes namespace { +bool isNotFoundError(Aws::S3::S3Errors error) +{ + return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND + || error == Aws::S3::S3Errors::NO_SUCH_KEY; +} + template void throwIfError(const Aws::Utils::Outcome & response) { @@ -49,6 +55,20 @@ void throwIfError(const Aws::Utils::Outcome & response) } } +template +void throwIfUnexpectedError(const Aws::Utils::Outcome & response, bool if_exists) +{ + /// In this case even if absence of key may be ok for us, + /// the log will be polluted with error messages from aws sdk. + /// Looks like there is no way to supress them. + + if (!response.IsSuccess() && (!if_exists || !isNotFoundError(response.GetError().GetErrorType()))) + { + const auto & err = response.GetError(); + throw Exception(ErrorCodes::S3_ERROR, "{} (Code: {})", err.GetMessage(), static_cast(err.GetErrorType())); + } +} + template void logIfError(const Aws::Utils::Outcome & response, std::function && msg) { @@ -201,22 +221,9 @@ void S3ObjectStorage::listPrefix(const std::string & path, BlobsPathToSize & chi } while (outcome.GetResult().GetIsTruncated()); } -static bool isNotFoundError(Aws::S3::S3Errors error) -{ - return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND - || error == Aws::S3::S3Errors::NO_SUCH_KEY; -} - void S3ObjectStorage::removeObjectImpl(const std::string & path, bool if_exists) { auto client_ptr = client.get(); - auto settings_ptr = s3_settings.get(); - - auto throw_if_error = [&](auto & outcome) - { - if (!outcome.IsSuccess() && (!if_exists || !isNotFoundError(outcome.GetError().GetErrorType()))) - throwIfError(outcome); - }; // If chunk size is 0, only use single delete request // This allows us to work with GCS, which doesn't support DeleteObjects @@ -227,7 +234,7 @@ void S3ObjectStorage::removeObjectImpl(const std::string & path, bool if_exists) request.SetKey(path); auto outcome = client_ptr->DeleteObject(request); - throw_if_error(outcome); + throwIfUnexpectedError(outcome, if_exists); } else { @@ -242,7 +249,7 @@ void S3ObjectStorage::removeObjectImpl(const std::string & path, bool if_exists) request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - throw_if_error(outcome); + throwIfUnexpectedError(outcome, if_exists); } } @@ -251,9 +258,6 @@ void S3ObjectStorage::removeObjectsImpl(const std::vector & paths, if (paths.empty()) return; - auto client_ptr = client.get(); - auto settings_ptr = s3_settings.get(); - if (!s3_capabilities.support_batch_delete) { for (const auto & path : paths) @@ -261,6 +265,9 @@ void S3ObjectStorage::removeObjectsImpl(const std::vector & paths, } else { + auto client_ptr = client.get(); + auto settings_ptr = s3_settings.get(); + size_t chunk_size_limit = settings_ptr->objects_chunk_size_to_delete; size_t current_position = 0; @@ -286,8 +293,7 @@ void S3ObjectStorage::removeObjectsImpl(const std::vector & paths, request.SetDelete(delkeys); auto outcome = client_ptr->DeleteObjects(request); - if (!outcome.IsSuccess() && (!if_exists || !isNotFoundError(outcome.GetError().GetErrorType()))) - throwIfError(outcome); + throwIfUnexpectedError(outcome, if_exists); } } } From dcc9cd0367b61289cfff5789c61b3648a1cb2639 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Mon, 20 Jun 2022 22:50:39 +0200 Subject: [PATCH 05/17] fix style check --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 3c3fe40ae37..3300b1da746 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -60,7 +60,7 @@ void throwIfUnexpectedError(const Aws::Utils::Outcome & response, { /// In this case even if absence of key may be ok for us, /// the log will be polluted with error messages from aws sdk. - /// Looks like there is no way to supress them. + /// Looks like there is no way to suppress them. if (!response.IsSuccess() && (!if_exists || !isNotFoundError(response.GetError().GetErrorType()))) { From ca0993577319446bb17c4f0771c0404cc7333ca7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 Jun 2022 18:58:05 +0200 Subject: [PATCH 06/17] Move into separate file --- .../MetadataStorageFromDisk.cpp | 331 ------------------ .../ObjectStorages/MetadataStorageFromDisk.h | 12 +- ...taStorageFromDiskTransactionOperations.cpp | 262 ++++++++++++++ ...dataStorageFromDiskTransactionOperations.h | 190 ++++++++++ 4 files changed, 453 insertions(+), 342 deletions(-) create mode 100644 src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp create mode 100644 src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp index 9ba92113a30..ae87e8c61c0 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.cpp @@ -32,337 +32,6 @@ std::string toString(MetadataFromDiskTransactionState state) __builtin_unreachable(); } -namespace -{ - -std::string getTempFileName(const std::string & dir) -{ - return fs::path(dir) / getRandomASCIIString(); -} - -class SetLastModifiedOperation final : public IMetadataOperation -{ - std::string path; - Poco::Timestamp new_timestamp; - Poco::Timestamp old_timestamp; - IDisk & disk; -public: - SetLastModifiedOperation(const std::string & path_, Poco::Timestamp new_timestamp_, IDisk & disk_) - : path(path_) - , new_timestamp(new_timestamp_) - , disk(disk_) - {} - - void execute() override - { - old_timestamp = disk.getLastModified(path); - disk.setLastModified(path, new_timestamp); - } - - void undo() override - { - disk.setLastModified(path, old_timestamp); - } -}; - -class UnlinkFileOperation final : public IMetadataOperation -{ - std::string path; - IDisk & disk; - std::string prev_data; -public: - UnlinkFileOperation(const std::string & path_, IDisk & disk_) - : path(path_) - , disk(disk_) - { - } - - void execute() override - { - auto buf = disk.readFile(path); - readStringUntilEOF(prev_data, *buf); - disk.removeFile(path); - } - - void undo() override - { - auto buf = disk.writeFile(path); - writeString(prev_data, *buf); - buf->finalize(); - } -}; - -class CreateDirectoryOperation final : public IMetadataOperation -{ -private: - std::string path; - IDisk & disk; -public: - CreateDirectoryOperation(const std::string & path_, IDisk & disk_) - : path(path_) - , disk(disk_) - { - } - - void execute() override - { - disk.createDirectory(path); - } - - void undo() override - { - disk.removeDirectory(path); - } -}; - -class CreateDirectoryRecursiveOperation final : public IMetadataOperation -{ -private: - std::string path; - std::vector paths_created; - IDisk & disk; -public: - CreateDirectoryRecursiveOperation(const std::string & path_, IDisk & disk_) - : path(path_) - , disk(disk_) - { - } - - void execute() override - { - namespace fs = std::filesystem; - fs::path p(path); - while (!disk.exists(p)) - { - paths_created.push_back(p); - if (!p.has_parent_path()) - break; - p = p.parent_path(); - } - for (const auto & path_to_create : paths_created | std::views::reverse) - disk.createDirectory(path_to_create); - } - - void undo() override - { - for (const auto & path_created : paths_created) - disk.removeDirectory(path_created); - } -}; - -class RemoveDirectoryOperation final : public IMetadataOperation -{ -private: - std::string path; - IDisk & disk; -public: - RemoveDirectoryOperation(const std::string & path_, IDisk & disk_) - : path(path_) - , disk(disk_) - {} - - void execute() override - { - disk.removeDirectory(path); - } - - void undo() override - { - disk.createDirectory(path); - } -}; - -class RemoveRecursiveOperation final : public IMetadataOperation -{ - std::string path; - IDisk & disk; - std::string temp_path; -public: - RemoveRecursiveOperation(const std::string & path_, IDisk & disk_) - : path(path_) - , disk(disk_) - , temp_path(getTempFileName(fs::path(path).parent_path())) - { - } - - void execute() override - { - if (disk.isFile(path)) - disk.moveFile(path, temp_path); - else if (disk.isDirectory(path)) - disk.moveDirectory(path, temp_path); - } - - void undo() override - { - if (disk.isFile(temp_path)) - disk.moveFile(temp_path, path); - else if (disk.isDirectory(temp_path)) - disk.moveDirectory(temp_path, path); - } - - void finalize() override - { - if (disk.exists(temp_path)) - disk.removeRecursive(temp_path); - - if (disk.exists(path)) - disk.removeRecursive(path); - } -}; - - -class CreateHardlinkOperation final : public IMetadataOperation -{ -private: - std::string path_from; - std::string path_to; - IDisk & disk; -public: - CreateHardlinkOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_) - : path_from(path_from_) - , path_to(path_to_) - , disk(disk_) - {} - - void execute() override - { - disk.createHardLink(path_from, path_to); - } - - void undo() override - { - disk.removeFile(path_to); - } -}; - -class MoveFileOperation final : public IMetadataOperation -{ -private: - std::string path_from; - std::string path_to; - IDisk & disk; -public: - MoveFileOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_) - : path_from(path_from_) - , path_to(path_to_) - , disk(disk_) - {} - - void execute() override - { - disk.moveFile(path_from, path_to); - } - - void undo() override - { - disk.moveFile(path_to, path_from); - } -}; - -class MoveDirectoryOperation final : public IMetadataOperation -{ -private: - std::string path_from; - std::string path_to; - IDisk & disk; -public: - MoveDirectoryOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_) - : path_from(path_from_) - , path_to(path_to_) - , disk(disk_) - {} - - void execute() override - { - disk.moveDirectory(path_from, path_to); - } - - void undo() override - { - disk.moveDirectory(path_to, path_from); - } -}; - - -class ReplaceFileOperation final : public IMetadataOperation -{ -private: - std::string path_from; - std::string path_to; - IDisk & disk; - std::string temp_path_to; -public: - ReplaceFileOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_) - : path_from(path_from_) - , path_to(path_to_) - , disk(disk_) - , temp_path_to(getTempFileName(fs::path(path_to).parent_path())) - { - } - - void execute() override - { - if (disk.exists(path_to)) - disk.moveFile(path_to, temp_path_to); - - disk.replaceFile(path_from, path_to); - } - - void undo() override - { - disk.moveFile(path_to, path_from); - disk.moveFile(temp_path_to, path_to); - } - - void finalize() override - { - disk.removeFileIfExists(temp_path_to); - } -}; - -class WriteFileOperation final : public IMetadataOperation -{ -private: - std::string path; - IDisk & disk; - std::string data; - bool existed = false; - std::string prev_data; -public: - WriteFileOperation(const std::string & path_, IDisk & disk_, const std::string & data_) - : path(path_) - , disk(disk_) - , data(data_) - {} - - void execute() override - { - if (disk.exists(path)) - { - existed = true; - auto buf = disk.readFile(path); - readStringUntilEOF(prev_data, *buf); - } - auto buf = disk.writeFile(path); - writeString(data, *buf); - buf->finalize(); - } - - void undo() override - { - if (!existed) - disk.removeFileIfExists(path); - else - { - auto buf = disk.writeFile(path); - writeString(prev_data, *buf); - } - } -}; - -} - void MetadataStorageFromDiskTransaction::writeStringToFile( /// NOLINT const std::string & path, const std::string & data) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h index cdeab5efd3e..1ac68e193f2 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDisk.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDisk.h @@ -4,21 +4,11 @@ #include #include +#include "MetadataStorageFromDiskTransactionOperations.h" namespace DB { - -struct IMetadataOperation -{ - virtual void execute() = 0; - virtual void undo() = 0; - virtual void finalize() {} - virtual ~IMetadataOperation() = default; -}; - -using MetadataOperationPtr = std::unique_ptr; - enum class MetadataFromDiskTransactionState { PREPARING, diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp new file mode 100644 index 00000000000..9d407148296 --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp @@ -0,0 +1,262 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + + +namespace DB +{ + +static std::string getTempFileName(const std::string & dir) +{ + return fs::path(dir) / getRandomASCIIString(); +} + +SetLastModifiedOperation::SetLastModifiedOperation(const std::string & path_, Poco::Timestamp new_timestamp_, IDisk & disk_) + : path(path_) + , new_timestamp(new_timestamp_) + , disk(disk_) +{ +} + +void SetLastModifiedOperation::execute() +{ + old_timestamp = disk.getLastModified(path); + disk.setLastModified(path, new_timestamp); +} + +void SetLastModifiedOperation::undo() +{ + disk.setLastModified(path, old_timestamp); +} + +UnlinkFileOperation::UnlinkFileOperation(const std::string & path_, IDisk & disk_) + : path(path_) + , disk(disk_) +{ +} + +void UnlinkFileOperation::execute() +{ + auto buf = disk.readFile(path); + readStringUntilEOF(prev_data, *buf); + std::cerr << "\n\n\nRemove file: " << path << "\n\n\n"; + disk.removeFile(path); +} + +void UnlinkFileOperation::undo() +{ + auto buf = disk.writeFile(path); + writeString(prev_data, *buf); + buf->finalize(); +} + +CreateDirectoryOperation::CreateDirectoryOperation(const std::string & path_, IDisk & disk_) + : path(path_) + , disk(disk_) +{ +} + +void CreateDirectoryOperation::execute() +{ + disk.createDirectory(path); +} + +void CreateDirectoryOperation::undo() +{ + disk.removeDirectory(path); +} + +CreateDirectoryRecursiveOperation::CreateDirectoryRecursiveOperation(const std::string & path_, IDisk & disk_) + : path(path_) + , disk(disk_) +{ +} + +void CreateDirectoryRecursiveOperation::execute() +{ + namespace fs = std::filesystem; + fs::path p(path); + while (!disk.exists(p)) + { + paths_created.push_back(p); + if (!p.has_parent_path()) + break; + p = p.parent_path(); + } + for (const auto & path_to_create : paths_created | std::views::reverse) + disk.createDirectory(path_to_create); +} + +void CreateDirectoryRecursiveOperation::undo() +{ + for (const auto & path_created : paths_created) + disk.removeDirectory(path_created); +} + +RemoveDirectoryOperation::RemoveDirectoryOperation(const std::string & path_, IDisk & disk_) + : path(path_) + , disk(disk_) +{ +} + +void RemoveDirectoryOperation::execute() +{ + disk.removeDirectory(path); +} + +void RemoveDirectoryOperation::undo() +{ + disk.createDirectory(path); +} + +RemoveRecursiveOperation::RemoveRecursiveOperation(const std::string & path_, IDisk & disk_) + : path(path_) + , disk(disk_) + , temp_path(getTempFileName(fs::path(path).parent_path())) +{ +} + +void RemoveRecursiveOperation:: execute() +{ + if (disk.isFile(path)) + disk.moveFile(path, temp_path); + else if (disk.isDirectory(path)) + disk.moveDirectory(path, temp_path); +} + +void RemoveRecursiveOperation::undo() +{ + if (disk.isFile(temp_path)) + disk.moveFile(temp_path, path); + else if (disk.isDirectory(temp_path)) + disk.moveDirectory(temp_path, path); +} + +void RemoveRecursiveOperation::finalize() +{ + if (disk.exists(temp_path)) + disk.removeRecursive(temp_path); + + if (disk.exists(path)) + disk.removeRecursive(path); +} + +CreateHardlinkOperation::CreateHardlinkOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_) + : path_from(path_from_) + , path_to(path_to_) + , disk(disk_) +{ +} + +void CreateHardlinkOperation::execute() +{ + disk.createHardLink(path_from, path_to); +} + +void CreateHardlinkOperation::undo() +{ + disk.removeFile(path_to); +} + +MoveFileOperation::MoveFileOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_) + : path_from(path_from_) + , path_to(path_to_) + , disk(disk_) +{ +} + +void MoveFileOperation::execute() +{ + disk.moveFile(path_from, path_to); +} + +void MoveFileOperation::undo() +{ + disk.moveFile(path_to, path_from); +} + +MoveDirectoryOperation::MoveDirectoryOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_) + : path_from(path_from_) + , path_to(path_to_) + , disk(disk_) +{ +} + +void MoveDirectoryOperation::execute() +{ + disk.moveDirectory(path_from, path_to); +} + +void MoveDirectoryOperation::undo() +{ + disk.moveDirectory(path_to, path_from); +} + + +ReplaceFileOperation::ReplaceFileOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_) + : path_from(path_from_) + , path_to(path_to_) + , disk(disk_) + , temp_path_to(getTempFileName(fs::path(path_to).parent_path())) +{ +} + +void ReplaceFileOperation::execute() +{ + if (disk.exists(path_to)) + disk.moveFile(path_to, temp_path_to); + + disk.replaceFile(path_from, path_to); +} + +void ReplaceFileOperation::undo() +{ + disk.moveFile(path_to, path_from); + disk.moveFile(temp_path_to, path_to); +} + +void ReplaceFileOperation::finalize() +{ + disk.removeFileIfExists(temp_path_to); +} + +WriteFileOperation::WriteFileOperation(const std::string & path_, IDisk & disk_, const std::string & data_) + : path(path_) + , disk(disk_) + , data(data_) +{ +} + +void WriteFileOperation::execute() +{ + if (disk.exists(path)) + { + existed = true; + auto buf = disk.readFile(path); + readStringUntilEOF(prev_data, *buf); + } + auto buf = disk.writeFile(path); + writeString(data, *buf); + buf->finalize(); +} + +void WriteFileOperation::undo() +{ + if (!existed) + { + disk.removeFileIfExists(path); + } + else + { + auto buf = disk.writeFile(path); + writeString(prev_data, *buf); + } +} + +} diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h new file mode 100644 index 00000000000..322eafdef6f --- /dev/null +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h @@ -0,0 +1,190 @@ +#pragma once + +#include + +namespace DB +{ +class IDisk; + + +struct IMetadataOperation +{ + virtual void execute() = 0; + virtual void undo() = 0; + virtual void finalize() {} + virtual ~IMetadataOperation() = default; +}; + +using MetadataOperationPtr = std::unique_ptr; + + +struct SetLastModifiedOperation final : public IMetadataOperation +{ + SetLastModifiedOperation(const std::string & path_, Poco::Timestamp new_timestamp_, IDisk & disk_); + + void execute() override; + + void undo() override; + +private: + std::string path; + Poco::Timestamp new_timestamp; + Poco::Timestamp old_timestamp; + IDisk & disk; +}; + + +struct UnlinkFileOperation final : public IMetadataOperation +{ + UnlinkFileOperation(const std::string & path_, IDisk & disk_); + + void execute() override; + + void undo() override; + +private: + std::string path; + IDisk & disk; + std::string prev_data; +}; + + +struct CreateDirectoryOperation final : public IMetadataOperation +{ + CreateDirectoryOperation(const std::string & path_, IDisk & disk_); + + void execute() override; + + void undo() override; + +private: + std::string path; + IDisk & disk; +}; + + +struct CreateDirectoryRecursiveOperation final : public IMetadataOperation +{ + CreateDirectoryRecursiveOperation(const std::string & path_, IDisk & disk_); + + void execute() override; + + void undo() override; + +private: + std::string path; + std::vector paths_created; + IDisk & disk; +}; + + +struct RemoveDirectoryOperation final : public IMetadataOperation +{ + RemoveDirectoryOperation(const std::string & path_, IDisk & disk_); + + void execute() override; + + void undo() override; + +private: + std::string path; + IDisk & disk; +}; + +struct RemoveRecursiveOperation final : public IMetadataOperation +{ + RemoveRecursiveOperation(const std::string & path_, IDisk & disk_); + + void execute() override; + + void undo() override; + + void finalize() override; + +private: + std::string path; + IDisk & disk; + std::string temp_path; +}; + + +struct CreateHardlinkOperation final : public IMetadataOperation +{ + CreateHardlinkOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_); + + void execute() override; + + void undo() override; + +private: + std::string path_from; + std::string path_to; + IDisk & disk; +}; + + +struct MoveFileOperation final : public IMetadataOperation +{ + MoveFileOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_); + + void execute() override; + + void undo() override; + +private: + std::string path_from; + std::string path_to; + IDisk & disk; +}; + + +struct MoveDirectoryOperation final : public IMetadataOperation +{ + MoveDirectoryOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_); + + void execute() override; + + void undo() override; + +private: + std::string path_from; + std::string path_to; + IDisk & disk; +}; + + +struct ReplaceFileOperation final : public IMetadataOperation +{ + ReplaceFileOperation(const std::string & path_from_, const std::string & path_to_, IDisk & disk_); + + void execute() override; + + void undo() override; + + void finalize() override; + +private: + std::string path_from; + std::string path_to; + IDisk & disk; + std::string temp_path_to; +}; + + +struct WriteFileOperation final : public IMetadataOperation +{ + WriteFileOperation(const std::string & path_, IDisk & disk_, const std::string & data_); + + void execute() override; + + void undo() override; + +private: + std::string path; + IDisk & disk; + std::string data; + bool existed = false; + std::string prev_data; +}; + +} From ac937c7fd630b49eb82ae0275920401cdbcd8953 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 21 Jun 2022 19:21:36 +0200 Subject: [PATCH 07/17] Add comment --- .../MetadataStorageFromDiskTransactionOperations.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h index 322eafdef6f..ba8b3ef6b4b 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h @@ -6,6 +6,9 @@ namespace DB { class IDisk; +/** + * Implementations for transactional operations with metadata used by MetadataStorageFromDisk. + */ struct IMetadataOperation { From cc2f01c41deca6f683d5b6970cf1059c9b98f75a Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 22 Jun 2022 14:28:27 +0200 Subject: [PATCH 08/17] Fix name clash --- .../DiskObjectStorageTransaction.cpp | 67 ++++++++++--------- .../DiskObjectStorageTransaction.h | 1 + ...dataStorageFromDiskTransactionOperations.h | 1 - 3 files changed, 37 insertions(+), 32 deletions(-) diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index b0a180e2c53..cd43ea81be0 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -30,13 +30,15 @@ DiskObjectStorageTransaction::DiskObjectStorageTransaction( , metadata_helper(metadata_helper_) {} +namespace +{ /// Operation which affects only metadata. Simplest way to /// implement via callback. -struct PureMetadataOperation final : public IDiskObjectStorageOperation +struct PureMetadataObjectStorageOperation final : public IDiskObjectStorageOperation { std::function on_execute; - PureMetadataOperation( + PureMetadataObjectStorageOperation( IObjectStorage & object_storage_, IMetadataStorage & metadata_storage_, std::function && on_execute_) @@ -58,7 +60,7 @@ struct PureMetadataOperation final : public IDiskObjectStorageOperation } }; -struct RemoveObjectOperation final : public IDiskObjectStorageOperation +struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation { std::string path; bool delete_metadata_only; @@ -66,7 +68,7 @@ struct RemoveObjectOperation final : public IDiskObjectStorageOperation std::vector paths_to_remove; bool if_exists; - RemoveObjectOperation( + RemoveObjectStorageOperation( IObjectStorage & object_storage_, IMetadataStorage & metadata_storage_, const std::string & path_, @@ -138,7 +140,7 @@ struct RemoveObjectOperation final : public IDiskObjectStorageOperation } }; -struct RemoveRecursiveOperation final : public IDiskObjectStorageOperation +struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOperation { std::string path; std::unordered_map> paths_to_remove; @@ -146,7 +148,7 @@ struct RemoveRecursiveOperation final : public IDiskObjectStorageOperation NameSet file_names_remove_metadata_only; std::vector path_to_remove_from_cache; - RemoveRecursiveOperation( + RemoveRecursiveObjectStorageOperation( IObjectStorage & object_storage_, IMetadataStorage & metadata_storage_, const std::string & path_, @@ -232,13 +234,13 @@ struct RemoveRecursiveOperation final : public IDiskObjectStorageOperation }; -struct ReplaceFileOperation final : public IDiskObjectStorageOperation +struct ReplaceFileObjectStorageOperation final : public IDiskObjectStorageOperation { std::string path_from; std::string path_to; std::vector blobs_to_remove; - ReplaceFileOperation( + ReplaceFileObjectStorageOperation( IObjectStorage & object_storage_, IMetadataStorage & metadata_storage_, const std::string & path_from_, @@ -271,12 +273,12 @@ struct ReplaceFileOperation final : public IDiskObjectStorageOperation } }; -struct WriteFileOperation final : public IDiskObjectStorageOperation +struct WriteFileObjectStorageOperation final : public IDiskObjectStorageOperation { std::string path; std::string blob_path; - WriteFileOperation( + WriteFileObjectStorageOperation( IObjectStorage & object_storage_, IMetadataStorage & metadata_storage_, const std::string & path_, @@ -303,7 +305,7 @@ struct WriteFileOperation final : public IDiskObjectStorageOperation }; -struct CopyFileOperation final : public IDiskObjectStorageOperation +struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation { std::string from_path; std::string to_path; @@ -311,7 +313,7 @@ struct CopyFileOperation final : public IDiskObjectStorageOperation std::vector created_blobs; - CopyFileOperation( + CopyFileObjectStorageOperation( IObjectStorage & object_storage_, IMetadataStorage & metadata_storage_, const std::string & from_path_, @@ -352,10 +354,12 @@ struct CopyFileOperation final : public IDiskObjectStorageOperation } }; +} + void DiskObjectStorageTransaction::createDirectory(const std::string & path) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) { tx->createDirectory(path); })); @@ -364,7 +368,7 @@ void DiskObjectStorageTransaction::createDirectory(const std::string & path) void DiskObjectStorageTransaction::createDirectories(const std::string & path) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) { tx->createDicrectoryRecursive(path); })); @@ -374,7 +378,7 @@ void DiskObjectStorageTransaction::createDirectories(const std::string & path) void DiskObjectStorageTransaction::moveDirectory(const std::string & from_path, const std::string & to_path) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [from_path, to_path](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [from_path, to_path](MetadataTransactionPtr tx) { tx->moveDirectory(from_path, to_path); })); @@ -383,7 +387,7 @@ void DiskObjectStorageTransaction::moveDirectory(const std::string & from_path, void DiskObjectStorageTransaction::moveFile(const String & from_path, const String & to_path) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [from_path, to_path, this](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [from_path, to_path, this](MetadataTransactionPtr tx) { if (metadata_storage.exists(to_path)) throw Exception("File already exists: " + to_path, ErrorCodes::FILE_ALREADY_EXISTS); @@ -397,7 +401,7 @@ void DiskObjectStorageTransaction::moveFile(const String & from_path, const Stri void DiskObjectStorageTransaction::replaceFile(const std::string & from_path, const std::string & to_path) { - operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, from_path, to_path)); + operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, from_path, to_path)); } void DiskObjectStorageTransaction::clearDirectory(const std::string & path) @@ -416,23 +420,23 @@ void DiskObjectStorageTransaction::removeFile(const std::string & path) void DiskObjectStorageTransaction::removeSharedFile(const std::string & path, bool keep_shared_data) { - operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, path, keep_shared_data, false)); + operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, path, keep_shared_data, false)); } void DiskObjectStorageTransaction::removeSharedRecursive(const std::string & path, bool keep_all_shared_data, const NameSet & file_names_remove_metadata_only) { - operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, path, keep_all_shared_data, file_names_remove_metadata_only)); + operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, path, keep_all_shared_data, file_names_remove_metadata_only)); } void DiskObjectStorageTransaction::removeSharedFileIfExists(const std::string & path, bool keep_shared_data) { - operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, path, keep_shared_data, true)); + operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, path, keep_shared_data, true)); } void DiskObjectStorageTransaction::removeDirectory(const std::string & path) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) { tx->removeDirectory(path); })); @@ -494,19 +498,20 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile auto blob_path = fs::path(remote_fs_root_path) / blob_name; - operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, path, blob_path)); - auto create_metadata_callback = [tx = shared_from_this(), this, mode, path, blob_name, autocommit] (size_t count) + auto create_metadata_callback = [tx = shared_from_this(), mode, path, blob_name, autocommit] (size_t count) { if (mode == WriteMode::Rewrite) - metadata_transaction->createMetadataFile(path, blob_name, count); + tx->metadata_transaction->createMetadataFile(path, blob_name, count); else - metadata_transaction->addBlobToMetadata(path, blob_name, count); + tx->metadata_transaction->addBlobToMetadata(path, blob_name, count); if (autocommit) - metadata_transaction->commit(); + tx->metadata_transaction->commit(); }; + operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, path, blob_path)); + /// We always use mode Rewrite because we simulate append using metadata and different files return object_storage.writeObject( blob_path, WriteMode::Rewrite, object_attributes, @@ -518,7 +523,7 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile void DiskObjectStorageTransaction::createHardLink(const std::string & src_path, const std::string & dst_path) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [src_path, dst_path](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [src_path, dst_path](MetadataTransactionPtr tx) { tx->createHardLink(src_path, dst_path); })); @@ -527,7 +532,7 @@ void DiskObjectStorageTransaction::createHardLink(const std::string & src_path, void DiskObjectStorageTransaction::setReadOnly(const std::string & path) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) { tx->setReadOnly(path); })); @@ -536,7 +541,7 @@ void DiskObjectStorageTransaction::setReadOnly(const std::string & path) void DiskObjectStorageTransaction::setLastModified(const std::string & path, const Poco::Timestamp & timestamp) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [path, timestamp](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [path, timestamp](MetadataTransactionPtr tx) { tx->setLastModified(path, timestamp); })); @@ -545,7 +550,7 @@ void DiskObjectStorageTransaction::setLastModified(const std::string & path, con void DiskObjectStorageTransaction::createFile(const std::string & path) { operations_to_execute.emplace_back( - std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) + std::make_unique(object_storage, metadata_storage, [path](MetadataTransactionPtr tx) { tx->createEmptyMetadataFile(path); })); @@ -553,7 +558,7 @@ void DiskObjectStorageTransaction::createFile(const std::string & path) void DiskObjectStorageTransaction::copyFile(const std::string & from_file_path, const std::string & to_file_path) { - operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, from_file_path, to_file_path, remote_fs_root_path)); + operations_to_execute.emplace_back(std::make_unique(object_storage, metadata_storage, from_file_path, to_file_path, remote_fs_root_path)); } void DiskObjectStorageTransaction::commit() diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h index 24bbadb2cdd..362b5404707 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h @@ -72,6 +72,7 @@ public: void createDirectories(const std::string & path) override; void clearDirectory(const std::string & path) override; + void moveDirectory(const std::string & from_path, const std::string & to_path) override; void moveFile(const String & from_path, const String & to_path) override; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h index ba8b3ef6b4b..f998771a68f 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.h @@ -181,7 +181,6 @@ struct WriteFileOperation final : public IMetadataOperation void execute() override; void undo() override; - private: std::string path; IDisk & disk; From 87b3f331e227459f1d4095e4542df0e6eb3625d6 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Wed, 22 Jun 2022 14:57:29 +0200 Subject: [PATCH 09/17] Update MetadataStorageFromDiskTransactionOperations.cpp --- .../MetadataStorageFromDiskTransactionOperations.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp index 9d407148296..dce4ae2f1f7 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp @@ -45,7 +45,6 @@ void UnlinkFileOperation::execute() { auto buf = disk.readFile(path); readStringUntilEOF(prev_data, *buf); - std::cerr << "\n\n\nRemove file: " << path << "\n\n\n"; disk.removeFile(path); } From ca0299e927a2f8d724e6e678b66f1c4f88360b20 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 22 Jun 2022 19:01:46 +0200 Subject: [PATCH 10/17] Ban projections for zero-copy replication in a right way --- src/Storages/MergeTree/MergeTreeData.cpp | 8 -------- .../MergeTree/registerStorageMergeTree.cpp | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index df864a2725d..c24636a56f8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -280,14 +280,6 @@ MergeTreeData::MergeTreeData( /// Creating directories, if not exist. for (const auto & disk : getDisks()) { - /// TODO: implement it the main issue in DataPartsExchange (not able to send directories metadata) - if (supportsReplication() && settings->allow_remote_fs_zero_copy_replication - && disk->supportZeroCopyReplication() && metadata_.hasProjections()) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Projections are not supported when zero-copy replication is enabled for table. " - "Currently disk '{}' supports zero copy replication", disk->getName()); - } - if (disk->isBroken()) continue; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 43e1af21eac..4d7121b9a39 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -673,6 +673,20 @@ static StoragePtr create(const StorageFactory::Arguments & args) throw Exception("Wrong number of engine arguments.", ErrorCodes::BAD_ARGUMENTS); if (replicated) + { + auto storage_policy = args.getContext()->getStoragePolicy(storage_settings->storage_policy); + + for (const auto & disk : storage_policy->getDisks()) + { + /// TODO: implement it the main issue in DataPartsExchange (not able to send directories metadata) + if (storage_settings->allow_remote_fs_zero_copy_replication + && disk->supportZeroCopyReplication() && metadata.hasProjections()) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Projections are not supported when zero-copy replication is enabled for table. " + "Currently disk '{}' supports zero copy replication", disk->getName()); + } + } + return std::make_shared( zookeeper_path, replica_name, @@ -686,6 +700,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) std::move(storage_settings), args.has_force_restore_data_flag, renaming_restrictions); + } else return std::make_shared( args.table_id, From 9db64952c0630bcd09d416499b0c88e38057337c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 22 Jun 2022 21:10:21 +0300 Subject: [PATCH 11/17] Fix SIGSEGV in optimization in PartialSortingTransform Fixes: #37992 (cc @kitaisreal) Signed-off-by: Azat Khuzhin --- src/Processors/Transforms/PartialSortingTransform.cpp | 2 +- ...02345_partial_sort_transform_optimization.reference | 10 ++++++++++ .../02345_partial_sort_transform_optimization.sql | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference create mode 100644 tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 131bf4f8e7c..b0f866cb3fd 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -71,7 +71,7 @@ bool compareWithThreshold(const ColumnRawPtrs & raw_block_columns, size_t min_bl size_t raw_block_columns_size = raw_block_columns.size(); for (size_t i = 0; i < raw_block_columns_size; ++i) { - int res = sort_description[i].direction * raw_block_columns[i]->compareAt(min_block_index, 0, *threshold_columns[0], sort_description[i].nulls_direction); + int res = sort_description[i].direction * raw_block_columns[i]->compareAt(min_block_index, 0, *threshold_columns[i], sort_description[i].nulls_direction); if (res < 0) return true; diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference new file mode 100644 index 00000000000..e6c99ff9291 --- /dev/null +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference @@ -0,0 +1,10 @@ +0 999999 999999 +0 999998 999998 +0 999997 999997 +0 999996 999996 +0 999995 999995 +0 999994 999994 +0 999993 999993 +0 999992 999992 +0 999991 999991 +0 999990 999990 diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql new file mode 100644 index 00000000000..e7855c47474 --- /dev/null +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql @@ -0,0 +1,3 @@ +-- Regression for PartialSortingTransform optimization +-- that requires at least 1500 rows. +select * from (select * from (select 0 a, toNullable(number) b, toString(number) c from numbers(1e6)) order by a desc, b desc, c limit 1500) limit 10; From 1b495ec8adcf15d7af6d5b309f6e11639c8f3b61 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 23 Jun 2022 00:17:37 +0200 Subject: [PATCH 12/17] Fix style --- src/Storages/MergeTree/registerStorageMergeTree.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 4d7121b9a39..73b9b364f7f 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -33,6 +33,7 @@ namespace ErrorCodes extern const int UNKNOWN_STORAGE; extern const int NO_REPLICA_NAME_GIVEN; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int NOT_IMPLEMENTED; } From 29bd0360134dc3bd366043256cdc9d69f1348924 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 22 Jun 2022 21:58:43 -0400 Subject: [PATCH 13/17] separated queries, specified UNION DISTINCT --- .../example-datasets/brown-benchmark.md | 83 +++++++++++++------ 1 file changed, 56 insertions(+), 27 deletions(-) diff --git a/docs/en/getting-started/example-datasets/brown-benchmark.md b/docs/en/getting-started/example-datasets/brown-benchmark.md index b8e6140c60f..7b995cd8345 100644 --- a/docs/en/getting-started/example-datasets/brown-benchmark.md +++ b/docs/en/getting-started/example-datasets/brown-benchmark.md @@ -17,11 +17,12 @@ Unpack the data: xz -v -d mgbench{1..3}.csv.xz ``` -Create tables: -``` +Create the database and tables: +```sql CREATE DATABASE mgbench; +``` - +```sql CREATE TABLE mgbench.logs1 ( log_time DateTime, machine_name LowCardinality(String), @@ -47,8 +48,10 @@ CREATE TABLE mgbench.logs1 ( ) ENGINE = MergeTree() ORDER BY (machine_group, machine_name, log_time); +``` +```sql CREATE TABLE mgbench.logs2 ( log_time DateTime, client_ip IPv4, @@ -58,8 +61,10 @@ CREATE TABLE mgbench.logs2 ( ) ENGINE = MergeTree() ORDER BY log_time; +``` +```sql CREATE TABLE mgbench.logs3 ( log_time DateTime64, device_id FixedString(15), @@ -83,7 +88,7 @@ clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbe ``` Run benchmark queries: -``` +```sql -- Q1.1: What is the CPU/network utilization for each web server since midnight? SELECT machine_name, @@ -101,26 +106,29 @@ FROM ( COALESCE(cpu_user, 0.0) AS cpu, COALESCE(bytes_in, 0.0) AS net_in, COALESCE(bytes_out, 0.0) AS net_out - FROM logs1 + FROM mgbench.logs1 WHERE machine_name IN ('anansi','aragog','urd') AND log_time >= TIMESTAMP '2017-01-11 00:00:00' ) AS r GROUP BY machine_name; +``` +```sql -- Q1.2: Which computer lab machines have been offline in the past day? SELECT machine_name, log_time -FROM logs1 +FROM mgbench.logs1 WHERE (machine_name LIKE 'cslab%' OR machine_name LIKE 'mslab%') AND load_one IS NULL AND log_time >= TIMESTAMP '2017-01-10 00:00:00' ORDER BY machine_name, log_time; +``` - +```sql -- Q1.3: What are the hourly average metrics during the past 10 days for a specific workstation? SELECT dt, @@ -138,7 +146,7 @@ FROM ( load_one, mem_free, swap_free - FROM logs1 + FROM mgbench.logs1 WHERE machine_name = 'babbage' AND load_fifteen IS NOT NULL AND load_five IS NOT NULL @@ -151,13 +159,14 @@ GROUP BY dt, hr ORDER BY dt, hr; +``` - +```sql -- Q1.4: Over 1 month, how often was each server blocked on disk I/O? SELECT machine_name, COUNT(*) AS spikes -FROM logs1 +FROM mgbench.logs1 WHERE machine_group = 'Servers' AND cpu_wio > 0.99 AND log_time >= TIMESTAMP '2016-12-01 00:00:00' @@ -165,8 +174,9 @@ WHERE machine_group = 'Servers' GROUP BY machine_name ORDER BY spikes DESC LIMIT 10; +``` - +```sql -- Q1.5: Which externally reachable VMs have run low on memory? SELECT machine_name, @@ -176,7 +186,7 @@ FROM ( SELECT machine_name, CAST(log_time AS DATE) AS dt, mem_free - FROM logs1 + FROM mgbench.logs1 WHERE machine_group = 'DMZ' AND mem_free IS NOT NULL ) AS r @@ -185,8 +195,9 @@ GROUP BY machine_name, HAVING MIN(mem_free) < 10000 ORDER BY machine_name, dt; +``` - +```sql -- Q1.6: What is the total hourly network traffic across all file servers? SELECT dt, @@ -199,7 +210,7 @@ FROM ( EXTRACT(HOUR FROM log_time) AS hr, COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in, COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out - FROM logs1 + FROM mgbench.logs1 WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon', 'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey', 'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps', @@ -210,28 +221,32 @@ GROUP BY dt, hr ORDER BY both_sum DESC LIMIT 10; +``` - +```sql -- Q2.1: Which requests have caused server errors within the past 2 weeks? SELECT * -FROM logs2 +FROM mgbench.logs2 WHERE status_code >= 500 AND log_time >= TIMESTAMP '2012-12-18 00:00:00' ORDER BY log_time; +``` - +```sql -- Q2.2: During a specific 2-week period, was the user password file leaked? SELECT * -FROM logs2 +FROM mgbench.logs2 WHERE status_code >= 200 AND status_code < 300 AND request LIKE '%/etc/passwd%' AND log_time >= TIMESTAMP '2012-05-06 00:00:00' AND log_time < TIMESTAMP '2012-05-20 00:00:00'; +``` +```sql -- Q2.3: What was the average path depth for top-level requests in the past month? SELECT top_level, @@ -242,7 +257,7 @@ FROM ( FROM ( SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len, request - FROM logs2 + FROM mgbench.logs2 WHERE status_code >= 200 AND status_code < 300 AND log_time >= TIMESTAMP '2012-12-01 00:00:00' @@ -254,19 +269,23 @@ WHERE top_level IN ('/about','/courses','/degrees','/events', '/publications','/research','/teaching','/ugrad') GROUP BY top_level ORDER BY top_level; +``` +```sql -- Q2.4: During the last 3 months, which clients have made an excessive number of requests? SELECT client_ip, COUNT(*) AS num_requests -FROM logs2 +FROM mgbench.logs2 WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00' GROUP BY client_ip HAVING COUNT(*) >= 100000 ORDER BY num_requests DESC; +``` +```sql -- Q2.5: What are the daily unique visitors? SELECT dt, @@ -274,12 +293,14 @@ SELECT dt, FROM ( SELECT CAST(log_time AS DATE) AS dt, client_ip - FROM logs2 + FROM mgbench.logs2 ) AS r GROUP BY dt ORDER BY dt; +``` +```sql -- Q2.6: What are the average and maximum data transfer rates (Gbps)? SELECT AVG(transfer) / 125000000.0 AS transfer_avg, @@ -287,33 +308,39 @@ SELECT AVG(transfer) / 125000000.0 AS transfer_avg, FROM ( SELECT log_time, SUM(object_size) AS transfer - FROM logs2 + FROM mgbench.logs2 GROUP BY log_time ) AS r; +``` +```sql -- Q3.1: Did the indoor temperature reach freezing over the weekend? SELECT * -FROM logs3 +FROM mgbench.logs3 WHERE event_type = 'temperature' AND event_value <= 32.0 AND log_time >= '2019-11-29 17:00:00.000'; +``` +```sql -- Q3.4: Over the past 6 months, how frequently were each door opened? SELECT device_name, device_floor, COUNT(*) AS ct -FROM logs3 +FROM mgbench.logs3 WHERE event_type = 'door_open' AND log_time >= '2019-06-01 00:00:00.000' GROUP BY device_name, device_floor ORDER BY ct DESC; +``` +```sql -- Q3.5: Where in the building do large temperature variations occur in winter and summer? WITH temperature AS ( @@ -335,7 +362,7 @@ WITH temperature AS ( device_type, device_floor, event_value - FROM logs3 + FROM mgbench.logs3 WHERE event_type = 'temperature' ) AS r GROUP BY dt, @@ -357,7 +384,7 @@ SELECT DISTINCT device_name, FROM temperature WHERE dt >= DATE '2018-12-01' AND dt < DATE '2019-03-01' -UNION +UNION DISTINCT SELECT DISTINCT device_name, device_type, device_floor, @@ -365,8 +392,10 @@ SELECT DISTINCT device_name, FROM temperature WHERE dt >= DATE '2019-06-01' AND dt < DATE '2019-09-01'; +``` +```sql -- Q3.6: For each device category, what are the monthly power consumption metrics? SELECT yr, @@ -397,7 +426,7 @@ FROM ( CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer, CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector, CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending - FROM logs3 + FROM mgbench.logs3 WHERE device_type = 'meter' ) AS r GROUP BY dt, From eef10111a6f7e85bcb9739835c6f5f9721ad1880 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 23 Jun 2022 13:05:28 +0200 Subject: [PATCH 14/17] Test 02305_schema_inference_with_globs test --- .../0_stateless/02305_schema_inference_with_globs.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/02305_schema_inference_with_globs.sh b/tests/queries/0_stateless/02305_schema_inference_with_globs.sh index 33969e8b0f7..6ebd17be777 100755 --- a/tests/queries/0_stateless/02305_schema_inference_with_globs.sh +++ b/tests/queries/0_stateless/02305_schema_inference_with_globs.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, disabled +# Tags: no-fasttest CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -10,9 +10,9 @@ $CLICKHOUSE_CLIENT -q "insert into function file(data2.jsonl) select NULL as x f $CLICKHOUSE_CLIENT -q "insert into function file(data3.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" $CLICKHOUSE_CLIENT -q "insert into function file(data4.jsonl) select number % 2 ? number : NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" -$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=8" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; -$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=16" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; -$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=24" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; -$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=31" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=8" +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=16" +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=24" +$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=31" $CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=32" $CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=100" From 272d9aea522c9cc40ace1dadfb1447bf1c484d2f Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 23 Jun 2022 13:34:28 +0200 Subject: [PATCH 15/17] Update 02305_schema_inference_with_globs.sh --- .../02305_schema_inference_with_globs.sh | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/queries/0_stateless/02305_schema_inference_with_globs.sh b/tests/queries/0_stateless/02305_schema_inference_with_globs.sh index 6ebd17be777..f38b004bdd8 100755 --- a/tests/queries/0_stateless/02305_schema_inference_with_globs.sh +++ b/tests/queries/0_stateless/02305_schema_inference_with_globs.sh @@ -5,14 +5,14 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -q "insert into function file(data1.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" -$CLICKHOUSE_CLIENT -q "insert into function file(data2.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" -$CLICKHOUSE_CLIENT -q "insert into function file(data3.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" -$CLICKHOUSE_CLIENT -q "insert into function file(data4.jsonl) select number % 2 ? number : NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02305_data1.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02305_data2.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02305_data3.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02305_data4.jsonl) select number % 2 ? number : NULL as x from numbers(10) settings engine_file_truncate_on_insert=1" -$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=8" -$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=16" -$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=24" -$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=31" -$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=32" -$CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=100" +$CLICKHOUSE_CLIENT -q "desc file('02305_data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=8" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('02305_data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=16" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('02305_data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=24" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('02305_data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=31" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'; +$CLICKHOUSE_CLIENT -q "desc file('02305_data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=32" +$CLICKHOUSE_CLIENT -q "desc file('02305_data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=100" From 42e70b7cd35c0b326957c7e6a7010e1a1d06e30c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 23 Jun 2022 13:38:25 +0200 Subject: [PATCH 16/17] Document why the submodule check does not halt the configuration --- contrib/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 943e0e0ebc1..09cf80595a7 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -37,6 +37,8 @@ function(add_contrib cmake_folder) file(GLOB contrib_files "${base_folder}/*") if (NOT contrib_files) + # Checking out *all* submodules takes > 5 min. Therefore, the smoke build ("FastTest") in CI initializes only the set of + # submodules minimally needed for a build and we cannot assume here that all submodules are populated. message(STATUS "submodule ${base_folder} is missing or empty. to fix try run:") message(STATUS " git submodule update --init") return() From accc479d88a4c44f671c4bbc3da1927ee70bc148 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 23 Jun 2022 09:54:11 -0400 Subject: [PATCH 17/17] revert query changes, add setting --- .../example-datasets/brown-benchmark.md | 49 ++++++++++++------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/docs/en/getting-started/example-datasets/brown-benchmark.md b/docs/en/getting-started/example-datasets/brown-benchmark.md index 7b995cd8345..8da7a18bac3 100644 --- a/docs/en/getting-started/example-datasets/brown-benchmark.md +++ b/docs/en/getting-started/example-datasets/brown-benchmark.md @@ -22,6 +22,10 @@ Create the database and tables: CREATE DATABASE mgbench; ``` +```sql +USE mgbench; +``` + ```sql CREATE TABLE mgbench.logs1 ( log_time DateTime, @@ -87,7 +91,12 @@ clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbe clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv ``` -Run benchmark queries: +## Run benchmark queries: + +```sql +USE mgbench; +``` + ```sql -- Q1.1: What is the CPU/network utilization for each web server since midnight? @@ -106,7 +115,7 @@ FROM ( COALESCE(cpu_user, 0.0) AS cpu, COALESCE(bytes_in, 0.0) AS net_in, COALESCE(bytes_out, 0.0) AS net_out - FROM mgbench.logs1 + FROM logs1 WHERE machine_name IN ('anansi','aragog','urd') AND log_time >= TIMESTAMP '2017-01-11 00:00:00' ) AS r @@ -119,7 +128,7 @@ GROUP BY machine_name; SELECT machine_name, log_time -FROM mgbench.logs1 +FROM logs1 WHERE (machine_name LIKE 'cslab%' OR machine_name LIKE 'mslab%') AND load_one IS NULL @@ -146,7 +155,7 @@ FROM ( load_one, mem_free, swap_free - FROM mgbench.logs1 + FROM logs1 WHERE machine_name = 'babbage' AND load_fifteen IS NOT NULL AND load_five IS NOT NULL @@ -166,7 +175,7 @@ ORDER BY dt, SELECT machine_name, COUNT(*) AS spikes -FROM mgbench.logs1 +FROM logs1 WHERE machine_group = 'Servers' AND cpu_wio > 0.99 AND log_time >= TIMESTAMP '2016-12-01 00:00:00' @@ -186,7 +195,7 @@ FROM ( SELECT machine_name, CAST(log_time AS DATE) AS dt, mem_free - FROM mgbench.logs1 + FROM logs1 WHERE machine_group = 'DMZ' AND mem_free IS NOT NULL ) AS r @@ -210,7 +219,7 @@ FROM ( EXTRACT(HOUR FROM log_time) AS hr, COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in, COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out - FROM mgbench.logs1 + FROM logs1 WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon', 'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey', 'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps', @@ -227,7 +236,7 @@ LIMIT 10; -- Q2.1: Which requests have caused server errors within the past 2 weeks? SELECT * -FROM mgbench.logs2 +FROM logs2 WHERE status_code >= 500 AND log_time >= TIMESTAMP '2012-12-18 00:00:00' ORDER BY log_time; @@ -237,7 +246,7 @@ ORDER BY log_time; -- Q2.2: During a specific 2-week period, was the user password file leaked? SELECT * -FROM mgbench.logs2 +FROM logs2 WHERE status_code >= 200 AND status_code < 300 AND request LIKE '%/etc/passwd%' @@ -257,7 +266,7 @@ FROM ( FROM ( SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len, request - FROM mgbench.logs2 + FROM logs2 WHERE status_code >= 200 AND status_code < 300 AND log_time >= TIMESTAMP '2012-12-01 00:00:00' @@ -277,7 +286,7 @@ ORDER BY top_level; SELECT client_ip, COUNT(*) AS num_requests -FROM mgbench.logs2 +FROM logs2 WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00' GROUP BY client_ip HAVING COUNT(*) >= 100000 @@ -293,7 +302,7 @@ SELECT dt, FROM ( SELECT CAST(log_time AS DATE) AS dt, client_ip - FROM mgbench.logs2 + FROM logs2 ) AS r GROUP BY dt ORDER BY dt; @@ -308,7 +317,7 @@ SELECT AVG(transfer) / 125000000.0 AS transfer_avg, FROM ( SELECT log_time, SUM(object_size) AS transfer - FROM mgbench.logs2 + FROM logs2 GROUP BY log_time ) AS r; ``` @@ -318,7 +327,7 @@ FROM ( -- Q3.1: Did the indoor temperature reach freezing over the weekend? SELECT * -FROM mgbench.logs3 +FROM logs3 WHERE event_type = 'temperature' AND event_value <= 32.0 AND log_time >= '2019-11-29 17:00:00.000'; @@ -331,7 +340,7 @@ WHERE event_type = 'temperature' SELECT device_name, device_floor, COUNT(*) AS ct -FROM mgbench.logs3 +FROM logs3 WHERE event_type = 'door_open' AND log_time >= '2019-06-01 00:00:00.000' GROUP BY device_name, @@ -339,6 +348,10 @@ GROUP BY device_name, ORDER BY ct DESC; ``` +Query 3.5 below uses a UNION. Set the mode for combining SELECT query results. The setting is only used when shared with UNION without explicitly specifying the UNION ALL or UNION DISTINCT. +```sql +SET union_default_mode = 'DISTINCT' +``` ```sql -- Q3.5: Where in the building do large temperature variations occur in winter and summer? @@ -362,7 +375,7 @@ WITH temperature AS ( device_type, device_floor, event_value - FROM mgbench.logs3 + FROM logs3 WHERE event_type = 'temperature' ) AS r GROUP BY dt, @@ -384,7 +397,7 @@ SELECT DISTINCT device_name, FROM temperature WHERE dt >= DATE '2018-12-01' AND dt < DATE '2019-03-01' -UNION DISTINCT +UNION SELECT DISTINCT device_name, device_type, device_floor, @@ -426,7 +439,7 @@ FROM ( CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer, CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector, CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending - FROM mgbench.logs3 + FROM logs3 WHERE device_type = 'meter' ) AS r GROUP BY dt,