From c80eac0dc3e32f954572b3e29dd93125b4256201 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 12 Jun 2021 14:29:16 +0000 Subject: [PATCH 01/86] First version --- src/Disks/DiskStatic.cpp | 136 ++++++++++++++++++ src/Disks/DiskStatic.h | 55 +++++++ src/Disks/DiskType.h | 6 +- src/Disks/HDFS/DiskHDFS.cpp | 13 +- src/Disks/HDFS/DiskHDFS.h | 3 + src/Disks/IDiskRemote.cpp | 7 +- src/Disks/IDiskRemote.h | 95 +++++++----- src/Disks/ReadIndirectBufferFromRemoteFS.cpp | 7 +- src/Disks/ReadIndirectBufferFromRemoteFS.h | 4 - src/Disks/WriteIndirectBufferFromRemoteFS.cpp | 7 +- src/Disks/WriteIndirectBufferFromRemoteFS.h | 4 - src/Disks/registerDisks.cpp | 4 + src/IO/ReadBufferFromStatic.cpp | 110 ++++++++++++++ src/IO/ReadBufferFromStatic.h | 38 +++++ 14 files changed, 427 insertions(+), 62 deletions(-) create mode 100644 src/Disks/DiskStatic.cpp create mode 100644 src/Disks/DiskStatic.h create mode 100644 src/IO/ReadBufferFromStatic.cpp create mode 100644 src/IO/ReadBufferFromStatic.h diff --git a/src/Disks/DiskStatic.cpp b/src/Disks/DiskStatic.cpp new file mode 100644 index 00000000000..b3f3752ab43 --- /dev/null +++ b/src/Disks/DiskStatic.cpp @@ -0,0 +1,136 @@ +#include "DiskStatic.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace fs = std::filesystem; + + +namespace DB +{ + +class ReadIndirectBufferFromStatic final : public ReadIndirectBufferFromRemoteFS +{ +public: + ReadIndirectBufferFromStatic( + const String & url_, + DiskStatic::Metadata metadata_, + ContextPtr context_, + size_t max_read_tries_, + size_t buf_size_) + : ReadIndirectBufferFromRemoteFS(metadata_) + , url(url_) + , context(context_) + , max_read_tries(max_read_tries_) + , buf_size(buf_size_) + { + } + + std::unique_ptr createReadBuffer(const String & path) override + { + return std::make_unique(url + path, context, max_read_tries, buf_size); + } + +private: + String url; + ContextPtr context; + size_t max_read_tries; + size_t buf_size; +}; + + +DiskStatic::DiskStatic(const String & disk_name_, + const String & files_root_path_url_, + const String & metadata_path_, + ContextPtr context_, + SettingsPtr settings_) + : IDiskRemote(disk_name_, files_root_path_url_, metadata_path_, "DiskStatic", settings_->thread_pool_size) + , WithContext(context_->getGlobalContext()) + , settings(std::move(settings_)) +{ +} + + +void DiskStatic::startup() +{ +} + + +std::unique_ptr DiskStatic::readFile(const String & path, size_t buf_size, size_t, size_t, size_t, MMappedFileCache *) const +{ + auto metadata = readMeta(path); + + LOG_DEBUG(log, "Read from file by path: {}. Existing objects: {}", + backQuote(metadata_path + path), metadata.remote_fs_objects.size()); + + auto reader = std::make_unique(remote_fs_root_path, metadata, getContext(), 1, buf_size); + return std::make_unique(std::move(reader), settings->min_bytes_for_seek); +} + + +std::unique_ptr DiskStatic::writeFile(const String & path, size_t buf_size, WriteMode mode) +{ + auto metadata = readOrCreateMetaForWriting(path, mode); + /// Update read_only flag in IDiskRemote::metadata. 
+ /// setReadOnly(); + + auto file_name = generateName(); + auto file_path = remote_fs_root_path + file_name; + + LOG_DEBUG(log, "Write to file url: {}", file_path); + + auto timeouts = ConnectionTimeouts::getHTTPTimeouts(getContext()); + + Poco::URI uri(file_path); + auto writer = std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_PUT, timeouts, buf_size); + + return std::make_unique>(std::move(writer), + std::move(metadata), + file_name); +} + + +namespace +{ +std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) +{ + return std::make_unique( + config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), + config.getInt(config_prefix + ".thread_pool_size", 16)); +} +} + +void registerDiskStatic(DiskFactory & factory) +{ + auto creator = [](const String & disk_name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextConstPtr context) -> DiskPtr + { + fs::path disk = fs::path(context->getPath()) / "disks" / disk_name; + fs::create_directories(disk); + + String url{config.getString(config_prefix + ".endpoint")}; + if (!url.ends_with('/')) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "URL must end with '/', but '{}' doesn't.", url); + + String metadata_path = fs::path(context->getPath()) / "disks" / disk_name / ""; + + return std::make_shared(disk_name, url, metadata_path, context, getSettings(config, config_prefix)); + }; + + factory.registerDiskType("static", creator); +} + + +} diff --git a/src/Disks/DiskStatic.h b/src/Disks/DiskStatic.h new file mode 100644 index 00000000000..9f3c2598236 --- /dev/null +++ b/src/Disks/DiskStatic.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +struct DiskStaticSettings +{ + size_t min_bytes_for_seek; + int thread_pool_size; + int objects_chunk_size_to_delete; + + DiskStaticSettings( + int min_bytes_for_seek_, + int thread_pool_size_) + : min_bytes_for_seek(min_bytes_for_seek_) + , thread_pool_size(thread_pool_size_) {} +}; + + +class DiskStatic : public IDiskRemote, WithContext +{ +using SettingsPtr = std::unique_ptr; + +public: + DiskStatic(const String & disk_name_, + const String & files_root_path_url_, + const String & metadata_path_, + ContextPtr context, + SettingsPtr settings_); + + DiskType::Type getType() const override { return DiskType::Type::Static; } + + virtual void startup() override; + + std::unique_ptr readFile( + const String & path, + size_t buf_size, + size_t estimated_size, + size_t aio_threshold, + size_t mmap_threshold, + MMappedFileCache * mmap_cache) const override; + + std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode) override; + +private: + String generateName() { return toString(UUIDHelpers::generateV4()); } + + SettingsPtr settings; +}; + +} diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index a5c23fe2c2c..8a10a790be0 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -12,8 +12,10 @@ struct DiskType Local, RAM, S3, - HDFS + HDFS, + Static }; + static String toString(Type disk_type) { switch (disk_type) @@ -26,6 +28,8 @@ struct DiskType return "s3"; case Type::HDFS: return "hdfs"; + case Type::Static: + return "static"; } __builtin_unreachable(); } diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index dafd507ba1e..9d40c613ecf 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -67,7 +67,7 @@ public: std::unique_ptr createReadBuffer(const String & path) override { - return 
std::make_unique(hdfs_uri, hdfs_directory + path, config, buf_size); + return std::make_unique(hdfs_uri, fs::path(hdfs_directory) / path, config, buf_size); } private: @@ -97,9 +97,8 @@ std::unique_ptr DiskHDFS::readFile(const String & path, { auto metadata = readMeta(path); - LOG_DEBUG(log, - "Read from file by path: {}. Existing HDFS objects: {}", - backQuote(metadata_path + path), metadata.remote_fs_objects.size()); + LOG_DEBUG(log, "Read from file by path: {}. Existing HDFS objects: {}", + backQuote((fs::path(metadata_path) / path).string()), metadata.remote_fs_objects.size()); auto reader = std::make_unique(config, remote_fs_root_path, metadata, buf_size); return std::make_unique(std::move(reader), settings->min_bytes_for_seek); @@ -112,10 +111,10 @@ std::unique_ptr DiskHDFS::writeFile(const String & path /// Path to store new HDFS object. auto file_name = getRandomName(); - auto hdfs_path = remote_fs_root_path + file_name; + String hdfs_path = fs::path(remote_fs_root_path) / file_name; LOG_DEBUG(log, "{} to file by path: {}. HDFS path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", - backQuote(metadata_path + path), remote_fs_root_path + hdfs_path); + backQuote((fs::path(metadata_path) / path).string()), remote_fs_root_path + hdfs_path); /// Single O_WRONLY in libhdfs adds O_TRUNC auto hdfs_buffer = std::make_unique(hdfs_path, @@ -178,7 +177,7 @@ void registerDiskHDFS(DiskFactory & factory) String uri{config.getString(config_prefix + ".endpoint")}; if (uri.back() != '/') - throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must end with '/', but '{}' doesn't.", uri); String metadata_path = context_->getPath() + "disks/" + name + "/"; diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h index 49fdf44728b..95a321d5c45 100644 --- a/src/Disks/HDFS/DiskHDFS.h +++ b/src/Disks/HDFS/DiskHDFS.h @@ -32,6 +32,9 @@ struct DiskHDFSSettings */ class DiskHDFS final : public IDiskRemote { + +friend class DiskHDFSReservation; + public: using SettingsPtr = std::unique_ptr; diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index b30e9613ed8..3e2ea2457a6 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -72,10 +72,9 @@ IDiskRemote::Metadata::Metadata( readEscapedString(remote_fs_object_path, buf); if (version == VERSION_ABSOLUTE_PATHS) { - if (!boost::algorithm::starts_with(remote_fs_object_path, remote_fs_root_path)) - throw Exception( - ErrorCodes::UNKNOWN_FORMAT, - "Path in metadata does not correspond S3 root path. Path: {}, root path: {}, disk path: {}", + if (!remote_fs_object_path.starts_with(remote_fs_root_path)) + throw Exception(ErrorCodes::UNKNOWN_FORMAT, + "Path in metadata does not correspond to root path. Path: {}, root path: {}, disk path: {}", remote_fs_object_path, remote_fs_root_path, disk_path_); remote_fs_object_path = remote_fs_object_path.substr(remote_fs_root_path.size()); diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index e725e0ed744..0d088bc2c16 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -17,6 +17,11 @@ namespace fs = std::filesystem; namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + /// Helper class to collect paths into chunks of maximum size. /// For s3 it is Aws::vector, for hdfs it is std::vector. 
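/// Collected chunks are handed to removeFromRemoteFS(), so deletions can be
/// issued to the remote FS in bulk rather than one request per object.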
class RemoteFSPathKeeper @@ -35,13 +40,15 @@ protected: using RemoteFSPathKeeperPtr = std::shared_ptr; -/// Base Disk class for remote FS's, which are not posix-compatible (DiskS3 and DiskHDFS) +/// Base Disk class for remote FS's, which are not posix-compatible. +/// Used for s3, hdfs, static. class IDiskRemote : public IDisk { - friend class DiskRemoteReservation; public: + struct Metadata; + IDiskRemote( const String & name_, const String & remote_fs_root_path_, @@ -49,8 +56,6 @@ public: const String & log_name_, size_t thread_pool_size); - struct Metadata; - const String & getName() const final override { return name; } const String & getPath() const final override { return metadata_path; } @@ -61,21 +66,33 @@ public: Metadata readOrCreateMetaForWriting(const String & path, WriteMode mode); - UInt64 getTotalSpace() const override { return std::numeric_limits::max(); } + UInt64 getTotalSpace() const final override { return std::numeric_limits::max(); } - UInt64 getAvailableSpace() const override { return std::numeric_limits::max(); } + UInt64 getAvailableSpace() const final override { return std::numeric_limits::max(); } - UInt64 getUnreservedSpace() const override { return std::numeric_limits::max(); } + UInt64 getUnreservedSpace() const final override { return std::numeric_limits::max(); } - UInt64 getKeepingFreeSpace() const override { return 0; } + /// Read-only part - bool exists(const String & path) const override; + bool exists(const String & path) const final override; - bool isFile(const String & path) const override; + bool isFile(const String & path) const final override; - void createFile(const String & path) override; + size_t getFileSize(const String & path) const final override; - size_t getFileSize(const String & path) const override; + void listFiles(const String & path, std::vector & file_names) override; + + void setReadOnly(const String & path) override; + + bool isDirectory(const String & path) const override; + + DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; + + Poco::Timestamp getLastModified(const String & path) override; + + ReservationPtr reserve(UInt64 bytes) override; + + /// Write and modification part void moveFile(const String & from_path, const String & to_path) override; @@ -91,41 +108,46 @@ public: void removeSharedRecursive(const String & path, bool keep_in_remote_fs) override; - void listFiles(const String & path, std::vector & file_names) override; - - void setReadOnly(const String & path) override; - - bool isDirectory(const String & path) const override; - - void createDirectory(const String & path) override; - - void createDirectories(const String & path) override; - void clearDirectory(const String & path) override; void moveDirectory(const String & from_path, const String & to_path) override { moveFile(from_path, to_path); } void removeDirectory(const String & path) override; - DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; - void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; - Poco::Timestamp getLastModified(const String & path) override; + /// Overriden by disks s3 and hdfs. + virtual void removeFromRemoteFS(RemoteFSPathKeeperPtr /* fs_paths_keeper */) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} does not support removing remote files", getName()); + } + + /// Overriden by disks s3 and hdfs. 
+ virtual RemoteFSPathKeeperPtr createFSPathKeeper() const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} does not support storage keeper", getName()); + } + + /// Create part + + void createFile(const String & path) final override; + + void createDirectory(const String & path) override; + + void createDirectories(const String & path) override; void createHardLink(const String & src_path, const String & dst_path) override; - ReservationPtr reserve(UInt64 bytes) override; - - virtual void removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) = 0; - - virtual RemoteFSPathKeeperPtr createFSPathKeeper() const = 0; - protected: Poco::Logger * log; + + /// Disk name const String name; + + /// URL + root path to store files in remote FS. const String remote_fs_root_path; + /// Path to store remote FS metadata, i.e. file name in remote FS, its size, etc. const String metadata_path; private: @@ -142,8 +164,9 @@ private: using RemoteDiskPtr = std::shared_ptr; -/// Remote FS (S3, HDFS) metadata file layout: -/// Number of FS objects, Total size of all FS objects. + +/// Remote FS (S3, HDFS, WEB-server) metadata file layout: +/// Number of FS objects, total size of all FS objects. /// Each FS object represents path where object located in FS and size of object. struct IDiskRemote::Metadata @@ -155,7 +178,7 @@ struct IDiskRemote::Metadata using PathAndSize = std::pair; - /// Remote FS (S3, HDFS) root path. + /// Remote FS (S3, HDFS, WEB-server) root path (uri + files directory path). const String & remote_fs_root_path; /// Disk path. @@ -164,10 +187,10 @@ struct IDiskRemote::Metadata /// Relative path to metadata file on local FS. String metadata_file_path; - /// Total size of all remote FS (S3, HDFS) objects. + /// Total size of all remote FS objects. size_t total_size = 0; - /// Remote FS (S3, HDFS) objects paths and their sizes. + /// Remote FS objects paths and their sizes. std::vector remote_fs_objects; /// Number of references (hardlinks) to this metadata file. 
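To make the metadata layout sketched above concrete: each logical file is backed by a small local text file that lists the remote objects (relative path plus size) holding its data, together with a hardlink reference count. A minimal, self-contained sketch of writing such a record — MetadataRecord and the exact field framing are illustrative assumptions, not the real IDiskRemote::Metadata serialization:

#include <cstddef>
#include <fstream>
#include <string>
#include <utility>
#include <vector>

/// Hypothetical stand-in for IDiskRemote::Metadata: one small local file
/// describing which remote objects back a logical file and how large they are.
struct MetadataRecord
{
    std::vector<std::pair<std::string, size_t>> remote_fs_objects; /// relative path, size
    size_t total_size = 0;
    unsigned ref_count = 0; /// number of hardlinks to this metadata file
};

/// Serialize in a line-oriented form following the comments above: object count
/// and total size first, then one "size <tab> path" line per object, then the
/// reference count.
void saveMetadata(const MetadataRecord & record, const std::string & metadata_file_path)
{
    std::ofstream out(metadata_file_path);
    out << record.remote_fs_objects.size() << '\t' << record.total_size << '\n';
    for (const auto & [path, size] : record.remote_fs_objects)
        out << size << '\t' << path << '\n';
    out << record.ref_count << '\n';
}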
diff --git a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp index 6d4764e4392..29251aeace7 100644 --- a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp @@ -1,7 +1,7 @@ #include "ReadIndirectBufferFromRemoteFS.h" -#if USE_AWS_S3 || USE_HDFS #include +#include #include @@ -124,6 +124,7 @@ template class ReadIndirectBufferFromRemoteFS; #endif -} +template +class ReadIndirectBufferFromRemoteFS; -#endif +} diff --git a/src/Disks/ReadIndirectBufferFromRemoteFS.h b/src/Disks/ReadIndirectBufferFromRemoteFS.h index f106e98153f..d3e61dc1289 100644 --- a/src/Disks/ReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/ReadIndirectBufferFromRemoteFS.h @@ -4,8 +4,6 @@ #include #endif -#if USE_AWS_S3 || USE_HDFS - #include #include #include @@ -45,5 +43,3 @@ private: }; } - -#endif diff --git a/src/Disks/WriteIndirectBufferFromRemoteFS.cpp b/src/Disks/WriteIndirectBufferFromRemoteFS.cpp index 6951b9fa92e..9a805ad6a5b 100644 --- a/src/Disks/WriteIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/WriteIndirectBufferFromRemoteFS.cpp @@ -1,8 +1,8 @@ #include "WriteIndirectBufferFromRemoteFS.h" -#if USE_AWS_S3 || USE_HDFS #include #include +#include namespace DB @@ -65,6 +65,7 @@ template class WriteIndirectBufferFromRemoteFS; #endif -} +template +class WriteIndirectBufferFromRemoteFS; -#endif +} diff --git a/src/Disks/WriteIndirectBufferFromRemoteFS.h b/src/Disks/WriteIndirectBufferFromRemoteFS.h index 4285736858a..69f5705356e 100644 --- a/src/Disks/WriteIndirectBufferFromRemoteFS.h +++ b/src/Disks/WriteIndirectBufferFromRemoteFS.h @@ -4,8 +4,6 @@ #include #endif -#if USE_AWS_S3 || USE_HDFS - #include #include #include @@ -38,5 +36,3 @@ private: }; } - -#endif diff --git a/src/Disks/registerDisks.cpp b/src/Disks/registerDisks.cpp index 8f4901e49e5..e240fe0f83e 100644 --- a/src/Disks/registerDisks.cpp +++ b/src/Disks/registerDisks.cpp @@ -20,6 +20,8 @@ void registerDiskS3(DiskFactory & factory); void registerDiskHDFS(DiskFactory & factory); #endif +void registerDiskStatic(DiskFactory & factory); + void registerDisks() { @@ -35,6 +37,8 @@ void registerDisks() #if USE_HDFS registerDiskHDFS(factory); #endif + + registerDiskStatic(factory); } } diff --git a/src/IO/ReadBufferFromStatic.cpp b/src/IO/ReadBufferFromStatic.cpp new file mode 100644 index 00000000000..cb5baac6ee2 --- /dev/null +++ b/src/IO/ReadBufferFromStatic.cpp @@ -0,0 +1,110 @@ +#include "ReadBufferFromStatic.h" + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_SEEK_THROUGH_FILE; + extern const int SEEK_POSITION_OUT_OF_BOUND; + extern const int NETWORK_ERROR; +} + + +ReadBufferFromStatic::ReadBufferFromStatic(const String & url_, + ContextPtr context_, + size_t max_read_tries_, + size_t buffer_size_) + : SeekableReadBuffer(nullptr, 0) + , log(&Poco::Logger::get("ReadBufferFromStaticFilesWebServer")) + , context(context_) + , url(url_) + , buffer_size(buffer_size_) + , max_read_tries(max_read_tries_) +{ +} + + +std::unique_ptr ReadBufferFromStatic::initialize() +{ + Poco::URI uri(url); + return std::make_unique( + uri, + Poco::Net::HTTPRequest::HTTP_GET, + ReadWriteBufferFromHTTP::OutStreamCallback(), + ConnectionTimeouts::getHTTPTimeouts(context), + 0, + Poco::Net::HTTPBasicCredentials{}, + buffer_size); +} + + +bool ReadBufferFromStatic::nextImpl() +{ + if (!impl) + impl = initialize(); + + pos = impl->position(); + + bool ret = false, successful_read = false; + auto sleep_milliseconds = 
std::chrono::milliseconds(100); + + for (size_t try_num = 0; try_num < max_read_tries; ++try_num) + { + try + { + ret = impl->next(); + successful_read = true; + break; + } + catch (const Exception & e) + { + LOG_WARNING(log, "Read attempt {}/{} failed from {}. ({})", try_num, max_read_tries, url, e.message()); + } + + std::this_thread::sleep_for(sleep_milliseconds); + sleep_milliseconds *= 2; + } + + if (!successful_read) + throw Exception(ErrorCodes::NETWORK_ERROR, "All read attempts ({}) failed for url {}", max_read_tries, url); + + if (ret) + { + internal_buffer = impl->buffer(); + working_buffer = internal_buffer; + /// Do not update pos here, because it is anyway overwritten after nextImpl() in ReadBuffer::next(). + } + + return ret; +} + + +off_t ReadBufferFromStatic::seek(off_t offset_, int whence) +{ + if (impl) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Seek is allowed only before first read attempt from the buffer"); + + if (whence != SEEK_SET) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed"); + + if (offset_ < 0) + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", std::to_string(offset_)); + + offset = offset_; + + return offset; +} + + +off_t ReadBufferFromStatic::getPosition() +{ + return offset + count(); +} + +} diff --git a/src/IO/ReadBufferFromStatic.h b/src/IO/ReadBufferFromStatic.h new file mode 100644 index 00000000000..8de90074cb9 --- /dev/null +++ b/src/IO/ReadBufferFromStatic.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class ReadBufferFromStatic : public SeekableReadBuffer +{ +public: + explicit ReadBufferFromStatic(const String & url_, + ContextPtr context, + UInt64 max_read_tries_, + size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); + + bool nextImpl() override; + + off_t seek(off_t off, int whence) override; + + off_t getPosition() override; + +private: + std::unique_ptr initialize(); + + Poco::Logger * log; + ContextPtr context; + + const String url; + size_t buffer_size, max_read_tries; + + std::unique_ptr impl; + + off_t offset = 0; +}; + +} From 603c06d02ef2f61d6c5aa4a4df5adffccae2ab08 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 13 Jun 2021 12:56:22 +0000 Subject: [PATCH 02/86] Add test --- docker/test/integration/runner/Dockerfile | 2 + .../runner/compose/docker_compose_nginx.yml | 11 +++ tests/integration/helpers/cluster.py | 68 +++++++++++++++---- .../test_disk_over_web_server/__init__.py | 0 .../configs/storage_conf.xml | 27 ++++++++ .../test_disk_over_web_server/test.py | 50 ++++++++++++++ 6 files changed, 144 insertions(+), 14 deletions(-) create mode 100644 docker/test/integration/runner/compose/docker_compose_nginx.yml create mode 100644 tests/integration/test_disk_over_web_server/__init__.py create mode 100644 tests/integration/test_disk_over_web_server/configs/storage_conf.xml create mode 100644 tests/integration/test_disk_over_web_server/test.py diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 0665ab7560f..e0026e75f89 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -94,6 +94,8 @@ RUN set -x \ && echo 'dockremap:165536:65536' >> /etc/subuid \ && echo 'dockremap:165536:65536' >> /etc/subgid +RUN echo '127.0.0.1 localhost test.com' >> /etc/hosts + EXPOSE 2375 ENTRYPOINT ["dockerd-entrypoint.sh"] CMD ["sh", "-c", "pytest $PYTEST_OPTS"] diff --git 
a/docker/test/integration/runner/compose/docker_compose_nginx.yml b/docker/test/integration/runner/compose/docker_compose_nginx.yml new file mode 100644 index 00000000000..d0fb9fc1ff4 --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_nginx.yml @@ -0,0 +1,11 @@ +version: '2.3' +services: + # nginx server to host static files. + # Accepts only PUT data by test.com/path and GET already existing data on test.com/path. + # Files will be put into /usr/share/nginx/files. + + nginx: + image: kssenii/nginx-test:1.1 + restart: always + ports: + - 80:80 diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 27d8c05d8ed..ca66acac8fd 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -35,7 +35,7 @@ from helpers.test_tools import assert_eq_with_retry import docker from .client import Client -from .hdfs_api import HDFSApi +#from .hdfs_api import HDFSApi HELPERS_DIR = p.dirname(__file__) CLICKHOUSE_ROOT_DIR = p.join(p.dirname(__file__), "../../..") @@ -248,10 +248,11 @@ class ClickHouseCluster: self.with_redis = False self.with_cassandra = False self.with_jdbc_bridge = False + self.with_nginx = False self.with_minio = False self.minio_dir = os.path.join(self.instances_dir, "minio") - self.minio_certs_dir = None # source for certificates + self.minio_certs_dir = None # source for certificates self.minio_host = "minio1" self.minio_ip = None self.minio_bucket = "root" @@ -309,6 +310,11 @@ class ClickHouseCluster: self.rabbitmq_dir = p.abspath(p.join(self.instances_dir, "rabbitmq")) self.rabbitmq_logs_dir = os.path.join(self.rabbitmq_dir, "logs") + # available when with_rabbitmq == True + self.nginx_host = "nginx" + self.nginx_ip = None + self.nginx_port = 80 + self.nginx_id = self.get_instance_docker_id(self.nginx_host) # available when with_redis == True self.redis_host = "redis1" @@ -333,7 +339,7 @@ class ClickHouseCluster: # available when with_mysql_client == True self.mysql_client_host = "mysql_client" self.mysql_client_container = None - + # available when with_mysql == True self.mysql_host = "mysql57" self.mysql_port = 3306 @@ -532,7 +538,7 @@ class ClickHouseCluster: return self.base_mysql8_cmd - def setup_mysql_cluster_cmd(self, instance, env_variables, docker_compose_yml_dir): + def setup_mysql_cluster_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_mysql_cluster = True env_variables['MYSQL_CLUSTER_PORT'] = str(self.mysql_port) env_variables['MYSQL_CLUSTER_ROOT_HOST'] = '%' @@ -556,7 +562,7 @@ class ClickHouseCluster: '--file', p.join(docker_compose_yml_dir, 'docker_compose_postgres.yml')] return self.base_postgres_cmd - def setup_postgres_cluster_cmd(self, instance, env_variables, docker_compose_yml_dir): + def setup_postgres_cluster_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_postgres_cluster = True env_variables['POSTGRES_PORT'] = str(self.postgres_port) env_variables['POSTGRES2_DIR'] = self.postgres2_logs_dir @@ -648,7 +654,7 @@ class ClickHouseCluster: return self.base_mongo_cmd def setup_minio_cmd(self, instance, env_variables, docker_compose_yml_dir): - self.with_minio = True + self.with_minio = True cert_d = p.join(self.minio_dir, "certs") env_variables['MINIO_CERTS_DIR'] = cert_d env_variables['MINIO_PORT'] = str(self.minio_port) @@ -674,10 +680,18 @@ class ClickHouseCluster: '--file', p.join(docker_compose_yml_dir, 'docker_compose_jdbc_bridge.yml')] return self.base_jdbc_bridge_cmd + def setup_nginx_cmd(self, instance, env_variables, 
docker_compose_yml_dir): + self.with_nginx = True + + self.base_cmd.extend(['--file', p.join(docker_compose_yml_dir, 'docker_compose_nginx.yml')]) + self.base_nginx_cmd = ['docker-compose', '--env-file', instance.env_file, '--project-name', self.project_name, + '--file', p.join(docker_compose_yml_dir, 'docker_compose_nginx.yml')] + return self.base_nginx_cmd + def add_instance(self, name, base_config_dir=None, main_configs=None, user_configs=None, dictionaries=None, macros=None, with_zookeeper=False, with_zookeeper_secure=False, - with_mysql_client=False, with_mysql=False, with_mysql8=False, with_mysql_cluster=False, - with_kafka=False, with_kerberized_kafka=False, with_rabbitmq=False, clickhouse_path_dir=None, + with_mysql_client=False, with_mysql=False, with_mysql8=False, with_mysql_cluster=False, + with_kafka=False, with_kerberized_kafka=False, with_rabbitmq=False, with_nginx=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_postgres_cluster=False, with_hdfs=False, with_kerberized_hdfs=False, with_mongo=False, with_redis=False, with_minio=False, with_cassandra=False, with_jdbc_bridge=False, hostname=None, env_variables=None, image="yandex/clickhouse-integration-test", tag=None, @@ -730,6 +744,7 @@ class ClickHouseCluster: with_kafka=with_kafka, with_kerberized_kafka=with_kerberized_kafka, with_rabbitmq=with_rabbitmq, + with_nginx=with_nginx, with_kerberized_hdfs=with_kerberized_hdfs, with_mongo=with_mongo, with_redis=with_redis, @@ -810,6 +825,9 @@ class ClickHouseCluster: if with_rabbitmq and not self.with_rabbitmq: cmds.append(self.setup_rabbitmq_cmd(instance, env_variables, docker_compose_yml_dir)) + if with_nginx and not self.with_nginx: + cmds.append(self.setup_nginx_cmd(instance, env_variables, docker_compose_yml_dir)) + if with_hdfs and not self.with_hdfs: cmds.append(self.setup_hdfs_cmd(instance, env_variables, docker_compose_yml_dir)) @@ -833,7 +851,7 @@ class ClickHouseCluster: if self.minio_certs_dir is None: self.minio_certs_dir = minio_certs_dir else: - raise Exception("Overwriting minio certs dir") + raise Exception("Overwriting minio certs dir") if with_cassandra and not self.with_cassandra: cmds.append(self.setup_cassandra_cmd(instance, env_variables, docker_compose_yml_dir)) @@ -1095,6 +1113,20 @@ class ClickHouseCluster: raise Exception("Cannot wait RabbitMQ container") + def wait_nginx_to_start(self, timeout=60): + self.nginx_ip = self.get_instance_ip(self.nginx_host) + start = time.time() + while time.time() - start < timeout: + try: + self.exec_in_container(self.nginx_id, ["curl", "-X", "PUT", "-d", "Test", "http://test.com/test.txt"]) + res = self.exec_in_container(self.nginx_id, ["curl", "-X", "GET", "http://test.com/test.txt"]) + assert(res == 'Test') + print('nginx static files server is available') + return + except Exception as ex: + print("Can't connect to nginx: " + str(ex)) + time.sleep(0.5) + def wait_zookeeper_secure_to_start(self, timeout=20): logging.debug("Wait ZooKeeper Secure to start") start = time.time() @@ -1135,6 +1167,7 @@ class ClickHouseCluster: krb_conf = p.abspath(p.join(self.instances['node1'].path, "secrets/krb_long.conf")) self.hdfs_kerberized_ip = self.get_instance_ip(self.hdfs_kerberized_host) kdc_ip = self.get_instance_ip('hdfskerberos') + self.hdfs_api = HDFSApi(user="root", timeout=timeout, kerberized=True, @@ -1146,7 +1179,7 @@ class ClickHouseCluster: proxy_port=self.hdfs_kerberized_name_port, data_port=self.hdfs_kerberized_data_port, hdfs_ip=self.hdfs_kerberized_ip, - kdc_ip=kdc_ip) + 
kdc_ip=kdc_ip) else: self.hdfs_ip = self.get_instance_ip(self.hdfs_host) self.hdfs_api = HDFSApi(user="root", host=self.hdfs_host, data_port=self.hdfs_data_port, proxy_port=self.hdfs_name_port, hdfs_ip=self.hdfs_ip) @@ -1246,7 +1279,7 @@ class ClickHouseCluster: raise Exception("Can't wait Schema Registry to start") - + def wait_cassandra_to_start(self, timeout=180): self.cassandra_ip = self.get_instance_ip(self.cassandra_host) cass_client = cassandra.cluster.Cluster([self.cassandra_ip], port=self.cassandra_port, load_balancing_policy=RoundRobinPolicy()) @@ -1321,7 +1354,7 @@ class ClickHouseCluster: for dir in self.zookeeper_dirs_to_create: os.makedirs(dir) - + if self.use_keeper: # TODO: remove hardcoded paths from here for i in range(1,4): shutil.copy(os.path.join(HELPERS_DIR, f'keeper_config{i}.xml'), os.path.join(self.keeper_instance_dir_prefix + f"{i}", "config" )) @@ -1421,6 +1454,12 @@ class ClickHouseCluster: self.make_hdfs_api(kerberized=True) self.wait_hdfs_to_start() + if self.with_nginx and self.base_nginx_cmd: + logging.debug('Setup nginx') + subprocess_check_call(self.base_nginx_cmd + common_opts + ['--renew-anon-volumes']) + self.nginx_docker_id = self.get_instance_docker_id('nginx') + self.wait_nginx_to_start() + if self.with_mongo and self.base_mongo_cmd: logging.debug('Setup Mongo') run_and_check(self.base_mongo_cmd + common_opts) @@ -1651,7 +1690,7 @@ class ClickHouseInstance: self, cluster, base_path, name, base_config_dir, custom_main_configs, custom_user_configs, custom_dictionaries, macros, with_zookeeper, zookeeper_config_path, with_mysql_client, with_mysql, with_mysql8, with_mysql_cluster, with_kafka, with_kerberized_kafka, - with_rabbitmq, with_kerberized_hdfs, with_mongo, with_redis, with_minio, with_jdbc_bridge, + with_rabbitmq, with_nginx, with_kerberized_hdfs, with_mongo, with_redis, with_minio, with_jdbc_bridge, with_cassandra, server_bin_path, odbc_bridge_bin_path, library_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, with_postgres, with_postgres_cluster, clickhouse_start_command=CLICKHOUSE_START_COMMAND, main_config_name="config.xml", users_config_name="users.xml", copy_common_configs=True, @@ -1689,6 +1728,7 @@ class ClickHouseInstance: self.with_kafka = with_kafka self.with_kerberized_kafka = with_kerberized_kafka self.with_rabbitmq = with_rabbitmq + self.with_nginx = with_nginx self.with_kerberized_hdfs = with_kerberized_hdfs self.with_mongo = with_mongo self.with_redis = with_redis @@ -2173,7 +2213,7 @@ class ClickHouseInstance: depends_on.append("postgres2") depends_on.append("postgres3") depends_on.append("postgres4") - + if self.with_kafka: depends_on.append("kafka1") depends_on.append("schema-registry") diff --git a/tests/integration/test_disk_over_web_server/__init__.py b/tests/integration/test_disk_over_web_server/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_disk_over_web_server/configs/storage_conf.xml b/tests/integration/test_disk_over_web_server/configs/storage_conf.xml new file mode 100644 index 00000000000..b1b35627cef --- /dev/null +++ b/tests/integration/test_disk_over_web_server/configs/storage_conf.xml @@ -0,0 +1,27 @@ + + + + + + static + http://nginx:80/ + + + local + / + + + + + +
+                    <main>
+                        <disk>static</disk>
+                    </main>
+                    <external>
+                        <disk>hdd</disk>
+                    </external>
+                </volumes>
+            </static>
+        </policies>
+    </storage_configuration>
+</yandex>
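For reference, the complete configuration this new file presumably contains is shown below: a `static` disk pointing at the nginx endpoint plus a local `hdd` disk, and a `static` policy matching the `storage_policy` used by the test that follows. Volume element names such as <main> and <external> are assumed here:

<yandex>
    <storage_configuration>
        <disks>
            <static>
                <type>static</type>
                <endpoint>http://nginx:80/</endpoint>
            </static>
            <hdd>
                <type>local</type>
                <path>/</path>
            </hdd>
        </disks>
        <policies>
            <static>
                <volumes>
                    <main>
                        <disk>static</disk>
                    </main>
                    <external>
                        <disk>hdd</disk>
                    </external>
                </volumes>
            </static>
        </policies>
    </storage_configuration>
</yandex>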
diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py new file mode 100644 index 00000000000..a743fc523c6 --- /dev/null +++ b/tests/integration/test_disk_over_web_server/test.py @@ -0,0 +1,50 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance("node", main_configs=["configs/storage_conf.xml"], with_nginx=True) + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_insert_select(cluster): + node = cluster.instances["node"] + node.query(""" + CREATE TABLE test1 (id Int32) + ENGINE = MergeTree() ORDER BY id + SETTINGS storage_policy = 'static'; + """) + + node.query("INSERT INTO test1 SELECT number FROM numbers(100)") + result = node.query("SELECT count() FROM test1") + assert(int(result) == 100) + + node.query("DETACH TABLE test1") + node.query("ATTACH TABLE test1") + result = node.query("SELECT count() FROM test1") + assert(int(result) == 100) + + node = cluster.instances["node"] + node.query(""" + CREATE TABLE test2 (id Int32) + ENGINE = MergeTree() ORDER BY id + SETTINGS storage_policy = 'static'; + """) + + node.query("INSERT INTO test2 SELECT number FROM numbers(500000)") + result = node.query("SELECT id FROM test2 ORDER BY id") + expected = node.query("SELECT number FROM numbers(500000)") + assert(result == expected) + + node.query("DETACH TABLE test2") + node.query("ATTACH TABLE test2") + result = node.query("SELECT count() FROM test2") + assert(int(result) == 500000) From 2cfcc9980c64b7fca1c70e1c87976162bef78342 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 13 Jun 2021 14:02:08 +0000 Subject: [PATCH 03/86] Better --- src/Core/Settings.h | 1 + src/Disks/DiskStatic.cpp | 136 ------------------ src/Disks/DiskType.h | 6 +- src/Disks/DiskWEBServer.cpp | 119 +++++++++++++++ src/Disks/{DiskStatic.h => DiskWEBServer.h} | 28 ++-- src/Disks/HDFS/DiskHDFS.cpp | 13 +- src/Disks/HDFS/DiskHDFS.h | 3 - src/Disks/IDiskRemote.h | 21 +-- src/Disks/ReadIndirectBufferFromRemoteFS.cpp | 4 +- src/Disks/registerDisks.cpp | 4 +- ...pp => ReadIndirectBufferFromWEBServer.cpp} | 23 ++- ...ic.h => ReadIndirectBufferFromWEBServer.h} | 7 +- tests/integration/helpers/cluster.py | 4 +- .../configs/storage_conf.xml | 12 +- .../test_disk_over_web_server/test.py | 11 +- 15 files changed, 192 insertions(+), 200 deletions(-) delete mode 100644 src/Disks/DiskStatic.cpp create mode 100644 src/Disks/DiskWEBServer.cpp rename src/Disks/{DiskStatic.h => DiskWEBServer.h} (55%) rename src/IO/{ReadBufferFromStatic.cpp => ReadIndirectBufferFromWEBServer.cpp} (77%) rename src/IO/{ReadBufferFromStatic.h => ReadIndirectBufferFromWEBServer.h} (68%) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 2aed174c088..cffe01758f1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -73,6 +73,7 @@ class IColumn; M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \ M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \ M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \ + M(UInt64, http_max_single_read_retries, 4, "The maximum number of retries during single http read.", 0) \ M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. 
They can be output in JSON-formats.", IMPORTANT) \ M(Bool, use_uncompressed_cache, false, "Whether to use the cache of uncompressed blocks.", 0) \ M(Bool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \ diff --git a/src/Disks/DiskStatic.cpp b/src/Disks/DiskStatic.cpp deleted file mode 100644 index b3f3752ab43..00000000000 --- a/src/Disks/DiskStatic.cpp +++ /dev/null @@ -1,136 +0,0 @@ -#include "DiskStatic.h" - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace fs = std::filesystem; - - -namespace DB -{ - -class ReadIndirectBufferFromStatic final : public ReadIndirectBufferFromRemoteFS -{ -public: - ReadIndirectBufferFromStatic( - const String & url_, - DiskStatic::Metadata metadata_, - ContextPtr context_, - size_t max_read_tries_, - size_t buf_size_) - : ReadIndirectBufferFromRemoteFS(metadata_) - , url(url_) - , context(context_) - , max_read_tries(max_read_tries_) - , buf_size(buf_size_) - { - } - - std::unique_ptr createReadBuffer(const String & path) override - { - return std::make_unique(url + path, context, max_read_tries, buf_size); - } - -private: - String url; - ContextPtr context; - size_t max_read_tries; - size_t buf_size; -}; - - -DiskStatic::DiskStatic(const String & disk_name_, - const String & files_root_path_url_, - const String & metadata_path_, - ContextPtr context_, - SettingsPtr settings_) - : IDiskRemote(disk_name_, files_root_path_url_, metadata_path_, "DiskStatic", settings_->thread_pool_size) - , WithContext(context_->getGlobalContext()) - , settings(std::move(settings_)) -{ -} - - -void DiskStatic::startup() -{ -} - - -std::unique_ptr DiskStatic::readFile(const String & path, size_t buf_size, size_t, size_t, size_t, MMappedFileCache *) const -{ - auto metadata = readMeta(path); - - LOG_DEBUG(log, "Read from file by path: {}. Existing objects: {}", - backQuote(metadata_path + path), metadata.remote_fs_objects.size()); - - auto reader = std::make_unique(remote_fs_root_path, metadata, getContext(), 1, buf_size); - return std::make_unique(std::move(reader), settings->min_bytes_for_seek); -} - - -std::unique_ptr DiskStatic::writeFile(const String & path, size_t buf_size, WriteMode mode) -{ - auto metadata = readOrCreateMetaForWriting(path, mode); - /// Update read_only flag in IDiskRemote::metadata. 
- /// setReadOnly(); - - auto file_name = generateName(); - auto file_path = remote_fs_root_path + file_name; - - LOG_DEBUG(log, "Write to file url: {}", file_path); - - auto timeouts = ConnectionTimeouts::getHTTPTimeouts(getContext()); - - Poco::URI uri(file_path); - auto writer = std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_PUT, timeouts, buf_size); - - return std::make_unique>(std::move(writer), - std::move(metadata), - file_name); -} - - -namespace -{ -std::unique_ptr getSettings(const Poco::Util::AbstractConfiguration & config, const String & config_prefix) -{ - return std::make_unique( - config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".thread_pool_size", 16)); -} -} - -void registerDiskStatic(DiskFactory & factory) -{ - auto creator = [](const String & disk_name, - const Poco::Util::AbstractConfiguration & config, - const String & config_prefix, - ContextConstPtr context) -> DiskPtr - { - fs::path disk = fs::path(context->getPath()) / "disks" / disk_name; - fs::create_directories(disk); - - String url{config.getString(config_prefix + ".endpoint")}; - if (!url.ends_with('/')) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "URL must end with '/', but '{}' doesn't.", url); - - String metadata_path = fs::path(context->getPath()) / "disks" / disk_name / ""; - - return std::make_shared(disk_name, url, metadata_path, context, getSettings(config, config_prefix)); - }; - - factory.registerDiskType("static", creator); -} - - -} diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index 8a10a790be0..20aaf285f25 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -13,7 +13,7 @@ struct DiskType RAM, S3, HDFS, - Static + WEBServer }; static String toString(Type disk_type) @@ -28,8 +28,8 @@ struct DiskType return "s3"; case Type::HDFS: return "hdfs"; - case Type::Static: - return "static"; + case Type::WEBServer: + return "web"; } __builtin_unreachable(); } diff --git a/src/Disks/DiskWEBServer.cpp b/src/Disks/DiskWEBServer.cpp new file mode 100644 index 00000000000..7442c1177e0 --- /dev/null +++ b/src/Disks/DiskWEBServer.cpp @@ -0,0 +1,119 @@ +#include "DiskWEBServer.h" + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include + + +namespace DB +{ + +class ReadBufferFromWEBServer final : public ReadIndirectBufferFromRemoteFS +{ +public: + ReadBufferFromWEBServer( + const String & url_, + DiskWEBServer::Metadata metadata_, + ContextPtr context_, + size_t max_read_tries_, + size_t buf_size_) + : ReadIndirectBufferFromRemoteFS(metadata_) + , url(url_) + , context(context_) + , max_read_tries(max_read_tries_) + , buf_size(buf_size_) + { + } + + std::unique_ptr createReadBuffer(const String & path) override + { + return std::make_unique(fs::path(url) / path, context, max_read_tries, buf_size); + } + +private: + String url; + ContextPtr context; + size_t max_read_tries; + size_t buf_size; +}; + + +DiskWEBServer::DiskWEBServer( + const String & disk_name_, + const String & files_root_path_url_, + const String & metadata_path_, + ContextPtr context_, + SettingsPtr settings_) + : IDiskRemote(disk_name_, files_root_path_url_, metadata_path_, "DiskWEBServer", settings_->thread_pool_size) + , WithContext(context_->getGlobalContext()) + , settings(std::move(settings_)) +{ +} + + +std::unique_ptr DiskWEBServer::readFile(const String & path, size_t buf_size, size_t, size_t, size_t, MMappedFileCache *) const +{ + auto metadata = readMeta(path); + + LOG_DEBUG(log, "Read from file by path: {}. 
Existing objects: {}", backQuote(metadata_path + path), metadata.remote_fs_objects.size()); + + auto reader = std::make_unique(remote_fs_root_path, metadata, getContext(), 1, buf_size); + return std::make_unique(std::move(reader), settings->min_bytes_for_seek); +} + + +std::unique_ptr DiskWEBServer::writeFile(const String & path, size_t buf_size, WriteMode mode) +{ + auto metadata = readOrCreateMetaForWriting(path, mode); + + auto file_name = generateName(); + String file_path = fs::path(remote_fs_root_path) / file_name; + + LOG_DEBUG(log, "Write to file url: {}", file_path); + + auto timeouts = ConnectionTimeouts::getHTTPTimeouts(getContext()); + Poco::URI uri(file_path); + auto writer = std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_PUT, timeouts, buf_size); + + return std::make_unique>(std::move(writer), std::move(metadata), file_name); +} + + +void registerDiskWEBServer(DiskFactory & factory) +{ + auto creator = [](const String & disk_name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextConstPtr context) -> DiskPtr + { + fs::path disk = fs::path(context->getPath()) / "disks" / disk_name; + fs::create_directories(disk); + + String url{config.getString(config_prefix + ".endpoint")}; + if (!url.ends_with('/')) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "URL must end with '/', but '{}' doesn't.", url); + + auto settings = std::make_unique( + context->getGlobalContext()->getSettingsRef().http_max_single_read_retries, + config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), + config.getInt(config_prefix + ".thread_pool_size", 16)); + + String metadata_path = fs::path(context->getPath()) / "disks" / disk_name / ""; + + return std::make_shared(disk_name, url, metadata_path, context, std::move(settings)); + }; + + factory.registerDiskType("web", creator); +} + +} diff --git a/src/Disks/DiskStatic.h b/src/Disks/DiskWEBServer.h similarity index 55% rename from src/Disks/DiskStatic.h rename to src/Disks/DiskWEBServer.h index 9f3c2598236..bcf69ae4d12 100644 --- a/src/Disks/DiskStatic.h +++ b/src/Disks/DiskWEBServer.h @@ -7,34 +7,34 @@ namespace DB { -struct DiskStaticSettings +struct DiskWEBServerSettings { + /// Number of read attempts before throw that network is unreachable. + size_t max_read_tries; + /// Passed to SeekAvoidingReadBuffer. size_t min_bytes_for_seek; - int thread_pool_size; - int objects_chunk_size_to_delete; + /// Used by IDiskRemote. + size_t thread_pool_size; - DiskStaticSettings( - int min_bytes_for_seek_, - int thread_pool_size_) - : min_bytes_for_seek(min_bytes_for_seek_) - , thread_pool_size(thread_pool_size_) {} + DiskWEBServerSettings(size_t max_read_tries_, size_t min_bytes_for_seek_, size_t thread_pool_size_) + : max_read_tries(max_read_tries_) , min_bytes_for_seek(min_bytes_for_seek_) , thread_pool_size(thread_pool_size_) {} }; -class DiskStatic : public IDiskRemote, WithContext +/// Disk to store data on a web server and metadata on the local disk. 
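+/// Reads stream objects over HTTP (GET) from the endpoint; writes PUT whole
+/// objects there, so only the small per-file metadata is kept on the local disk.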
+ +class DiskWEBServer : public IDiskRemote, WithContext { -using SettingsPtr = std::unique_ptr; +using SettingsPtr = std::unique_ptr; public: - DiskStatic(const String & disk_name_, + DiskWEBServer(const String & disk_name_, const String & files_root_path_url_, const String & metadata_path_, ContextPtr context, SettingsPtr settings_); - DiskType::Type getType() const override { return DiskType::Type::Static; } - - virtual void startup() override; + DiskType::Type getType() const override { return DiskType::Type::WEBServer; } std::unique_ptr readFile( const String & path, diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 9d40c613ecf..dafd507ba1e 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -67,7 +67,7 @@ public: std::unique_ptr createReadBuffer(const String & path) override { - return std::make_unique(hdfs_uri, fs::path(hdfs_directory) / path, config, buf_size); + return std::make_unique(hdfs_uri, hdfs_directory + path, config, buf_size); } private: @@ -97,8 +97,9 @@ std::unique_ptr DiskHDFS::readFile(const String & path, { auto metadata = readMeta(path); - LOG_DEBUG(log, "Read from file by path: {}. Existing HDFS objects: {}", - backQuote((fs::path(metadata_path) / path).string()), metadata.remote_fs_objects.size()); + LOG_DEBUG(log, + "Read from file by path: {}. Existing HDFS objects: {}", + backQuote(metadata_path + path), metadata.remote_fs_objects.size()); auto reader = std::make_unique(config, remote_fs_root_path, metadata, buf_size); return std::make_unique(std::move(reader), settings->min_bytes_for_seek); @@ -111,10 +112,10 @@ std::unique_ptr DiskHDFS::writeFile(const String & path /// Path to store new HDFS object. auto file_name = getRandomName(); - String hdfs_path = fs::path(remote_fs_root_path) / file_name; + auto hdfs_path = remote_fs_root_path + file_name; LOG_DEBUG(log, "{} to file by path: {}. HDFS path: {}", mode == WriteMode::Rewrite ? "Write" : "Append", - backQuote((fs::path(metadata_path) / path).string()), remote_fs_root_path + hdfs_path); + backQuote(metadata_path + path), remote_fs_root_path + hdfs_path); /// Single O_WRONLY in libhdfs adds O_TRUNC auto hdfs_buffer = std::make_unique(hdfs_path, @@ -177,7 +178,7 @@ void registerDiskHDFS(DiskFactory & factory) String uri{config.getString(config_prefix + ".endpoint")}; if (uri.back() != '/') - throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must end with '/', but '{}' doesn't.", uri); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "HDFS path must ends with '/', but '{}' doesn't.", uri); String metadata_path = context_->getPath() + "disks/" + name + "/"; diff --git a/src/Disks/HDFS/DiskHDFS.h b/src/Disks/HDFS/DiskHDFS.h index 95a321d5c45..49fdf44728b 100644 --- a/src/Disks/HDFS/DiskHDFS.h +++ b/src/Disks/HDFS/DiskHDFS.h @@ -32,9 +32,6 @@ struct DiskHDFSSettings */ class DiskHDFS final : public IDiskRemote { - -friend class DiskHDFSReservation; - public: using SettingsPtr = std::unique_ptr; diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 0d088bc2c16..e82d7c200a1 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -23,7 +23,8 @@ namespace ErrorCodes } /// Helper class to collect paths into chunks of maximum size. -/// For s3 it is Aws::vector, for hdfs it is std::vector. +/// For diskS3 it is Aws::vector, for diskHDFS it is std::vector. +/// For DiskWEBServer not implemented. 
class RemoteFSPathKeeper { public: @@ -41,14 +42,12 @@ using RemoteFSPathKeeperPtr = std::shared_ptr; /// Base Disk class for remote FS's, which are not posix-compatible. -/// Used for s3, hdfs, static. +/// Used for s3, hdfs, web-server. class IDiskRemote : public IDisk { friend class DiskRemoteReservation; public: - struct Metadata; - IDiskRemote( const String & name_, const String & remote_fs_root_path_, @@ -56,9 +55,9 @@ public: const String & log_name_, size_t thread_pool_size); - const String & getName() const final override { return name; } + /// Methods to manage metadata of remote FS objects. - const String & getPath() const final override { return metadata_path; } + struct Metadata; Metadata readMeta(const String & path) const; @@ -66,6 +65,12 @@ public: Metadata readOrCreateMetaForWriting(const String & path, WriteMode mode); + /// Disk info + + const String & getName() const final override { return name; } + + const String & getPath() const final override { return metadata_path; } + UInt64 getTotalSpace() const final override { return std::numeric_limits::max(); } UInt64 getAvailableSpace() const final override { return std::numeric_limits::max(); } @@ -74,9 +79,9 @@ public: /// Read-only part - bool exists(const String & path) const final override; + bool exists(const String & path) const override; - bool isFile(const String & path) const final override; + bool isFile(const String & path) const override; size_t getFileSize(const String & path) const final override; diff --git a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp index 29251aeace7..4620908b46b 100644 --- a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp @@ -1,8 +1,8 @@ #include "ReadIndirectBufferFromRemoteFS.h" #include -#include #include +#include namespace DB @@ -125,6 +125,6 @@ class ReadIndirectBufferFromRemoteFS; #endif template -class ReadIndirectBufferFromRemoteFS; +class ReadIndirectBufferFromRemoteFS; } diff --git a/src/Disks/registerDisks.cpp b/src/Disks/registerDisks.cpp index e240fe0f83e..213ba1f9593 100644 --- a/src/Disks/registerDisks.cpp +++ b/src/Disks/registerDisks.cpp @@ -20,7 +20,7 @@ void registerDiskS3(DiskFactory & factory); void registerDiskHDFS(DiskFactory & factory); #endif -void registerDiskStatic(DiskFactory & factory); +void registerDiskWEBServer(DiskFactory & factory); void registerDisks() @@ -38,7 +38,7 @@ void registerDisks() registerDiskHDFS(factory); #endif - registerDiskStatic(factory); + registerDiskWEBServer(factory); } } diff --git a/src/IO/ReadBufferFromStatic.cpp b/src/IO/ReadIndirectBufferFromWEBServer.cpp similarity index 77% rename from src/IO/ReadBufferFromStatic.cpp rename to src/IO/ReadIndirectBufferFromWEBServer.cpp index cb5baac6ee2..11f1a638147 100644 --- a/src/IO/ReadBufferFromStatic.cpp +++ b/src/IO/ReadIndirectBufferFromWEBServer.cpp @@ -1,4 +1,4 @@ -#include "ReadBufferFromStatic.h" +#include "ReadIndirectBufferFromWEBServer.h" #include #include @@ -16,21 +16,21 @@ namespace ErrorCodes } -ReadBufferFromStatic::ReadBufferFromStatic(const String & url_, +ReadIndirectBufferFromWEBServer::ReadIndirectBufferFromWEBServer(const String & url_, ContextPtr context_, size_t max_read_tries_, - size_t buffer_size_) - : SeekableReadBuffer(nullptr, 0) - , log(&Poco::Logger::get("ReadBufferFromStaticFilesWebServer")) + size_t buf_size_) + : BufferWithOwnMemory(buf_size_) + , log(&Poco::Logger::get("ReadIndirectBufferFromWEBServer")) , context(context_) , url(url_) - , 
buffer_size(buffer_size_) + , buffer_size(buf_size_) , max_read_tries(max_read_tries_) { } -std::unique_ptr ReadBufferFromStatic::initialize() +std::unique_ptr ReadIndirectBufferFromWEBServer::initialize() { Poco::URI uri(url); return std::make_unique( @@ -44,7 +44,7 @@ std::unique_ptr ReadBufferFromStatic::initialize() } -bool ReadBufferFromStatic::nextImpl() +bool ReadIndirectBufferFromWEBServer::nextImpl() { if (!impl) impl = initialize(); @@ -76,8 +76,7 @@ bool ReadBufferFromStatic::nextImpl() if (ret) { - internal_buffer = impl->buffer(); - working_buffer = internal_buffer; + working_buffer = internal_buffer = impl->buffer(); /// Do not update pos here, because it is anyway overwritten after nextImpl() in ReadBuffer::next(). } @@ -85,7 +84,7 @@ bool ReadBufferFromStatic::nextImpl() } -off_t ReadBufferFromStatic::seek(off_t offset_, int whence) +off_t ReadIndirectBufferFromWEBServer::seek(off_t offset_, int whence) { if (impl) throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Seek is allowed only before first read attempt from the buffer"); @@ -102,7 +101,7 @@ off_t ReadBufferFromStatic::seek(off_t offset_, int whence) } -off_t ReadBufferFromStatic::getPosition() +off_t ReadIndirectBufferFromWEBServer::getPosition() { return offset + count(); } diff --git a/src/IO/ReadBufferFromStatic.h b/src/IO/ReadIndirectBufferFromWEBServer.h similarity index 68% rename from src/IO/ReadBufferFromStatic.h rename to src/IO/ReadIndirectBufferFromWEBServer.h index 8de90074cb9..64dd74ed9ef 100644 --- a/src/IO/ReadBufferFromStatic.h +++ b/src/IO/ReadIndirectBufferFromWEBServer.h @@ -7,10 +7,13 @@ namespace DB { -class ReadBufferFromStatic : public SeekableReadBuffer +/// Read buffer, which reads via http, but is used as ReadBufferFromFileBase. +/// Used to read files, hosted on a WEB server with static files. + +class ReadIndirectBufferFromWEBServer : public BufferWithOwnMemory { public: - explicit ReadBufferFromStatic(const String & url_, + explicit ReadIndirectBufferFromWEBServer(const String & url_, ContextPtr context, UInt64 max_read_tries_, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index ca66acac8fd..63c2b6812a4 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -35,7 +35,7 @@ from helpers.test_tools import assert_eq_with_retry import docker from .client import Client -#from .hdfs_api import HDFSApi +from .hdfs_api import HDFSApi HELPERS_DIR = p.dirname(__file__) CLICKHOUSE_ROOT_DIR = p.join(p.dirname(__file__), "../../..") @@ -310,7 +310,7 @@ class ClickHouseCluster: self.rabbitmq_dir = p.abspath(p.join(self.instances_dir, "rabbitmq")) self.rabbitmq_logs_dir = os.path.join(self.rabbitmq_dir, "logs") - # available when with_rabbitmq == True + # available when with_nginx == True self.nginx_host = "nginx" self.nginx_ip = None self.nginx_port = 80 diff --git a/tests/integration/test_disk_over_web_server/configs/storage_conf.xml b/tests/integration/test_disk_over_web_server/configs/storage_conf.xml index b1b35627cef..349725882d6 100644 --- a/tests/integration/test_disk_over_web_server/configs/storage_conf.xml +++ b/tests/integration/test_disk_over_web_server/configs/storage_conf.xml @@ -2,26 +2,26 @@ - - static + + web http://nginx:80/ - + local / - +
-                        <disk>static</disk>
+                        <disk>web</disk>
                     </main>
                     <external>
                         <disk>hdd</disk>
                     </external>
                 </volumes>
-            </static>
+            </web>
         </policies>
     </storage_configuration>
 </yandex>
diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index a743fc523c6..77f61e94911 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -20,7 +20,7 @@ def test_insert_select(cluster): node.query(""" CREATE TABLE test1 (id Int32) ENGINE = MergeTree() ORDER BY id - SETTINGS storage_policy = 'static'; + SETTINGS storage_policy = 'web'; """) node.query("INSERT INTO test1 SELECT number FROM numbers(100)") @@ -36,15 +36,18 @@ def test_insert_select(cluster): node.query(""" CREATE TABLE test2 (id Int32) ENGINE = MergeTree() ORDER BY id - SETTINGS storage_policy = 'static'; + SETTINGS storage_policy = 'web'; """) node.query("INSERT INTO test2 SELECT number FROM numbers(500000)") result = node.query("SELECT id FROM test2 ORDER BY id") expected = node.query("SELECT number FROM numbers(500000)") assert(result == expected) - + node.query("INSERT INTO test2 SELECT number FROM numbers(500000, 500000)") node.query("DETACH TABLE test2") node.query("ATTACH TABLE test2") + node.query("INSERT INTO test2 SELECT number FROM numbers(1000000, 500000)") result = node.query("SELECT count() FROM test2") - assert(int(result) == 500000) + assert(int(result) == 1500000) + result = node.query("SELECT id FROM test2 WHERE id % 100 = 0 ORDER BY id") + assert(result == node.query("SELECT number FROM numbers(1500000) WHERE number % 100 = 0 ORDER BY number")) From 11faddb39f183da0f16c15002a270dda75c27536 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 13 Jun 2021 19:30:59 +0000 Subject: [PATCH 04/86] Better --- src/Disks/DiskWEBServer.cpp | 2 -- src/Disks/DiskWEBServer.h | 2 +- src/Disks/IDiskRemote.h | 16 +++++++--------- src/IO/ReadIndirectBufferFromWEBServer.cpp | 3 ++- src/IO/ReadIndirectBufferFromWEBServer.h | 11 +++++++---- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/Disks/DiskWEBServer.cpp b/src/Disks/DiskWEBServer.cpp index 7442c1177e0..e4c321ad1c1 100644 --- a/src/Disks/DiskWEBServer.cpp +++ b/src/Disks/DiskWEBServer.cpp @@ -9,10 +9,8 @@ #include #include -#include #include #include -#include namespace DB diff --git a/src/Disks/DiskWEBServer.h b/src/Disks/DiskWEBServer.h index bcf69ae4d12..39dfd45325f 100644 --- a/src/Disks/DiskWEBServer.h +++ b/src/Disks/DiskWEBServer.h @@ -21,7 +21,7 @@ struct DiskWEBServerSettings }; -/// Disk to store data on a web server and metadata on the local disk. +/// Storage to store data on a web server and metadata on the local disk. class DiskWEBServer : public IDiskRemote, WithContext { diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index e82d7c200a1..49ee68079ab 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -24,7 +24,7 @@ namespace ErrorCodes /// Helper class to collect paths into chunks of maximum size. /// For diskS3 it is Aws::vector, for diskHDFS it is std::vector. -/// For DiskWEBServer not implemented. +/// For diskWEBServer not implemented. class RemoteFSPathKeeper { public: @@ -42,7 +42,7 @@ using RemoteFSPathKeeperPtr = std::shared_ptr; /// Base Disk class for remote FS's, which are not posix-compatible. -/// Used for s3, hdfs, web-server. +/// Used to implement disks over s3, hdfs, web-server. class IDiskRemote : public IDisk { friend class DiskRemoteReservation; @@ -55,7 +55,7 @@ public: const String & log_name_, size_t thread_pool_size); - /// Methods to manage metadata of remote FS objects. + /// Methods to manage local metadata of remote FS objects. 
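+    /// (one metadata file maps a logical path to the remote objects that back it)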
struct Metadata; @@ -83,7 +83,7 @@ public: bool isFile(const String & path) const override; - size_t getFileSize(const String & path) const final override; + size_t getFileSize(const String & path) const override; void listFiles(const String & path, std::vector & file_names) override; @@ -130,7 +130,7 @@ public: /// Overriden by disks s3 and hdfs. virtual RemoteFSPathKeeperPtr createFSPathKeeper() const { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} does not support storage keeper", getName()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} does not support FS paths keeper", getName()); } /// Create part @@ -148,11 +148,9 @@ protected: /// Disk name const String name; - /// URL + root path to store files in remote FS. const String remote_fs_root_path; - /// Path to store remote FS metadata, i.e. file name in remote FS, its size, etc. const String metadata_path; private: @@ -171,8 +169,8 @@ using RemoteDiskPtr = std::shared_ptr; /// Remote FS (S3, HDFS, WEB-server) metadata file layout: -/// Number of FS objects, total size of all FS objects. -/// Each FS object represents path where object located in FS and size of object. +/// FS objects, their number and total size of all FS objects. +/// Each FS object represents a file path in remote FS and its size. struct IDiskRemote::Metadata { diff --git a/src/IO/ReadIndirectBufferFromWEBServer.cpp b/src/IO/ReadIndirectBufferFromWEBServer.cpp index 11f1a638147..9beaf6c6be3 100644 --- a/src/IO/ReadIndirectBufferFromWEBServer.cpp +++ b/src/IO/ReadIndirectBufferFromWEBServer.cpp @@ -1,7 +1,8 @@ #include "ReadIndirectBufferFromWEBServer.h" -#include #include +#include +#include #include diff --git a/src/IO/ReadIndirectBufferFromWEBServer.h b/src/IO/ReadIndirectBufferFromWEBServer.h index 64dd74ed9ef..a27f656b447 100644 --- a/src/IO/ReadIndirectBufferFromWEBServer.h +++ b/src/IO/ReadIndirectBufferFromWEBServer.h @@ -1,15 +1,18 @@ #pragma once #include -#include +#include +#include namespace DB { -/// Read buffer, which reads via http, but is used as ReadBufferFromFileBase. -/// Used to read files, hosted on a WEB server with static files. - +/* Read buffer, which reads via http, but is used as ReadBufferFromFileBase. + * Used to read files, hosted on a WEB server with static files. + * + * Usage: ReadIndirectBufferFromRemoteFS -> SeekAvoidingReadBuffer -> ReadIndirectBufferFromWEBServer -> ReadWriteBufferFromHTTP. 
+ */ class ReadIndirectBufferFromWEBServer : public BufferWithOwnMemory { public: From a1f11f51783d9d38d8eae500b902f9ad05268f8e Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 14 Jun 2021 09:32:00 +0000 Subject: [PATCH 05/86] Fixes --- src/Disks/DiskType.h | 4 +- .../{DiskWEBServer.cpp => DiskWebServer.cpp} | 38 +++++++++++-------- .../{DiskWEBServer.h => DiskWebServer.h} | 12 +++--- src/Disks/IDiskRemote.h | 4 +- src/Disks/ReadIndirectBufferFromRemoteFS.cpp | 4 +- src/Disks/registerDisks.cpp | 4 +- ...pp => ReadIndirectBufferFromWebServer.cpp} | 24 ++++++------ ...er.h => ReadIndirectBufferFromWebServer.h} | 16 ++++---- .../test_disk_over_web_server/test.py | 6 ++- 9 files changed, 61 insertions(+), 51 deletions(-) rename src/Disks/{DiskWEBServer.cpp => DiskWebServer.cpp} (77%) rename src/Disks/{DiskWEBServer.h => DiskWebServer.h} (82%) rename src/IO/{ReadIndirectBufferFromWEBServer.cpp => ReadIndirectBufferFromWebServer.cpp} (76%) rename src/IO/{ReadIndirectBufferFromWEBServer.h => ReadIndirectBufferFromWebServer.h} (54%) diff --git a/src/Disks/DiskType.h b/src/Disks/DiskType.h index 20aaf285f25..1b6e3513ed5 100644 --- a/src/Disks/DiskType.h +++ b/src/Disks/DiskType.h @@ -13,7 +13,7 @@ struct DiskType RAM, S3, HDFS, - WEBServer + WebServer }; static String toString(Type disk_type) @@ -28,7 +28,7 @@ struct DiskType return "s3"; case Type::HDFS: return "hdfs"; - case Type::WEBServer: + case Type::WebServer: return "web"; } __builtin_unreachable(); diff --git a/src/Disks/DiskWEBServer.cpp b/src/Disks/DiskWebServer.cpp similarity index 77% rename from src/Disks/DiskWEBServer.cpp rename to src/Disks/DiskWebServer.cpp index e4c321ad1c1..9ce01e800e7 100644 --- a/src/Disks/DiskWEBServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -1,4 +1,4 @@ -#include "DiskWEBServer.h" +#include "DiskWebServer.h" #include #include @@ -9,23 +9,29 @@ #include #include -#include +#include #include namespace DB { -class ReadBufferFromWEBServer final : public ReadIndirectBufferFromRemoteFS +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + + +class ReadBufferFromWebServer final : public ReadIndirectBufferFromRemoteFS { public: - ReadBufferFromWEBServer( + ReadBufferFromWebServer( const String & url_, - DiskWEBServer::Metadata metadata_, + DiskWebServer::Metadata metadata_, ContextPtr context_, size_t max_read_tries_, size_t buf_size_) - : ReadIndirectBufferFromRemoteFS(metadata_) + : ReadIndirectBufferFromRemoteFS(metadata_) , url(url_) , context(context_) , max_read_tries(max_read_tries_) @@ -33,9 +39,9 @@ public: { } - std::unique_ptr createReadBuffer(const String & path) override + std::unique_ptr createReadBuffer(const String & path) override { - return std::make_unique(fs::path(url) / path, context, max_read_tries, buf_size); + return std::make_unique(fs::path(url) / path, context, max_read_tries, buf_size); } private: @@ -46,31 +52,31 @@ private: }; -DiskWEBServer::DiskWEBServer( +DiskWebServer::DiskWebServer( const String & disk_name_, const String & files_root_path_url_, const String & metadata_path_, ContextPtr context_, SettingsPtr settings_) - : IDiskRemote(disk_name_, files_root_path_url_, metadata_path_, "DiskWEBServer", settings_->thread_pool_size) + : IDiskRemote(disk_name_, files_root_path_url_, metadata_path_, "DiskWebServer", settings_->thread_pool_size) , WithContext(context_->getGlobalContext()) , settings(std::move(settings_)) { } -std::unique_ptr DiskWEBServer::readFile(const String & path, size_t buf_size, size_t, size_t, size_t, MMappedFileCache *) const +std::unique_ptr 
DiskWebServer::readFile(const String & path, size_t buf_size, size_t, size_t, size_t, MMappedFileCache *) const { auto metadata = readMeta(path); LOG_DEBUG(log, "Read from file by path: {}. Existing objects: {}", backQuote(metadata_path + path), metadata.remote_fs_objects.size()); - auto reader = std::make_unique(remote_fs_root_path, metadata, getContext(), 1, buf_size); + auto reader = std::make_unique(remote_fs_root_path, metadata, getContext(), 1, buf_size); return std::make_unique(std::move(reader), settings->min_bytes_for_seek); } -std::unique_ptr DiskWEBServer::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskWebServer::writeFile(const String & path, size_t buf_size, WriteMode mode) { auto metadata = readOrCreateMetaForWriting(path, mode); @@ -87,7 +93,7 @@ std::unique_ptr DiskWEBServer::writeFile(const String & } -void registerDiskWEBServer(DiskFactory & factory) +void registerDiskWebServer(DiskFactory & factory) { auto creator = [](const String & disk_name, const Poco::Util::AbstractConfiguration & config, @@ -101,14 +107,14 @@ void registerDiskWEBServer(DiskFactory & factory) if (!url.ends_with('/')) throw Exception(ErrorCodes::BAD_ARGUMENTS, "URL must end with '/', but '{}' doesn't.", url); - auto settings = std::make_unique( + auto settings = std::make_unique( context->getGlobalContext()->getSettingsRef().http_max_single_read_retries, config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), config.getInt(config_prefix + ".thread_pool_size", 16)); String metadata_path = fs::path(context->getPath()) / "disks" / disk_name / ""; - return std::make_shared(disk_name, url, metadata_path, context, std::move(settings)); + return std::make_shared(disk_name, url, metadata_path, context, std::move(settings)); }; factory.registerDiskType("web", creator); diff --git a/src/Disks/DiskWEBServer.h b/src/Disks/DiskWebServer.h similarity index 82% rename from src/Disks/DiskWEBServer.h rename to src/Disks/DiskWebServer.h index 39dfd45325f..1a653bc49f7 100644 --- a/src/Disks/DiskWEBServer.h +++ b/src/Disks/DiskWebServer.h @@ -7,7 +7,7 @@ namespace DB { -struct DiskWEBServerSettings +struct DiskWebServerSettings { /// Number of read attempts before throw that network is unreachable. size_t max_read_tries; @@ -16,25 +16,25 @@ struct DiskWEBServerSettings /// Used by IDiskRemote. size_t thread_pool_size; - DiskWEBServerSettings(size_t max_read_tries_, size_t min_bytes_for_seek_, size_t thread_pool_size_) + DiskWebServerSettings(size_t max_read_tries_, size_t min_bytes_for_seek_, size_t thread_pool_size_) : max_read_tries(max_read_tries_) , min_bytes_for_seek(min_bytes_for_seek_) , thread_pool_size(thread_pool_size_) {} }; /// Storage to store data on a web server and metadata on the local disk. 
-class DiskWEBServer : public IDiskRemote, WithContext +class DiskWebServer : public IDiskRemote, WithContext { -using SettingsPtr = std::unique_ptr; +using SettingsPtr = std::unique_ptr; public: - DiskWEBServer(const String & disk_name_, + DiskWebServer(const String & disk_name_, const String & files_root_path_url_, const String & metadata_path_, ContextPtr context, SettingsPtr settings_); - DiskType::Type getType() const override { return DiskType::Type::WEBServer; } + DiskType::Type getType() const override { return DiskType::Type::WebServer; } std::unique_ptr readFile( const String & path, diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 49ee68079ab..672ee6468e6 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -121,13 +121,13 @@ public: void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; - /// Overriden by disks s3 and hdfs. + /// Overridden by disks s3 and hdfs. virtual void removeFromRemoteFS(RemoteFSPathKeeperPtr /* fs_paths_keeper */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} does not support removing remote files", getName()); } - /// Overriden by disks s3 and hdfs. + /// Overridden by disks s3 and hdfs. virtual RemoteFSPathKeeperPtr createFSPathKeeper() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} does not support FS paths keeper", getName()); diff --git a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp index 4620908b46b..b2984685249 100644 --- a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB @@ -125,6 +125,6 @@ class ReadIndirectBufferFromRemoteFS; #endif template -class ReadIndirectBufferFromRemoteFS; +class ReadIndirectBufferFromRemoteFS; } diff --git a/src/Disks/registerDisks.cpp b/src/Disks/registerDisks.cpp index 213ba1f9593..4f35ec5f1d9 100644 --- a/src/Disks/registerDisks.cpp +++ b/src/Disks/registerDisks.cpp @@ -20,7 +20,7 @@ void registerDiskS3(DiskFactory & factory); void registerDiskHDFS(DiskFactory & factory); #endif -void registerDiskWEBServer(DiskFactory & factory); +void registerDiskWebServer(DiskFactory & factory); void registerDisks() @@ -38,7 +38,7 @@ void registerDisks() registerDiskHDFS(factory); #endif - registerDiskWEBServer(factory); + registerDiskWebServer(factory); } } diff --git a/src/IO/ReadIndirectBufferFromWEBServer.cpp b/src/IO/ReadIndirectBufferFromWebServer.cpp similarity index 76% rename from src/IO/ReadIndirectBufferFromWEBServer.cpp rename to src/IO/ReadIndirectBufferFromWebServer.cpp index 9beaf6c6be3..f2d01c594de 100644 --- a/src/IO/ReadIndirectBufferFromWEBServer.cpp +++ b/src/IO/ReadIndirectBufferFromWebServer.cpp @@ -1,4 +1,4 @@ -#include "ReadIndirectBufferFromWEBServer.h" +#include "ReadIndirectBufferFromWebServer.h" #include #include @@ -17,21 +17,21 @@ namespace ErrorCodes } -ReadIndirectBufferFromWEBServer::ReadIndirectBufferFromWEBServer(const String & url_, - ContextPtr context_, - size_t max_read_tries_, - size_t buf_size_) +ReadIndirectBufferFromWebServer::ReadIndirectBufferFromWebServer(const String & url_, + ContextPtr context_, + size_t max_read_tries_, + size_t buf_size_) : BufferWithOwnMemory(buf_size_) - , log(&Poco::Logger::get("ReadIndirectBufferFromWEBServer")) + , log(&Poco::Logger::get("ReadIndirectBufferFromWebServer")) , context(context_) , url(url_) - , buffer_size(buf_size_) + , buf_size(buf_size_) , max_read_tries(max_read_tries_) { } -std::unique_ptr 
ReadIndirectBufferFromWEBServer::initialize() +std::unique_ptr ReadIndirectBufferFromWebServer::initialize() { Poco::URI uri(url); return std::make_unique( @@ -41,11 +41,11 @@ std::unique_ptr ReadIndirectBufferFromWEBServer::initialize() ConnectionTimeouts::getHTTPTimeouts(context), 0, Poco::Net::HTTPBasicCredentials{}, - buffer_size); + buf_size); } -bool ReadIndirectBufferFromWEBServer::nextImpl() +bool ReadIndirectBufferFromWebServer::nextImpl() { if (!impl) impl = initialize(); @@ -85,7 +85,7 @@ bool ReadIndirectBufferFromWEBServer::nextImpl() } -off_t ReadIndirectBufferFromWEBServer::seek(off_t offset_, int whence) +off_t ReadIndirectBufferFromWebServer::seek(off_t offset_, int whence) { if (impl) throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Seek is allowed only before first read attempt from the buffer"); @@ -102,7 +102,7 @@ off_t ReadIndirectBufferFromWEBServer::seek(off_t offset_, int whence) } -off_t ReadIndirectBufferFromWEBServer::getPosition() +off_t ReadIndirectBufferFromWebServer::getPosition() { return offset + count(); } diff --git a/src/IO/ReadIndirectBufferFromWEBServer.h b/src/IO/ReadIndirectBufferFromWebServer.h similarity index 54% rename from src/IO/ReadIndirectBufferFromWEBServer.h rename to src/IO/ReadIndirectBufferFromWebServer.h index a27f656b447..9a3dcff52f4 100644 --- a/src/IO/ReadIndirectBufferFromWEBServer.h +++ b/src/IO/ReadIndirectBufferFromWebServer.h @@ -9,17 +9,17 @@ namespace DB { /* Read buffer, which reads via http, but is used as ReadBufferFromFileBase. - * Used to read files, hosted on a WEB server with static files. + * Used to read files, hosted on a web server with static files. * - * Usage: ReadIndirectBufferFromRemoteFS -> SeekAvoidingReadBuffer -> ReadIndirectBufferFromWEBServer -> ReadWriteBufferFromHTTP. + * Usage: ReadIndirectBufferFromRemoteFS -> SeekAvoidingReadBuffer -> ReadIndirectBufferFromWebServer -> ReadWriteBufferFromHTTP. 
*/ -class ReadIndirectBufferFromWEBServer : public BufferWithOwnMemory +class ReadIndirectBufferFromWebServer : public BufferWithOwnMemory { public: - explicit ReadIndirectBufferFromWEBServer(const String & url_, - ContextPtr context, - UInt64 max_read_tries_, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadIndirectBufferFromWebServer(const String & url_, + ContextPtr context_, + size_t max_read_tries_, + size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE); bool nextImpl() override; @@ -34,7 +34,7 @@ private: ContextPtr context; const String url; - size_t buffer_size, max_read_tries; + size_t buf_size, max_read_tries; std::unique_ptr impl; diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index 77f61e94911..65679fa6392 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -50,4 +50,8 @@ def test_insert_select(cluster): result = node.query("SELECT count() FROM test2") assert(int(result) == 1500000) result = node.query("SELECT id FROM test2 WHERE id % 100 = 0 ORDER BY id") - assert(result == node.query("SELECT number FROM numbers(1500000) WHERE number % 100 = 0 ORDER BY number")) + assert(result == node.query("SELECT number FROM numbers(1500000) WHERE number % 100 = 0")) + result = node.query("SELECT id FROM test2 ORDER BY id") + assert(result == node.query("SELECT number FROM numbers(1500000)")) + result = node.query("SELECT id FROM test2 WHERE id > 500002 AND id < 1000448 ORDER BY id") + assert(result == node.query("SELECT number FROM numbers(1500000) WHERE number > 500002 AND number < 1000448")) From cd885ee7507c32ee46ee8977732cc9cf4ab4b236 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 14 Jun 2021 11:44:09 +0000 Subject: [PATCH 06/86] Fix build, ya make --- src/Disks/ya.make | 1 + src/IO/ReadIndirectBufferFromWebServer.cpp | 1 + src/IO/ya.make | 1 + src/Storages/HDFS/ReadBufferFromHDFS.h | 4 +++- src/Storages/HDFS/WriteBufferFromHDFS.h | 6 ++++-- 5 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Disks/ya.make b/src/Disks/ya.make index 2312dc96241..78248bf0d58 100644 --- a/src/Disks/ya.make +++ b/src/Disks/ya.make @@ -15,6 +15,7 @@ SRCS( DiskMemory.cpp DiskRestartProxy.cpp DiskSelector.cpp + DiskWebServer.cpp IDisk.cpp IDiskRemote.cpp IVolume.cpp diff --git a/src/IO/ReadIndirectBufferFromWebServer.cpp b/src/IO/ReadIndirectBufferFromWebServer.cpp index f2d01c594de..1c5e63f2f45 100644 --- a/src/IO/ReadIndirectBufferFromWebServer.cpp +++ b/src/IO/ReadIndirectBufferFromWebServer.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include diff --git a/src/IO/ya.make b/src/IO/ya.make index d8bdfa95295..38870975c5b 100644 --- a/src/IO/ya.make +++ b/src/IO/ya.make @@ -53,6 +53,7 @@ SRCS( ReadBufferFromMemory.cpp ReadBufferFromPocoSocket.cpp ReadHelpers.cpp + ReadIndirectBufferFromWebServer.cpp SeekAvoidingReadBuffer.cpp TimeoutSetter.cpp UseSSL.cpp diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/HDFS/ReadBufferFromHDFS.h index 498056ea376..497c008ce24 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/HDFS/ReadBufferFromHDFS.h @@ -1,6 +1,8 @@ #pragma once -#include +#if !defined(ARCADIA_BUILD) + #include +#endif #if USE_HDFS #include diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.h b/src/Storages/HDFS/WriteBufferFromHDFS.h index 9dc74e69d40..174a308d3ed 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.h +++ b/src/Storages/HDFS/WriteBufferFromHDFS.h @@ -1,6 +1,8 @@ #pragma once 
-#include +#if !defined(ARCADIA_BUILD) + #include +#endif #if USE_HDFS #include @@ -19,7 +21,7 @@ class WriteBufferFromHDFS final : public BufferWithOwnMemory public: WriteBufferFromHDFS( - const std::string & hdfs_name_, + const String & hdfs_name_, const Poco::Util::AbstractConfiguration & config_, size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE, int flags = O_WRONLY); From 0fa6dc4570a4ead4e1fd700b34bbf19e5c1b7f01 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 18 Jun 2021 11:07:41 +0000 Subject: [PATCH 07/86] Reimplement --- src/Disks/DiskWebServer.cpp | 242 ++++++++++++++++--- src/Disks/DiskWebServer.h | 132 ++++++++-- src/Disks/IDiskRemote.cpp | 7 +- src/Disks/IDiskRemote.h | 122 ++++------ src/Disks/ReadIndirectBufferFromRemoteFS.cpp | 2 +- src/Disks/ReadIndirectBufferFromRemoteFS.h | 4 +- src/Disks/S3/DiskS3.cpp | 2 +- src/Storages/StorageSet.cpp | 15 +- 8 files changed, 393 insertions(+), 133 deletions(-) diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 9ce01e800e7..5c9ea0bab31 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -1,16 +1,24 @@ #include "DiskWebServer.h" #include -#include -#include - -#include -#include - -#include +#include #include #include +#include +#include + +#include +#include + +#include + +#define DIRECTORY_FILE_PATTERN(prefix) fmt::format("{}-(\\w+)-(\\w+\\.\\w+)", prefix) +#define ROOT_FILE_PATTERN(prefix) fmt::format("{}-(\\w+\\.\\w+)", prefix) + +#define MATCH_DIRECTORY_FILE_PATTERN(prefix) fmt::format("{}/(\\w+)/(\\w+\\.\\w+)", prefix) +#define MATCH_ROOT_FILE_PATTERN(prefix) fmt::format("{}/(\\w+\\.\\w+)", prefix) +#define MATCH_DIRECTORY_PATTERN(prefix) fmt::format("{}/(\\w+)", prefix) namespace DB @@ -22,17 +30,91 @@ namespace ErrorCodes } +static const auto store_uuid_prefix = ".*/[\\w]{3}/[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12}"; + + +/// Fetch contents of .index file from given uri path. +void DiskWebServer::Metadata::initialize(const String & uri_with_path, const String & files_prefix, ContextPtr context) const +{ + ReadWriteBufferFromHTTP metadata_buf(Poco::URI(fs::path(uri_with_path) / ".index"), + Poco::Net::HTTPRequest::HTTP_GET, + ReadWriteBufferFromHTTP::OutStreamCallback(), + ConnectionTimeouts::getHTTPTimeouts(context)); + String directory, file, remote_file_name; + size_t file_size; + + while (!metadata_buf.eof()) + { + readText(remote_file_name, metadata_buf); + assertChar('\t', metadata_buf); + readIntText(file_size, metadata_buf); + assertChar('\n', metadata_buf); + LOG_DEBUG(&Poco::Logger::get("DiskWeb"), "Read file: {}, size: {}", remote_file_name, file_size); + + /* + * URI/ {prefix}-all_x_x_x-{file} + * ... + * {prefix}-format_version.txt + * {prefix}-detached-{file} + * ... + */ + if (RE2::FullMatch(remote_file_name, re2::RE2(DIRECTORY_FILE_PATTERN(files_prefix)), &directory, &file)) + { + files[directory].insert({file, file_size}); + } + else if (RE2::FullMatch(remote_file_name, re2::RE2(ROOT_FILE_PATTERN(files_prefix)), &file)) + { + files[file].insert({file, file_size}); + } + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected file: {}", remote_file_name); + } +} + + +/* Iterate list of files from .index file on a web server (its contents were put + * into DiskWebServer::Metadata) and convert them into paths as though paths in local fs. 
+ */ +class DiskWebDirectoryIterator final : public IDiskDirectoryIterator +{ +public: + DiskWebDirectoryIterator(DiskWebServer::Metadata & metadata_, const String & directory_root_) + : metadata(metadata_), iter(metadata.files.begin()), directory_root(directory_root_) + { + } + + void next() override { ++iter; } + + bool isValid() const override { return iter != metadata.files.end(); } + + String path() const override + { + return fs::path(directory_root) / name(); + } + + String name() const override + { + return iter->first; + } + +private: + DiskWebServer::Metadata & metadata; + DiskWebServer::FilesDirectory::iterator iter; + const String directory_root; +}; + + class ReadBufferFromWebServer final : public ReadIndirectBufferFromRemoteFS { public: ReadBufferFromWebServer( - const String & url_, - DiskWebServer::Metadata metadata_, + const String & uri_, + RemoteMetadata metadata_, ContextPtr context_, size_t max_read_tries_, size_t buf_size_) : ReadIndirectBufferFromRemoteFS(metadata_) - , url(url_) + , uri(uri_) , context(context_) , max_read_tries(max_read_tries_) , buf_size(buf_size_) @@ -41,55 +123,151 @@ public: std::unique_ptr createReadBuffer(const String & path) override { - return std::make_unique(fs::path(url) / path, context, max_read_tries, buf_size); + return std::make_unique(fs::path(uri) / path, context, max_read_tries, buf_size); } private: - String url; + String uri; ContextPtr context; size_t max_read_tries; size_t buf_size; }; +class WriteBufferFromNothing : public WriteBufferFromFile +{ +public: + WriteBufferFromNothing() : WriteBufferFromFile("/dev/null") {} + + void sync() override {} +}; + + DiskWebServer::DiskWebServer( const String & disk_name_, - const String & files_root_path_url_, + const String & uri_, const String & metadata_path_, ContextPtr context_, SettingsPtr settings_) - : IDiskRemote(disk_name_, files_root_path_url_, metadata_path_, "DiskWebServer", settings_->thread_pool_size) - , WithContext(context_->getGlobalContext()) + : WithContext(context_->getGlobalContext()) + , log(&Poco::Logger::get("DiskWeb")) + , uri(uri_) + , name(disk_name_) + , metadata_path(metadata_path_) , settings(std::move(settings_)) { } +String DiskWebServer::getFileName(const String & path) const +{ + String result; + + if (RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN(store_uuid_prefix)) + && RE2::Extract(path, MATCH_DIRECTORY_FILE_PATTERN(".*"), fmt::format("{}-\\1-\\2", settings->files_prefix), &result)) + return result; + + if (RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN(store_uuid_prefix)) + && RE2::Extract(path, MATCH_ROOT_FILE_PATTERN(".*"), fmt::format("{}-\\1", settings->files_prefix), &result)) + return result; + + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected file: {}", path); +} + + +bool DiskWebServer::findFileInMetadata(const String & path, FileAndSize & file_info) const +{ + if (metadata.files.empty()) + metadata.initialize(uri, settings->files_prefix, getContext()); + + String directory_name, file_name; + + if (RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN(store_uuid_prefix), &directory_name, &file_name)) + { + const auto & directory_files = metadata.files.find(directory_name)->second; + auto file = directory_files.find(file_name); + + if (file == directory_files.end()) + return false; + + file_info = std::make_pair(file_name, file->second); + } + else if (RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN(store_uuid_prefix), &file_name)) + { + auto file = metadata.files.find(file_name); + + if (file == metadata.files.end()) + return false; + + 
file_info = std::make_pair(file_name, file->second.find(file_name)->second); + } + else + return false; + + return true; +} + + +bool DiskWebServer::exists(const String & path) const +{ + LOG_DEBUG(log, "Checking existance of file: {}", path); + + /// Assume root directory exists. + if (re2::RE2::FullMatch(path, re2::RE2(fmt::format("({})/", store_uuid_prefix)))) + return true; + + FileAndSize file; + return findFileInMetadata(path, file); +} + + std::unique_ptr DiskWebServer::readFile(const String & path, size_t buf_size, size_t, size_t, size_t, MMappedFileCache *) const { - auto metadata = readMeta(path); + LOG_DEBUG(log, "Read from file by path: {}", path); - LOG_DEBUG(log, "Read from file by path: {}. Existing objects: {}", backQuote(metadata_path + path), metadata.remote_fs_objects.size()); + FileAndSize file; + if (!findFileInMetadata(path, file)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} not found", path); - auto reader = std::make_unique(remote_fs_root_path, metadata, getContext(), 1, buf_size); + RemoteMetadata meta(uri, fs::path(path).parent_path() / fs::path(path).filename()); + meta.remote_fs_objects.emplace_back(std::make_pair(getFileName(path), file.second)); + + auto reader = std::make_unique(uri, meta, getContext(), settings->max_read_tries, buf_size); return std::make_unique(std::move(reader), settings->min_bytes_for_seek); } -std::unique_ptr DiskWebServer::writeFile(const String & path, size_t buf_size, WriteMode mode) +std::unique_ptr DiskWebServer::writeFile(const String &, size_t, WriteMode) { - auto metadata = readOrCreateMetaForWriting(path, mode); + return std::make_unique(); +} - auto file_name = generateName(); - String file_path = fs::path(remote_fs_root_path) / file_name; - LOG_DEBUG(log, "Write to file url: {}", file_path); +DiskDirectoryIteratorPtr DiskWebServer::iterateDirectory(const String & path) +{ + LOG_DEBUG(log, "Iterate directory: {}", path); + return std::make_unique(metadata, path); +} - auto timeouts = ConnectionTimeouts::getHTTPTimeouts(getContext()); - Poco::URI uri(file_path); - auto writer = std::make_unique(uri, Poco::Net::HTTPRequest::HTTP_PUT, timeouts, buf_size); - return std::make_unique>(std::move(writer), std::move(metadata), file_name); +size_t DiskWebServer::getFileSize(const String & path) const +{ + FileAndSize file; + if (!findFileInMetadata(path, file)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} not found", path); + return file.second; +} + + +bool DiskWebServer::isFile(const String & path) const +{ + return RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN(".*")) || RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN(".*")); +} + + +bool DiskWebServer::isDirectory(const String & path) const +{ + return RE2::FullMatch(path, MATCH_DIRECTORY_PATTERN(".*")); } @@ -103,18 +281,18 @@ void registerDiskWebServer(DiskFactory & factory) fs::path disk = fs::path(context->getPath()) / "disks" / disk_name; fs::create_directories(disk); - String url{config.getString(config_prefix + ".endpoint")}; - if (!url.ends_with('/')) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "URL must end with '/', but '{}' doesn't.", url); + String uri{config.getString(config_prefix + ".endpoint")}; + if (!uri.ends_with('/')) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "URI must end with '/', but '{}' doesn't.", uri); auto settings = std::make_unique( context->getGlobalContext()->getSettingsRef().http_max_single_read_retries, config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024), - config.getInt(config_prefix + ".thread_pool_size", 
16)); + config.getString(config_prefix + ".files_prefix", disk_name)); String metadata_path = fs::path(context->getPath()) / "disks" / disk_name / ""; - return std::make_shared(disk_name, url, metadata_path, context, std::move(settings)); + return std::make_shared(disk_name, uri, metadata_path, context, std::move(settings)); }; factory.registerDiskType("web", creator); diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 1a653bc49f7..2cddb1c4ba1 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -1,7 +1,9 @@ #pragma once #include +#include #include +#include namespace DB @@ -13,43 +15,137 @@ struct DiskWebServerSettings size_t max_read_tries; /// Passed to SeekAvoidingReadBuffer. size_t min_bytes_for_seek; - /// Used by IDiskRemote. - size_t thread_pool_size; + String files_prefix; - DiskWebServerSettings(size_t max_read_tries_, size_t min_bytes_for_seek_, size_t thread_pool_size_) - : max_read_tries(max_read_tries_) , min_bytes_for_seek(min_bytes_for_seek_) , thread_pool_size(thread_pool_size_) {} + DiskWebServerSettings(size_t max_read_tries_, size_t min_bytes_for_seek_, String files_prefix_) + : max_read_tries(max_read_tries_) , min_bytes_for_seek(min_bytes_for_seek_), files_prefix(files_prefix_) {} }; /// Storage to store data on a web server and metadata on the local disk. -class DiskWebServer : public IDiskRemote, WithContext +class DiskWebServer : public IDisk, WithContext { using SettingsPtr = std::unique_ptr; public: DiskWebServer(const String & disk_name_, - const String & files_root_path_url_, - const String & metadata_path_, - ContextPtr context, - SettingsPtr settings_); + const String & files_root_path_uri_, + const String & metadata_path_, + ContextPtr context, + SettingsPtr settings_); + + using FileAndSize = std::pair; + using FilesInfo = std::unordered_map; + using FilesDirectory = std::map; + + struct Metadata + { + /// Fetch meta only when required. + mutable FilesDirectory files; + + Metadata() {} + void initialize(const String & uri_with_path, const String & files_prefix, ContextPtr context) const; + }; + + bool findFileInMetadata(const String & path, FileAndSize & file_info) const; + + String getFileName(const String & path) const; DiskType::Type getType() const override { return DiskType::Type::WebServer; } - std::unique_ptr readFile( - const String & path, - size_t buf_size, - size_t estimated_size, - size_t aio_threshold, - size_t mmap_threshold, - MMappedFileCache * mmap_cache) const override; + std::unique_ptr readFile(const String & path, + size_t buf_size, + size_t estimated_size, + size_t aio_threshold, + size_t mmap_threshold, + MMappedFileCache * mmap_cache) const override; + /// Disk info - std::unique_ptr writeFile(const String & path, size_t buf_size, WriteMode mode) override; + const String & getName() const final override { return name; } + + /// ??? 
+ const String & getPath() const final override { return metadata_path; } + + UInt64 getTotalSpace() const final override { return std::numeric_limits::max(); } + + UInt64 getAvailableSpace() const final override { return std::numeric_limits::max(); } + + UInt64 getUnreservedSpace() const final override { return std::numeric_limits::max(); } + + /// Read-only part + + bool exists(const String & path) const override; + + bool isFile(const String & path) const override; + + size_t getFileSize(const String & path) const override; + + void listFiles(const String & /* path */, std::vector & /* file_names */) override { } + + void setReadOnly(const String & /* path */) override {} + + bool isDirectory(const String & path) const override; + + DiskDirectoryIteratorPtr iterateDirectory(const String & /* path */) override; + + Poco::Timestamp getLastModified(const String &) override { return Poco::Timestamp{}; } + + ReservationPtr reserve(UInt64 /*bytes*/) override { return nullptr; } + + /// Write and modification part + + std::unique_ptr writeFile(const String &, size_t, WriteMode) override; + + void moveFile(const String &, const String &) override {} + + void replaceFile(const String &, const String &) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } + + void removeFile(const String &) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } + + void removeFileIfExists(const String &) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } + + void removeRecursive(const String &) override {} + + void removeSharedFile(const String &, bool) override {} + + void removeSharedRecursive(const String &, bool) override {} + + void clearDirectory(const String &) override {} + + void moveDirectory(const String &, const String &) override {} + + void removeDirectory(const String &) override {} + + void setLastModified(const String &, const Poco::Timestamp &) override {} + + /// Create part + + void createFile(const String &) final override {} + + void createDirectory(const String &) override {} + + void createDirectories(const String &) override {} + + void createHardLink(const String &, const String &) override {} private: - String generateName() { return toString(UUIDHelpers::generateV4()); } + Poco::Logger * log; + String uri, name; + const String metadata_path; SettingsPtr settings; + + Metadata metadata; }; } diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index 3e2ea2457a6..1f3b95bba0e 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -33,10 +33,9 @@ IDiskRemote::Metadata::Metadata( const String & disk_path_, const String & metadata_file_path_, bool create) - : remote_fs_root_path(remote_fs_root_path_) + : RemoteMetadata(remote_fs_root_path_, metadata_file_path_) , disk_path(disk_path_) - , metadata_file_path(metadata_file_path_) - , total_size(0), remote_fs_objects(0), ref_count(0) + , total_size(0), ref_count(0) { if (create) return; @@ -416,7 +415,7 @@ void IDiskRemote::removeDirectory(const String & path) DiskDirectoryIteratorPtr IDiskRemote::iterateDirectory(const String & path) { - return std::make_unique(metadata_path + path, path); + return std::make_unique(fs::path(metadata_path) / path, path); } diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 672ee6468e6..eca91c50cc5 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -17,14 +17,8 @@ namespace fs = std::filesystem; namespace 
DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - /// Helper class to collect paths into chunks of maximum size. -/// For diskS3 it is Aws::vector, for diskHDFS it is std::vector. -/// For diskWEBServer not implemented. +/// For s3 it is Aws::vector, for hdfs it is std::vector. class RemoteFSPathKeeper { public: @@ -41,10 +35,10 @@ protected: using RemoteFSPathKeeperPtr = std::shared_ptr; -/// Base Disk class for remote FS's, which are not posix-compatible. -/// Used to implement disks over s3, hdfs, web-server. +/// Base Disk class for remote FS's, which are not posix-compatible (DiskS3 and DiskHDFS) class IDiskRemote : public IDisk { + friend class DiskRemoteReservation; public: @@ -55,50 +49,34 @@ public: const String & log_name_, size_t thread_pool_size); - /// Methods to manage local metadata of remote FS objects. - struct Metadata; + const String & getName() const final override { return name; } + + const String & getPath() const final override { return metadata_path; } + Metadata readMeta(const String & path) const; Metadata createMeta(const String & path) const; Metadata readOrCreateMetaForWriting(const String & path, WriteMode mode); - /// Disk info + UInt64 getTotalSpace() const override { return std::numeric_limits::max(); } - const String & getName() const final override { return name; } + UInt64 getAvailableSpace() const override { return std::numeric_limits::max(); } - const String & getPath() const final override { return metadata_path; } + UInt64 getUnreservedSpace() const override { return std::numeric_limits::max(); } - UInt64 getTotalSpace() const final override { return std::numeric_limits::max(); } - - UInt64 getAvailableSpace() const final override { return std::numeric_limits::max(); } - - UInt64 getUnreservedSpace() const final override { return std::numeric_limits::max(); } - - /// Read-only part + UInt64 getKeepingFreeSpace() const override { return 0; } bool exists(const String & path) const override; bool isFile(const String & path) const override; + void createFile(const String & path) override; + size_t getFileSize(const String & path) const override; - void listFiles(const String & path, std::vector & file_names) override; - - void setReadOnly(const String & path) override; - - bool isDirectory(const String & path) const override; - - DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; - - Poco::Timestamp getLastModified(const String & path) override; - - ReservationPtr reserve(UInt64 bytes) override; - - /// Write and modification part - void moveFile(const String & from_path, const String & to_path) override; void replaceFile(const String & from_path, const String & to_path) override; @@ -113,42 +91,39 @@ public: void removeSharedRecursive(const String & path, bool keep_in_remote_fs) override; + void listFiles(const String & path, std::vector & file_names) override; + + void setReadOnly(const String & path) override; + + bool isDirectory(const String & path) const override; + + void createDirectory(const String & path) override; + + void createDirectories(const String & path) override; + void clearDirectory(const String & path) override; void moveDirectory(const String & from_path, const String & to_path) override { moveFile(from_path, to_path); } void removeDirectory(const String & path) override; + DiskDirectoryIteratorPtr iterateDirectory(const String & path) override; + void setLastModified(const String & path, const Poco::Timestamp & timestamp) override; - /// Overridden by disks s3 and hdfs. 
- virtual void removeFromRemoteFS(RemoteFSPathKeeperPtr /* fs_paths_keeper */) - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} does not support removing remote files", getName()); - } - - /// Overridden by disks s3 and hdfs. - virtual RemoteFSPathKeeperPtr createFSPathKeeper() const - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} does not support FS paths keeper", getName()); - } - - /// Create part - - void createFile(const String & path) final override; - - void createDirectory(const String & path) override; - - void createDirectories(const String & path) override; + Poco::Timestamp getLastModified(const String & path) override; void createHardLink(const String & src_path, const String & dst_path) override; + ReservationPtr reserve(UInt64 bytes) override; + + virtual void removeFromRemoteFS(RemoteFSPathKeeperPtr fs_paths_keeper) = 0; + + virtual RemoteFSPathKeeperPtr createFSPathKeeper() const = 0; + protected: Poco::Logger * log; - - /// Disk name const String name; - /// URL + root path to store files in remote FS. const String remote_fs_root_path; const String metadata_path; @@ -168,11 +143,29 @@ private: using RemoteDiskPtr = std::shared_ptr; -/// Remote FS (S3, HDFS, WEB-server) metadata file layout: +/// Minimum info, required to be passed to ReadIndirectBufferFromRemoteFS +struct RemoteMetadata +{ + using PathAndSize = std::pair; + + /// Remote FS objects paths and their sizes. + std::vector remote_fs_objects; + + /// URI + const String & remote_fs_root_path; + + /// Relative path to metadata file on local FS. + const String & metadata_file_path; + + RemoteMetadata(const String & remote_fs_root_path_, const String & metadata_file_path_) + : remote_fs_root_path(remote_fs_root_path_), metadata_file_path(metadata_file_path_) {} +}; + +/// Remote FS (S3, HDFS) metadata file layout: /// FS objects, their number and total size of all FS objects. /// Each FS object represents a file path in remote FS and its size. -struct IDiskRemote::Metadata +struct IDiskRemote::Metadata : RemoteMetadata { /// Metadata file version. static constexpr UInt32 VERSION_ABSOLUTE_PATHS = 1; @@ -181,21 +174,12 @@ struct IDiskRemote::Metadata using PathAndSize = std::pair; - /// Remote FS (S3, HDFS, WEB-server) root path (uri + files directory path). - const String & remote_fs_root_path; - /// Disk path. const String & disk_path; - /// Relative path to metadata file on local FS. - String metadata_file_path; - - /// Total size of all remote FS objects. + /// Total size of all remote FS (S3, HDFS) objects. size_t total_size = 0; - /// Remote FS objects paths and their sizes. - std::vector remote_fs_objects; - /// Number of references (hardlinks) to this metadata file. 
UInt32 ref_count = 0; diff --git a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp index b2984685249..f6eb1afc655 100644 --- a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp @@ -16,7 +16,7 @@ namespace ErrorCodes template ReadIndirectBufferFromRemoteFS::ReadIndirectBufferFromRemoteFS( - IDiskRemote::Metadata metadata_) + RemoteMetadata metadata_) : metadata(std::move(metadata_)) { } diff --git a/src/Disks/ReadIndirectBufferFromRemoteFS.h b/src/Disks/ReadIndirectBufferFromRemoteFS.h index d3e61dc1289..065ec3c3cf8 100644 --- a/src/Disks/ReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/ReadIndirectBufferFromRemoteFS.h @@ -17,7 +17,7 @@ template class ReadIndirectBufferFromRemoteFS : public ReadBufferFromFileBase { public: - ReadIndirectBufferFromRemoteFS(IDiskRemote::Metadata metadata_); + ReadIndirectBufferFromRemoteFS(RemoteMetadata metadata_); off_t seek(off_t offset_, int whence) override; @@ -28,7 +28,7 @@ public: virtual std::unique_ptr createReadBuffer(const String & path) = 0; protected: - IDiskRemote::Metadata metadata; + RemoteMetadata metadata; private: std::unique_ptr initialize(); diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp index 1437373f26a..48ad2187757 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -134,7 +134,7 @@ public: std::unique_ptr createReadBuffer(const String & path) override { - return std::make_unique(client_ptr, bucket, metadata.remote_fs_root_path + path, max_single_read_retries, buf_size); + return std::make_unique(client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries, buf_size); } private: diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index f585a5747b8..d12ca19364f 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -13,6 +13,9 @@ #include #include #include +#include + +namespace fs = std::filesystem; namespace DB @@ -67,7 +70,7 @@ SetOrJoinBlockOutputStream::SetOrJoinBlockOutputStream( , backup_path(backup_path_) , backup_tmp_path(backup_tmp_path_) , backup_file_name(backup_file_name_) - , backup_buf(table_.disk->writeFile(backup_tmp_path + backup_file_name)) + , backup_buf(table_.disk->writeFile(fs::path(backup_tmp_path) / backup_file_name)) , compressed_backup_buf(*backup_buf) , backup_stream(compressed_backup_buf, 0, metadata_snapshot->getSampleBlock()) , persistent(persistent_) @@ -94,7 +97,7 @@ void SetOrJoinBlockOutputStream::writeSuffix() backup_buf->next(); backup_buf->finalize(); - table.disk->replaceFile(backup_tmp_path + backup_file_name, backup_path + backup_file_name); + table.disk->replaceFile(fs::path(backup_tmp_path) / backup_file_name, fs::path(backup_path) / backup_file_name); } } @@ -102,7 +105,7 @@ void SetOrJoinBlockOutputStream::writeSuffix() BlockOutputStreamPtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/) { UInt64 id = ++increment; - return std::make_shared(*this, metadata_snapshot, path, path + "tmp/", toString(id) + ".bin", persistent); + return std::make_shared(*this, metadata_snapshot, path, fs::path(path) / "tmp/", toString(id) + ".bin", persistent); } @@ -161,7 +164,7 @@ void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_sn { disk->removeRecursive(path); disk->createDirectories(path); - disk->createDirectories(path + "tmp/"); + disk->createDirectories(fs::path(path) / "tmp/"); Block header = 
metadata_snapshot->getSampleBlock(); header = header.sortColumns(); @@ -174,9 +177,9 @@ void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_sn void StorageSetOrJoinBase::restore() { - if (!disk->exists(path + "tmp/")) + if (!disk->exists(fs::path(path) / "tmp/")) { - disk->createDirectories(path + "tmp/"); + disk->createDirectories(fs::path(path) / "tmp/"); return; } From 7cc6588f96b9406b85165c82ea75f98a458ce6c4 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 18 Jun 2021 11:21:05 +0000 Subject: [PATCH 08/86] Tool to put files on server --- programs/CMakeLists.txt | 18 ++- programs/main.cpp | 6 + programs/web-server-exporter/CMakeLists.txt | 9 ++ .../web-server-exporter.cpp | 116 ++++++++++++++++++ 4 files changed, 145 insertions(+), 4 deletions(-) create mode 100644 programs/web-server-exporter/CMakeLists.txt create mode 100644 programs/web-server-exporter/web-server-exporter.cpp diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 12aec76a303..bec2238ab67 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -44,9 +44,9 @@ option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Li ${ENABLE_CLICKHOUSE_ALL}) # https://presentations.clickhouse.tech/matemarketing_2020/ -option (ENABLE_CLICKHOUSE_GIT_IMPORT "A tool to analyze Git repositories" - ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_GIT_IMPORT "A tool to analyze Git repositories" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER "A tool to put table data files to a web server" ${ENABLE_CLICKHOUSE_ALL}) option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_CLICKHOUSE_ALL}) if (NOT USE_NURAFT) @@ -216,6 +216,7 @@ add_subdirectory (obfuscator) add_subdirectory (install) add_subdirectory (git-import) add_subdirectory (bash-completion) +add_subdirectory (web-server-exporter) if (ENABLE_CLICKHOUSE_KEEPER) add_subdirectory (keeper) @@ -230,8 +231,8 @@ if (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE) endif () if (CLICKHOUSE_ONE_SHARED) - add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_GIT_IMPORT_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} ${CLICKHOUSE_KEEPER_SOURCES}) - target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} ${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_GIT_IMPORT_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK} ${CLICKHOUSE_KEEPER_LINK}) + add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} ${CLICKHOUSE_CLIENT_SOURCES} ${CLICKHOUSE_LOCAL_SOURCES} ${CLICKHOUSE_BENCHMARK_SOURCES} ${CLICKHOUSE_COPIER_SOURCES} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_SOURCES} ${CLICKHOUSE_COMPRESSOR_SOURCES} ${CLICKHOUSE_FORMAT_SOURCES} ${CLICKHOUSE_OBFUSCATOR_SOURCES} ${CLICKHOUSE_GIT_IMPORT_SOURCES} ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} ${CLICKHOUSE_KEEPER_SOURCES} ${CLICKHOUSE_WEB_SERVER_EXPORTER_SOURCES}) + target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} ${CLICKHOUSE_CLIENT_LINK} ${CLICKHOUSE_LOCAL_LINK} ${CLICKHOUSE_BENCHMARK_LINK} ${CLICKHOUSE_COPIER_LINK} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_LINK} ${CLICKHOUSE_COMPRESSOR_LINK} 
${CLICKHOUSE_FORMAT_LINK} ${CLICKHOUSE_OBFUSCATOR_LINK} ${CLICKHOUSE_GIT_IMPORT_LINK} ${CLICKHOUSE_ODBC_BRIDGE_LINK} ${CLICKHOUSE_KEEPER_LINK} ${CLICKHOUSE_WEB_SERVER_EXPORTER_LINK}) target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} ${CLICKHOUSE_CLIENT_INCLUDE} ${CLICKHOUSE_LOCAL_INCLUDE} ${CLICKHOUSE_BENCHMARK_INCLUDE} ${CLICKHOUSE_COPIER_INCLUDE} ${CLICKHOUSE_EXTRACT_FROM_CONFIG_INCLUDE} ${CLICKHOUSE_COMPRESSOR_INCLUDE} ${CLICKHOUSE_FORMAT_INCLUDE} ${CLICKHOUSE_OBFUSCATOR_INCLUDE} ${CLICKHOUSE_GIT_IMPORT_INCLUDE} ${CLICKHOUSE_ODBC_BRIDGE_INCLUDE} ${CLICKHOUSE_KEEPER_INCLUDE}) set_target_properties(clickhouse-lib PROPERTIES SOVERSION ${VERSION_MAJOR}.${VERSION_MINOR} VERSION ${VERSION_SO} OUTPUT_NAME clickhouse DEBUG_POSTFIX "") install (TARGETS clickhouse-lib LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) @@ -249,6 +250,7 @@ if (CLICKHOUSE_SPLIT_BINARY) clickhouse-obfuscator clickhouse-git-import clickhouse-copier + clickhouse-web-server-exporter ) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) @@ -310,6 +312,9 @@ else () if (ENABLE_CLICKHOUSE_GIT_IMPORT) clickhouse_target_link_split_lib(clickhouse git-import) endif () + if (ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER) + clickhouse_target_link_split_lib(clickhouse web-server-exporter) + endif () if (ENABLE_CLICKHOUSE_KEEPER) clickhouse_target_link_split_lib(clickhouse keeper) endif() @@ -368,6 +373,11 @@ else () install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import) endif () + if (ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER) + add_custom_target (clickhouse-web-server-exporter ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-web-server-exporter DEPENDS clickhouse) + install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-web-server-exporter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-web-server-exporter) + endif () if (ENABLE_CLICKHOUSE_KEEPER) add_custom_target (clickhouse-keeper ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper DEPENDS clickhouse) install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/programs/main.cpp b/programs/main.cpp index c5df2596422..c3320e5be1a 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -59,6 +59,9 @@ int mainEntryClickHouseGitImport(int argc, char ** argv); #if ENABLE_CLICKHOUSE_KEEPER int mainEntryClickHouseKeeper(int argc, char ** argv); #endif +#if ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER +int mainEntryClickHouseWebServerExporter(int argc, char ** argv); +#endif #if ENABLE_CLICKHOUSE_INSTALL int mainEntryClickHouseInstall(int argc, char ** argv); int mainEntryClickHouseStart(int argc, char ** argv); @@ -125,6 +128,9 @@ std::pair clickhouse_applications[] = {"stop", mainEntryClickHouseStop}, {"status", mainEntryClickHouseStatus}, {"restart", mainEntryClickHouseRestart}, +#endif +#if ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER + {"web-server-exporter", mainEntryClickHouseWebServerExporter}, #endif {"hash-binary", mainEntryClickHouseHashBinary}, }; diff --git a/programs/web-server-exporter/CMakeLists.txt b/programs/web-server-exporter/CMakeLists.txt new file mode 100644 index 00000000000..c08d13d473b --- /dev/null +++ b/programs/web-server-exporter/CMakeLists.txt @@ -0,0 +1,9 @@ +set (CLICKHOUSE_WEB_SERVER_EXPORTER_SOURCES web-server-exporter.cpp) + +set (CLICKHOUSE_WEB_SERVER_EXPORTER_LINK 
+ PRIVATE + boost::program_options + dbms +) + +clickhouse_program_add(web-server-exporter) diff --git a/programs/web-server-exporter/web-server-exporter.cpp b/programs/web-server-exporter/web-server-exporter.cpp new file mode 100644 index 00000000000..6461147346a --- /dev/null +++ b/programs/web-server-exporter/web-server-exporter.cpp @@ -0,0 +1,116 @@ +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace fs = std::filesystem; + + +namespace DB +{ + +void processTableFiles(const String & url, const fs::path & path, const String & files_prefix) +{ + WriteBufferFromHTTP metadata_buf(Poco::URI(fs::path(url) / ".index"), Poco::Net::HTTPRequest::HTTP_PUT); + fs::directory_iterator dir_end; + + auto process_file = [&](const String & file_name, const String & file_path) + { + auto remote_file_name = files_prefix + "-" + file_name; + writeText(remote_file_name, metadata_buf); + writeChar('\t', metadata_buf); + writeIntText(fs::file_size(file_path), metadata_buf); + writeChar('\n', metadata_buf); + + auto src_buf = createReadBufferFromFileBase(file_path, fs::file_size(file_path), 0, 0, nullptr); + WriteBufferFromHTTP dst_buf(Poco::URI(fs::path(url) / remote_file_name), Poco::Net::HTTPRequest::HTTP_PUT); + + copyData(*src_buf, dst_buf); + dst_buf.next(); + dst_buf.finalize(); + }; + + for (fs::directory_iterator dir_it(path); dir_it != dir_end; ++dir_it) + { + if (dir_it->is_directory()) + { + fs::directory_iterator files_end; + for (fs::directory_iterator file_it(dir_it->path()); file_it != files_end; ++file_it) + { + process_file(dir_it->path().filename().string() + "-" + file_it->path().filename().string(), file_it->path()); + } + } + else + { + process_file(dir_it->path().filename(), dir_it->path()); + } + } + + metadata_buf.next(); + metadata_buf.finalize(); +} + +} + + +int mainEntryClickHouseWebServerExporter(int argc, char ** argv) +{ + using namespace DB; + namespace po = boost::program_options; + + po::options_description description("Allowed options", getTerminalWidth()); + description.add_options() + ("help,h", "produce help message") + ("metadata-path", po::value(), "Metadata path (select data_paths from system.tables where name='table_name'") + ("url", po::value(), "Web server url") + ("files-prefix", po::value(), "Prefix for stored files"); + + po::parsed_options parsed = po::command_line_parser(argc, argv).options(description).run(); + po::variables_map options; + po::store(parsed, options); + po::notify(options); + + if (options.empty() || options.count("help")) + { + std::cout << description << std::endl; + exit(0); + } + + String url, metadata_path, files_prefix; + + if (options.count("metadata-path")) + metadata_path = options["metadata-path"].as(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No metadata-path option passed"); + + if (options.count("files-prefix")) + files_prefix = options["files-prefix"].as(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No files-prefix option passed"); + + fs::path fs_path = fs::canonical(metadata_path); + re2::RE2 matcher("(.*/[\\w]{3}/[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12})/(.*)"); + + if (!re2::RE2::FullMatch(metadata_path, matcher)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected path: {}", metadata_path); + + if (options.count("url")) + url = options["url"].as(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No url option passed"); + + processTableFiles(url, fs_path, files_prefix); + + return 0; +} From 
b30cb3f7dab47c96caca8c55ef8a11838e19c454 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 18 Jun 2021 14:13:00 +0000 Subject: [PATCH 09/86] Test --- programs/config_tools.h.in | 1 + .../configs/storage_conf.xml | 17 ++---- .../configs/storage_conf_web.xml | 21 +++++++ .../test_disk_over_web_server/test.py | 57 ++++++++----------- 4 files changed, 50 insertions(+), 46 deletions(-) create mode 100644 tests/integration/test_disk_over_web_server/configs/storage_conf_web.xml diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in index 50ba0c16a83..91484348586 100644 --- a/programs/config_tools.h.in +++ b/programs/config_tools.h.in @@ -17,3 +17,4 @@ #cmakedefine01 ENABLE_CLICKHOUSE_ODBC_BRIDGE #cmakedefine01 ENABLE_CLICKHOUSE_LIBRARY_BRIDGE #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER +#cmakedefine01 ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER diff --git a/tests/integration/test_disk_over_web_server/configs/storage_conf.xml b/tests/integration/test_disk_over_web_server/configs/storage_conf.xml index 349725882d6..be3b8b850bc 100644 --- a/tests/integration/test_disk_over_web_server/configs/storage_conf.xml +++ b/tests/integration/test_disk_over_web_server/configs/storage_conf.xml @@ -2,26 +2,19 @@ - - web - http://nginx:80/ - - + local / - + - +
-                        <disk>web</disk>
-                    </main>
-                    <external>
-                        <disk>hdd</disk>
-                    </external>
+                        <disk>hdd</disk>
+                    </main>
                 </volumes>
-            </web>
+            </def>
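
For reference, the resulting storage_conf.xml plausibly reads as below. Element names are assumed from ClickHouse's standard storage_configuration layout; the local hdd disk and the def policy follow from the hunk above and from the test in this patch, which creates its source table with storage_policy = 'def':

<yandex>
    <storage_configuration>
        <disks>
            <hdd>
                <type>local</type>
                <path>/</path>
            </hdd>
        </disks>
        <policies>
            <def>
                <volumes>
                    <main>
                        <disk>hdd</disk>
                    </main>
                </volumes>
            </def>
        </policies>
    </storage_configuration>
</yandex>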
diff --git a/tests/integration/test_disk_over_web_server/configs/storage_conf_web.xml b/tests/integration/test_disk_over_web_server/configs/storage_conf_web.xml new file mode 100644 index 00000000000..c074b078dad --- /dev/null +++ b/tests/integration/test_disk_over_web_server/configs/storage_conf_web.xml @@ -0,0 +1,21 @@ + + + + + + web + http://nginx:80/test1/ + data + + + + + +
+                        <disk>web</disk>
+                    </main>
+                </volumes>
+            </web>
+        </policies>
+    </storage_configuration>
+</yandex>
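
For reference, the .index file that clickhouse-web-server-exporter publishes (and that the web disk configured above fetches back) holds one tab-separated entry per exported file: the remote name, built as files_prefix-directory-file for files inside part directories and files_prefix-file for files at the table root, followed by the size in bytes. A sketch for files_prefix = data; the part name and sizes here are illustrative:

data-all_1_1_0-checksums.txt	261
data-all_1_1_0-columns.txt	94
data-all_1_1_0-data.bin	4096
data-all_1_1_0-primary.idx	16
data-format_version.txt	1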
diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index 65679fa6392..ccd619ccc78 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -7,7 +7,8 @@ from helpers.cluster import ClickHouseCluster def cluster(): try: cluster = ClickHouseCluster(__file__) - cluster.add_instance("node", main_configs=["configs/storage_conf.xml"], with_nginx=True) + cluster.add_instance("node1", main_configs=["configs/storage_conf.xml"], with_nginx=True) + cluster.add_instance("node2", main_configs=["configs/storage_conf_web.xml"], with_nginx=True) cluster.start() yield cluster @@ -15,43 +16,31 @@ def cluster(): cluster.shutdown() -def test_insert_select(cluster): - node = cluster.instances["node"] - node.query(""" - CREATE TABLE test1 (id Int32) +def test_usage(cluster): + node1 = cluster.instances["node1"] + node1.query(""" + CREATE TABLE data (id Int32) ENGINE = MergeTree() ORDER BY id - SETTINGS storage_policy = 'web'; + SETTINGS storage_policy = 'def'; """) + node1.query("INSERT INTO data SELECT number FROM numbers(100)") + expected = node1.query("SELECT * FROM data ORDER BY id") - node.query("INSERT INTO test1 SELECT number FROM numbers(100)") - result = node.query("SELECT count() FROM test1") - assert(int(result) == 100) + metadata_path = node1.query("SELECT data_paths FROM system.tables WHERE name='data'") + metadata_path = metadata_path[metadata_path.find('/'):metadata_path.rfind('/')+1] + print(f'Metadata: {metadata_path}') - node.query("DETACH TABLE test1") - node.query("ATTACH TABLE test1") - result = node.query("SELECT count() FROM test1") - assert(int(result) == 100) + node1.exec_in_container(['bash', '-c', + '/usr/bin/clickhouse web-server-exporter --files-prefix data --url http://nginx:80/test1 --metadata-path {}'.format(metadata_path)], user='root') + parts = metadata_path.split('/') + uuid = parts[3] + print(f'UUID: {uuid}') + node2 = cluster.instances["node2"] - node = cluster.instances["node"] - node.query(""" - CREATE TABLE test2 (id Int32) - ENGINE = MergeTree() ORDER BY id + node2.query(""" + ATTACH TABLE test1 UUID '{}' + (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'; - """) - - node.query("INSERT INTO test2 SELECT number FROM numbers(500000)") - result = node.query("SELECT id FROM test2 ORDER BY id") - expected = node.query("SELECT number FROM numbers(500000)") + """.format(uuid)) + result = node2.query("SELECT * FROM test1 ORDER BY id") assert(result == expected) - node.query("INSERT INTO test2 SELECT number FROM numbers(500000, 500000)") - node.query("DETACH TABLE test2") - node.query("ATTACH TABLE test2") - node.query("INSERT INTO test2 SELECT number FROM numbers(1000000, 500000)") - result = node.query("SELECT count() FROM test2") - assert(int(result) == 1500000) - result = node.query("SELECT id FROM test2 WHERE id % 100 = 0 ORDER BY id") - assert(result == node.query("SELECT number FROM numbers(1500000) WHERE number % 100 = 0")) - result = node.query("SELECT id FROM test2 ORDER BY id") - assert(result == node.query("SELECT number FROM numbers(1500000)")) - result = node.query("SELECT id FROM test2 WHERE id > 500002 AND id < 1000448 ORDER BY id") - assert(result == node.query("SELECT number FROM numbers(1500000) WHERE number > 500002 AND number < 1000448")) From 7f356a5fbc5f88fd2aeb2c39e77fb36ce19a3d98 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 18 Jun 2021 14:32:08 +0000 Subject: [PATCH 10/86] Fix readbuffer 
--- src/Disks/DiskWebServer.h | 3 --- src/Disks/IDiskRemote.h | 2 +- src/IO/ReadIndirectBufferFromWebServer.cpp | 14 ++++++++++---- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 2cddb1c4ba1..cf7f2419999 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -22,8 +22,6 @@ struct DiskWebServerSettings }; -/// Storage to store data on a web server and metadata on the local disk. - class DiskWebServer : public IDisk, WithContext { using SettingsPtr = std::unique_ptr; @@ -64,7 +62,6 @@ public: const String & getName() const final override { return name; } - /// ??? const String & getPath() const final override { return metadata_path; } UInt64 getTotalSpace() const final override { return std::numeric_limits::max(); } diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index eca91c50cc5..9e97598f43a 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -155,7 +155,7 @@ struct RemoteMetadata const String & remote_fs_root_path; /// Relative path to metadata file on local FS. - const String & metadata_file_path; + const String metadata_file_path; RemoteMetadata(const String & remote_fs_root_path_, const String & metadata_file_path_) : remote_fs_root_path(remote_fs_root_path_), metadata_file_path(metadata_file_path_) {} diff --git a/src/IO/ReadIndirectBufferFromWebServer.cpp b/src/IO/ReadIndirectBufferFromWebServer.cpp index 1c5e63f2f45..ef21f16e357 100644 --- a/src/IO/ReadIndirectBufferFromWebServer.cpp +++ b/src/IO/ReadIndirectBufferFromWebServer.cpp @@ -35,6 +35,10 @@ ReadIndirectBufferFromWebServer::ReadIndirectBufferFromWebServer(const String & std::unique_ptr ReadIndirectBufferFromWebServer::initialize() { Poco::URI uri(url); + + ReadWriteBufferFromHTTP::HTTPHeaderEntries headers; + headers.emplace_back(std::make_pair("Range:", fmt::format("bytes:{}-", offset))); + return std::make_unique( uri, Poco::Net::HTTPRequest::HTTP_GET, @@ -51,8 +55,6 @@ bool ReadIndirectBufferFromWebServer::nextImpl() if (!impl) impl = initialize(); - pos = impl->position(); - bool ret = false, successful_read = false; auto sleep_milliseconds = std::chrono::milliseconds(100); @@ -67,6 +69,9 @@ bool ReadIndirectBufferFromWebServer::nextImpl() catch (const Exception & e) { LOG_WARNING(log, "Read attempt {}/{} failed from {}. ({})", try_num, max_read_tries, url, e.message()); + + impl.reset(); + impl = initialize(); } std::this_thread::sleep_for(sleep_milliseconds); @@ -79,7 +84,8 @@ bool ReadIndirectBufferFromWebServer::nextImpl() if (ret) { working_buffer = internal_buffer = impl->buffer(); - /// Do not update pos here, because it is anyway overwritten after nextImpl() in ReadBuffer::next(). 
+ pos = working_buffer.begin(); + offset += working_buffer.size(); } return ret; @@ -105,7 +111,7 @@ off_t ReadIndirectBufferFromWebServer::seek(off_t offset_, int whence) off_t ReadIndirectBufferFromWebServer::getPosition() { - return offset + count(); + return offset - available(); } } From 20316076420e55f22b2aa00ff18b2e5dcdfbc880 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 19 Jun 2021 11:26:48 +0000 Subject: [PATCH 11/86] Reimplement --- .../web-server-exporter.cpp | 19 ++- src/Disks/DiskWebServer.cpp | 140 +++++++++--------- src/Disks/DiskWebServer.h | 29 +++- .../test_disk_over_web_server/test.py | 55 ++++--- 4 files changed, 139 insertions(+), 104 deletions(-) diff --git a/programs/web-server-exporter/web-server-exporter.cpp b/programs/web-server-exporter/web-server-exporter.cpp index 6461147346a..ca2813b4dfb 100644 --- a/programs/web-server-exporter/web-server-exporter.cpp +++ b/programs/web-server-exporter/web-server-exporter.cpp @@ -1,5 +1,3 @@ -#include - #include #include @@ -16,18 +14,20 @@ namespace fs = std::filesystem; +#define UUID_PATTERN "[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12}" +#define EXTRACT_UUID_PATTERN fmt::format(".*/({})/.*", UUID_PATTERN) namespace DB { -void processTableFiles(const String & url, const fs::path & path, const String & files_prefix) +void processTableFiles(const String & url, const fs::path & path, const String & files_prefix, String uuid) { - WriteBufferFromHTTP metadata_buf(Poco::URI(fs::path(url) / ".index"), Poco::Net::HTTPRequest::HTTP_PUT); + WriteBufferFromHTTP metadata_buf(Poco::URI(fs::path(url) / (".index-" + uuid)), Poco::Net::HTTPRequest::HTTP_PUT); fs::directory_iterator dir_end; auto process_file = [&](const String & file_name, const String & file_path) { - auto remote_file_name = files_prefix + "-" + file_name; + auto remote_file_name = files_prefix + "-" + uuid + "-" + file_name; writeText(remote_file_name, metadata_buf); writeChar('\t', metadata_buf); writeIntText(fs::file_size(file_path), metadata_buf); @@ -100,17 +100,16 @@ int mainEntryClickHouseWebServerExporter(int argc, char ** argv) throw Exception(ErrorCodes::BAD_ARGUMENTS, "No files-prefix option passed"); fs::path fs_path = fs::canonical(metadata_path); - re2::RE2 matcher("(.*/[\\w]{3}/[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12})/(.*)"); - - if (!re2::RE2::FullMatch(metadata_path, matcher)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected path: {}", metadata_path); + String uuid; + if (!RE2::Extract(metadata_path, EXTRACT_UUID_PATTERN, "\\1", &uuid)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot extract uuid for: {}", metadata_path); if (options.count("url")) url = options["url"].as(); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "No url option passed"); - processTableFiles(url, fs_path, files_prefix); + processTableFiles(url, fs_path, files_prefix, uuid); return 0; } diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 5c9ea0bab31..049e1780d48 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -13,12 +13,15 @@ #include -#define DIRECTORY_FILE_PATTERN(prefix) fmt::format("{}-(\\w+)-(\\w+\\.\\w+)", prefix) -#define ROOT_FILE_PATTERN(prefix) fmt::format("{}-(\\w+\\.\\w+)", prefix) -#define MATCH_DIRECTORY_FILE_PATTERN(prefix) fmt::format("{}/(\\w+)/(\\w+\\.\\w+)", prefix) -#define MATCH_ROOT_FILE_PATTERN(prefix) fmt::format("{}/(\\w+\\.\\w+)", prefix) -#define MATCH_DIRECTORY_PATTERN(prefix) fmt::format("{}/(\\w+)", prefix) +#define UUID_PATTERN "[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12}" 
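+/// Matches strings like "123e4567-e89b-12d3-a456-426614174000" (a sample UUID, for illustration only).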
+#define EXTRACT_UUID_PATTERN fmt::format(".*/({})/.*", UUID_PATTERN) + +#define DIRECTORY_FILE_PATTERN(prefix) fmt::format("{}-({})-(\\w+)-(\\w+\\.\\w+)", prefix, UUID_PATTERN) +#define ROOT_FILE_PATTERN(prefix) fmt::format("{}-({})-(\\w+\\.\\w+)", prefix, UUID_PATTERN) + +#define MATCH_DIRECTORY_FILE_PATTERN fmt::format(".*/({})/(\\w+)/(\\w+\\.\\w+)", UUID_PATTERN) +#define MATCH_ROOT_FILE_PATTERN fmt::format(".*/({})/(\\w+\\.\\w+)", UUID_PATTERN) namespace DB @@ -30,17 +33,13 @@ namespace ErrorCodes } -static const auto store_uuid_prefix = ".*/[\\w]{3}/[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12}"; - - -/// Fetch contents of .index file from given uri path. -void DiskWebServer::Metadata::initialize(const String & uri_with_path, const String & files_prefix, ContextPtr context) const +void DiskWebServer::Metadata::initialize(const String & uri_with_path, const String & files_prefix, const String & table_uuid, ContextPtr context) const { - ReadWriteBufferFromHTTP metadata_buf(Poco::URI(fs::path(uri_with_path) / ".index"), + ReadWriteBufferFromHTTP metadata_buf(Poco::URI(fs::path(uri_with_path) / (".index-" + table_uuid)), Poco::Net::HTTPRequest::HTTP_GET, ReadWriteBufferFromHTTP::OutStreamCallback(), ConnectionTimeouts::getHTTPTimeouts(context)); - String directory, file, remote_file_name; + String uuid, directory, file, remote_file_name; size_t file_size; while (!metadata_buf.eof()) @@ -52,19 +51,21 @@ void DiskWebServer::Metadata::initialize(const String & uri_with_path, const Str LOG_DEBUG(&Poco::Logger::get("DiskWeb"), "Read file: {}, size: {}", remote_file_name, file_size); /* - * URI/ {prefix}-all_x_x_x-{file} + * URI/ {prefix}-{uuid}-all_x_x_x-{file} * ... - * {prefix}-format_version.txt - * {prefix}-detached-{file} + * {prefix}-{uuid}-format_version.txt + * {prefix}-{uuid}-detached-{file} * ... */ - if (RE2::FullMatch(remote_file_name, re2::RE2(DIRECTORY_FILE_PATTERN(files_prefix)), &directory, &file)) + if (RE2::FullMatch(remote_file_name, DIRECTORY_FILE_PATTERN(files_prefix), &uuid, &directory, &file)) { - files[directory].insert({file, file_size}); + assert(uuid == table_uuid); + tables_data[uuid][directory].emplace(File(file, file_size)); } - else if (RE2::FullMatch(remote_file_name, re2::RE2(ROOT_FILE_PATTERN(files_prefix)), &file)) + else if (RE2::FullMatch(remote_file_name, ROOT_FILE_PATTERN(files_prefix), &uuid, &file)) { - files[file].insert({file, file_size}); + assert(uuid == table_uuid); + tables_data[uuid][file].emplace(File(file, file_size)); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected file: {}", remote_file_name); @@ -72,20 +73,23 @@ void DiskWebServer::Metadata::initialize(const String & uri_with_path, const Str } -/* Iterate list of files from .index file on a web server (its contents were put - * into DiskWebServer::Metadata) and convert them into paths as though paths in local fs. 
- */ +template class DiskWebDirectoryIterator final : public IDiskDirectoryIterator { public: - DiskWebDirectoryIterator(DiskWebServer::Metadata & metadata_, const String & directory_root_) - : metadata(metadata_), iter(metadata.files.begin()), directory_root(directory_root_) + using Directory = std::unordered_map; + + DiskWebDirectoryIterator(Directory & directory_, const String & directory_root_) + : directory(directory_), iter(directory.begin()), directory_root(directory_root_) { } void next() override { ++iter; } - bool isValid() const override { return iter != metadata.files.end(); } + bool isValid() const override + { + return iter != directory.end(); + } String path() const override { @@ -98,8 +102,8 @@ public: } private: - DiskWebServer::Metadata & metadata; - DiskWebServer::FilesDirectory::iterator iter; + Directory & directory; + typename Directory::iterator iter; const String directory_root; }; @@ -163,48 +167,44 @@ String DiskWebServer::getFileName(const String & path) const { String result; - if (RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN(store_uuid_prefix)) - && RE2::Extract(path, MATCH_DIRECTORY_FILE_PATTERN(".*"), fmt::format("{}-\\1-\\2", settings->files_prefix), &result)) + if (RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN) + && RE2::Extract(path, MATCH_DIRECTORY_FILE_PATTERN, fmt::format("{}-\\1-\\2-\\3", settings->files_prefix), &result)) return result; - if (RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN(store_uuid_prefix)) - && RE2::Extract(path, MATCH_ROOT_FILE_PATTERN(".*"), fmt::format("{}-\\1", settings->files_prefix), &result)) + if (RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN) + && RE2::Extract(path, MATCH_ROOT_FILE_PATTERN, fmt::format("{}-\\1-\\2", settings->files_prefix), &result)) return result; throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected file: {}", path); } -bool DiskWebServer::findFileInMetadata(const String & path, FileAndSize & file_info) const +bool DiskWebServer::findFileInMetadata(const String & path, File & file_info) const { - if (metadata.files.empty()) - metadata.initialize(uri, settings->files_prefix, getContext()); + String table_uuid, directory_name, file_name; - String directory_name, file_name; - - if (RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN(store_uuid_prefix), &directory_name, &file_name)) + if (RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN, &table_uuid, &directory_name, &file_name) + || RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN, &table_uuid, &file_name)) { - const auto & directory_files = metadata.files.find(directory_name)->second; - auto file = directory_files.find(file_name); + if (directory_name.empty()) + directory_name = file_name; - if (file == directory_files.end()) + if (!metadata.tables_data.count(table_uuid)) return false; - file_info = std::make_pair(file_name, file->second); - } - else if (RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN(store_uuid_prefix), &file_name)) - { - auto file = metadata.files.find(file_name); - - if (file == metadata.files.end()) + if (!metadata.tables_data[table_uuid].count(directory_name)) return false; - file_info = std::make_pair(file_name, file->second.find(file_name)->second); - } - else - return false; + const auto & files = metadata.tables_data[table_uuid][directory_name]; + auto file = files.find(File(file_name)); + if (file == files.end()) + return false; - return true; + file_info = *file; + return true; + } + + return false; } @@ -212,11 +212,11 @@ bool DiskWebServer::exists(const String & path) const { LOG_DEBUG(log, "Checking existance of file: {}", 
path); - /// Assume root directory exists. - if (re2::RE2::FullMatch(path, re2::RE2(fmt::format("({})/", store_uuid_prefix)))) - return true; + // Assume root directory exists. + //if (re2::RE2::FullMatch(path, re2::RE2(fmt::format("({})/", store_uuid_prefix)))) + // return true; - FileAndSize file; + File file; return findFileInMetadata(path, file); } @@ -225,12 +225,12 @@ std::unique_ptr DiskWebServer::readFile(const String & p { LOG_DEBUG(log, "Read from file by path: {}", path); - FileAndSize file; + File file; if (!findFileInMetadata(path, file)) throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} not found", path); RemoteMetadata meta(uri, fs::path(path).parent_path() / fs::path(path).filename()); - meta.remote_fs_objects.emplace_back(std::make_pair(getFileName(path), file.second)); + meta.remote_fs_objects.emplace_back(std::make_pair(getFileName(path), file.size)); auto reader = std::make_unique(uri, meta, getContext(), settings->max_read_tries, buf_size); return std::make_unique(std::move(reader), settings->min_bytes_for_seek); @@ -246,28 +246,39 @@ std::unique_ptr DiskWebServer::writeFile(const String & DiskDirectoryIteratorPtr DiskWebServer::iterateDirectory(const String & path) { LOG_DEBUG(log, "Iterate directory: {}", path); - return std::make_unique(metadata, path); + String uuid; + + if (RE2::FullMatch(path, ".*/store/")) + return std::make_unique>(metadata.tables_data, path); + + if (!RE2::Extract(path, EXTRACT_UUID_PATTERN, "\\1", &uuid)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot extract uuid for: {}", path); + + if (!metadata.tables_data.count(uuid)) + metadata.initialize(uri, settings->files_prefix, uuid, getContext()); + + return std::make_unique>(metadata.tables_data[uuid], path); } size_t DiskWebServer::getFileSize(const String & path) const { - FileAndSize file; + File file; if (!findFileInMetadata(path, file)) throw Exception(ErrorCodes::LOGICAL_ERROR, "File {} not found", path); - return file.second; + return file.size; } bool DiskWebServer::isFile(const String & path) const { - return RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN(".*")) || RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN(".*")); + return RE2::FullMatch(path, ".*/\\w+.\\w+"); } bool DiskWebServer::isDirectory(const String & path) const { - return RE2::FullMatch(path, MATCH_DIRECTORY_PATTERN(".*")); + return RE2::FullMatch(path, ".*/\\w+"); } @@ -278,9 +289,6 @@ void registerDiskWebServer(DiskFactory & factory) const String & config_prefix, ContextConstPtr context) -> DiskPtr { - fs::path disk = fs::path(context->getPath()) / "disks" / disk_name; - fs::create_directories(disk); - String uri{config.getString(config_prefix + ".endpoint")}; if (!uri.ends_with('/')) throw Exception(ErrorCodes::BAD_ARGUMENTS, "URI must end with '/', but '{}' doesn't.", uri); diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index cf7f2419999..0fc99122130 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -33,20 +33,37 @@ public: ContextPtr context, SettingsPtr settings_); - using FileAndSize = std::pair; - using FilesInfo = std::unordered_map; - using FilesDirectory = std::map; + struct File + { + String name; + size_t size; + File(const String & name_ = "", const size_t size_ = 0) : name(name_), size(size_) {} + bool operator<(const File & other) const { return name < other.name; } + bool operator==(const File & other) const { return name == other.name; } + }; + + using Directory = std::set; + + /* Each root directory contains either directories like + * 
all_x_x_x/{file}, detached/, etc, or root files like format_version.txt. + */ + using RootDirectory = std::unordered_map; + + /* Each table is attached via ATTACH TABLE table UUID . + * Then there is a mapping: {table uuid} -> {root directory} + */ + using TableDirectories = std::unordered_map; struct Metadata { /// Fetch meta only when required. - mutable FilesDirectory files; + mutable TableDirectories tables_data; Metadata() {} - void initialize(const String & uri_with_path, const String & files_prefix, ContextPtr context) const; + void initialize(const String & uri_with_path, const String & files_prefix, const String & uuid, ContextPtr context) const; }; - bool findFileInMetadata(const String & path, FileAndSize & file_info) const; + bool findFileInMetadata(const String & path, File & file_info) const; String getFileName(const String & path) const; diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index ccd619ccc78..4e967e1e48c 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -18,29 +18,40 @@ def cluster(): def test_usage(cluster): node1 = cluster.instances["node1"] - node1.query(""" - CREATE TABLE data (id Int32) - ENGINE = MergeTree() ORDER BY id - SETTINGS storage_policy = 'def'; - """) - node1.query("INSERT INTO data SELECT number FROM numbers(100)") - expected = node1.query("SELECT * FROM data ORDER BY id") + expected = "" + uuids = [] + for i in range(3): + node1.query(""" CREATE TABLE data{} (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'def';""".format(i)) + node1.query("INSERT INTO data{} SELECT number FROM numbers(500000 * {})".format(i, i + 1)) + expected = node1.query("SELECT * FROM data{} ORDER BY id".format(i)) - metadata_path = node1.query("SELECT data_paths FROM system.tables WHERE name='data'") - metadata_path = metadata_path[metadata_path.find('/'):metadata_path.rfind('/')+1] - print(f'Metadata: {metadata_path}') + metadata_path = node1.query("SELECT data_paths FROM system.tables WHERE name='data{}'".format(i)) + metadata_path = metadata_path[metadata_path.find('/'):metadata_path.rfind('/')+1] + print(f'Metadata: {metadata_path}') + + node1.exec_in_container(['bash', '-c', + '/usr/bin/clickhouse web-server-exporter --files-prefix data --url http://nginx:80/test1 --metadata-path {}'.format(metadata_path)], user='root') + parts = metadata_path.split('/') + uuids.append(parts[3]) + print(f'UUID: {parts[3]}') - node1.exec_in_container(['bash', '-c', - '/usr/bin/clickhouse web-server-exporter --files-prefix data --url http://nginx:80/test1 --metadata-path {}'.format(metadata_path)], user='root') - parts = metadata_path.split('/') - uuid = parts[3] - print(f'UUID: {uuid}') node2 = cluster.instances["node2"] + for i in range(3): + node2.query(""" + ATTACH TABLE test{} UUID '{}' + (id Int32) ENGINE = MergeTree() ORDER BY id + SETTINGS storage_policy = 'web'; - node2.query(""" - ATTACH TABLE test1 UUID '{}' - (id Int32) ENGINE = MergeTree() ORDER BY id - SETTINGS storage_policy = 'web'; - """.format(uuid)) - result = node2.query("SELECT * FROM test1 ORDER BY id") - assert(result == expected) + -- A crutch with detach/attach, need to fix + DETACH TABLE test{}; + ATTACH TABLE test{}; + """.format(i, uuids[i], i, i)) + + result = node2.query("SELECT count() FROM test{}".format(i)) + assert(int(result) == 500000 * (i+1)) + + #result = node2.query("SELECT id FROM test{} WHERE id % 56 = 3 ORDER BY id".format(i)) + 
#assert(result == node1.query("SELECT id FROM data{} WHERE id % 56 = 3 ORDER BY id".format(i))) + #result = node2.query("SELECT id FROM test{} WHERE id > 789999 AND id < 1487000".format(i)) + #assert(result == node1.query("SELECT id FROM data{} WHERE id > 789999 AND id < 1487000".format(i))) + print(f"Ok {i}") From babb11a887c926ac0286cb0d8ffd5c01dc391161 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 19 Jun 2021 16:36:39 +0000 Subject: [PATCH 12/86] Some fixes --- programs/web-server-exporter/CMakeLists.txt | 2 +- ...cpp => clickhouse-web-server-exporter.cpp} | 9 +++++++- src/Disks/DiskWebServer.cpp | 19 +++++++++-------- src/Disks/DiskWebServer.h | 21 ++++++++++++------- src/IO/HTTPCommon.cpp | 1 + src/IO/ReadIndirectBufferFromWebServer.cpp | 8 ++++--- .../test_disk_over_web_server/test.py | 14 ++++++------- 7 files changed, 45 insertions(+), 29 deletions(-) rename programs/web-server-exporter/{web-server-exporter.cpp => clickhouse-web-server-exporter.cpp} (96%) diff --git a/programs/web-server-exporter/CMakeLists.txt b/programs/web-server-exporter/CMakeLists.txt index c08d13d473b..91c585049a7 100644 --- a/programs/web-server-exporter/CMakeLists.txt +++ b/programs/web-server-exporter/CMakeLists.txt @@ -1,4 +1,4 @@ -set (CLICKHOUSE_WEB_SERVER_EXPORTER_SOURCES web-server-exporter.cpp) +set (CLICKHOUSE_WEB_SERVER_EXPORTER_SOURCES clickhouse-web-server-exporter.cpp) set (CLICKHOUSE_WEB_SERVER_EXPORTER_LINK PRIVATE diff --git a/programs/web-server-exporter/web-server-exporter.cpp b/programs/web-server-exporter/clickhouse-web-server-exporter.cpp similarity index 96% rename from programs/web-server-exporter/web-server-exporter.cpp rename to programs/web-server-exporter/clickhouse-web-server-exporter.cpp index ca2813b4dfb..d193fa3f5ea 100644 --- a/programs/web-server-exporter/web-server-exporter.cpp +++ b/programs/web-server-exporter/clickhouse-web-server-exporter.cpp @@ -17,9 +17,16 @@ namespace fs = std::filesystem; #define UUID_PATTERN "[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12}" #define EXTRACT_UUID_PATTERN fmt::format(".*/({})/.*", UUID_PATTERN) + namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + + void processTableFiles(const String & url, const fs::path & path, const String & files_prefix, String uuid) { WriteBufferFromHTTP metadata_buf(Poco::URI(fs::path(url) / (".index-" + uuid)), Poco::Net::HTTPRequest::HTTP_PUT); @@ -102,7 +109,7 @@ int mainEntryClickHouseWebServerExporter(int argc, char ** argv) fs::path fs_path = fs::canonical(metadata_path); String uuid; if (!RE2::Extract(metadata_path, EXTRACT_UUID_PATTERN, "\\1", &uuid)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot extract uuid for: {}", metadata_path); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot extract uuid for: {}", metadata_path); if (options.count("url")) url = options["url"].as(); diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 049e1780d48..dbeea49fbea 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -30,6 +30,7 @@ namespace DB namespace ErrorCodes { extern const int BAD_ARGUMENTS; + extern const int LOGICAL_ERROR; } @@ -59,12 +60,16 @@ void DiskWebServer::Metadata::initialize(const String & uri_with_path, const Str */ if (RE2::FullMatch(remote_file_name, DIRECTORY_FILE_PATTERN(files_prefix), &uuid, &directory, &file)) { - assert(uuid == table_uuid); + if (uuid != table_uuid) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected uuid: {}, expected: {}", uuid, table_uuid); + 
tables_data[uuid][directory].emplace(File(file, file_size)); } else if (RE2::FullMatch(remote_file_name, ROOT_FILE_PATTERN(files_prefix), &uuid, &file)) { - assert(uuid == table_uuid); + if (uuid != table_uuid) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected uuid: {}, expected: {}", uuid, table_uuid); + tables_data[uuid][file].emplace(File(file, file_size)); } else @@ -168,11 +173,11 @@ String DiskWebServer::getFileName(const String & path) const String result; if (RE2::FullMatch(path, MATCH_DIRECTORY_FILE_PATTERN) - && RE2::Extract(path, MATCH_DIRECTORY_FILE_PATTERN, fmt::format("{}-\\1-\\2-\\3", settings->files_prefix), &result)) + && RE2::Extract(path, MATCH_DIRECTORY_FILE_PATTERN, fmt::format(R"({}-\1-\2-\3)", settings->files_prefix), &result)) return result; if (RE2::FullMatch(path, MATCH_ROOT_FILE_PATTERN) - && RE2::Extract(path, MATCH_ROOT_FILE_PATTERN, fmt::format("{}-\\1-\\2", settings->files_prefix), &result)) + && RE2::Extract(path, MATCH_ROOT_FILE_PATTERN, fmt::format(R"({}-\1-\2)", settings->files_prefix), &result)) return result; throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected file: {}", path); @@ -210,11 +215,7 @@ bool DiskWebServer::findFileInMetadata(const String & path, File & file_info) co bool DiskWebServer::exists(const String & path) const { - LOG_DEBUG(log, "Checking existance of file: {}", path); - - // Assume root directory exists. - //if (re2::RE2::FullMatch(path, re2::RE2(fmt::format("({})/", store_uuid_prefix)))) - // return true; + LOG_DEBUG(log, "Checking existence of file: {}", path); File file; return findFileInMetadata(path, file); diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 0fc99122130..aeba9e7bf54 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -8,6 +8,10 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} struct DiskWebServerSettings { @@ -105,18 +109,13 @@ public: Poco::Timestamp getLastModified(const String &) override { return Poco::Timestamp{}; } - ReservationPtr reserve(UInt64 /*bytes*/) override { return nullptr; } - /// Write and modification part std::unique_ptr writeFile(const String &, size_t, WriteMode) override; void moveFile(const String &, const String &) override {} - void replaceFile(const String &, const String &) override - { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); - } + void replaceFile(const String &, const String &) override {} void removeFile(const String &) override { @@ -128,7 +127,15 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); } - void removeRecursive(const String &) override {} + ReservationPtr reserve(UInt64 /*bytes*/) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } + + void removeRecursive(const String &) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } void removeSharedFile(const String &, bool) override {} diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 346bbf0427e..4fcd3576d7b 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -315,6 +315,7 @@ void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPR if (!(status == Poco::Net::HTTPResponse::HTTP_OK || status == Poco::Net::HTTPResponse::HTTP_CREATED || status == Poco::Net::HTTPResponse::HTTP_ACCEPTED + || status == Poco::Net::HTTPResponse::HTTP_PARTIAL_CONTENT /// Reading with Range header was successful. 
|| (isRedirect(status) && allow_redirects))) { std::stringstream error_message; // STYLE_CHECK_ALLOW_STD_STRING_STREAM diff --git a/src/IO/ReadIndirectBufferFromWebServer.cpp b/src/IO/ReadIndirectBufferFromWebServer.cpp index ef21f16e357..f0a68a8a578 100644 --- a/src/IO/ReadIndirectBufferFromWebServer.cpp +++ b/src/IO/ReadIndirectBufferFromWebServer.cpp @@ -37,7 +37,8 @@ std::unique_ptr ReadIndirectBufferFromWebServer::initialize() Poco::URI uri(url); ReadWriteBufferFromHTTP::HTTPHeaderEntries headers; - headers.emplace_back(std::make_pair("Range:", fmt::format("bytes:{}-", offset))); + headers.emplace_back(std::make_pair("Range", fmt::format("bytes={}-", offset))); + LOG_DEBUG(log, "Reading from offset: {}", offset); return std::make_unique( uri, @@ -46,7 +47,8 @@ std::unique_ptr ReadIndirectBufferFromWebServer::initialize() ConnectionTimeouts::getHTTPTimeouts(context), 0, Poco::Net::HTTPBasicCredentials{}, - buf_size); + buf_size, + headers); } @@ -79,7 +81,7 @@ bool ReadIndirectBufferFromWebServer::nextImpl() } if (!successful_read) - throw Exception(ErrorCodes::NETWORK_ERROR, "All read attempts ({}) failed for url {}", max_read_tries, url); + throw Exception(ErrorCodes::NETWORK_ERROR, "All read attempts ({}) failed for uri: {}", max_read_tries, url); if (ret) { diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index 4e967e1e48c..4a64d34c6ba 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -41,17 +41,15 @@ def test_usage(cluster): ATTACH TABLE test{} UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'; - - -- A crutch with detach/attach, need to fix - DETACH TABLE test{}; - ATTACH TABLE test{}; """.format(i, uuids[i], i, i)) result = node2.query("SELECT count() FROM test{}".format(i)) assert(int(result) == 500000 * (i+1)) - #result = node2.query("SELECT id FROM test{} WHERE id % 56 = 3 ORDER BY id".format(i)) - #assert(result == node1.query("SELECT id FROM data{} WHERE id % 56 = 3 ORDER BY id".format(i))) - #result = node2.query("SELECT id FROM test{} WHERE id > 789999 AND id < 1487000".format(i)) - #assert(result == node1.query("SELECT id FROM data{} WHERE id > 789999 AND id < 1487000".format(i))) + result = node2.query("SELECT id FROM test{} WHERE id % 56 = 3 ORDER BY id".format(i)) + assert(result == node1.query("SELECT id FROM data{} WHERE id % 56 = 3 ORDER BY id".format(i))) + + result = node2.query("SELECT id FROM test{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(i)) + assert(result == node1.query("SELECT id FROM data{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(i))) + print(f"Ok {i}") From 92e6df7b89ec8fee96f8a054a3f3e09af3413a39 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Wed, 23 Jun 2021 16:17:34 +0300 Subject: [PATCH 13/86] Allow data in square brackets in JSONAsString format --- src/Formats/FormatFactory.cpp | 20 +++++--- src/Formats/FormatFactory.h | 8 +++ src/Formats/JSONEachRowUtils.cpp | 7 +++ src/Formats/JSONEachRowUtils.h | 2 + src/Formats/registerFormats.cpp | 6 +++ .../Impl/JSONAsStringRowInputFormat.cpp | 50 +++++++++++++++++++ .../Formats/Impl/JSONAsStringRowInputFormat.h | 7 +++ .../Impl/JSONEachRowRowInputFormat.cpp | 6 +++ .../01232_json_as_string_format.sh | 50 ++++++++++++++++--- ...s_string_data_in_square_brackets.reference | 2 + ...json_as_string_data_in_square_brackets.sql | 9 ++++ .../01926_json_as_string_array.reference | 1 + 
.../0_stateless/01926_json_as_string_array.sh | 9 ++++ 13 files changed, 164 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.reference create mode 100644 tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.sql create mode 100644 tests/queries/0_stateless/01926_json_as_string_array.reference create mode 100755 tests/queries/0_stateless/01926_json_as_string_array.sh diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 8b7cf9635b4..975ae2e369a 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -162,14 +162,12 @@ InputFormatPtr FormatFactory::getInput( if (settings.max_memory_usage_for_user && settings.min_chunk_bytes_for_parallel_parsing * settings.max_threads * 2 > settings.max_memory_usage_for_user) parallel_parsing = false; - if (parallel_parsing && name == "JSONEachRow") + if (parallel_parsing) { - /// FIXME ParallelParsingBlockInputStream doesn't support formats with non-trivial readPrefix() and readSuffix() - - /// For JSONEachRow we can safely skip whitespace characters - skipWhitespaceIfAny(buf); - if (buf.eof() || *buf.position() == '[') - parallel_parsing = false; /// Disable it for JSONEachRow if data is in square brackets (see JSONEachRowRowInputFormat) + const auto & non_trivial_prefix_and_suffix_checker = getCreators(name).non_trivial_prefix_and_suffix_checker; + /// Disable parallel parsing for input formats with non-trivial readPrefix() and readSuffix(). + if (non_trivial_prefix_and_suffix_checker && non_trivial_prefix_and_suffix_checker(buf)) + parallel_parsing = false; } if (parallel_parsing) @@ -396,6 +394,14 @@ void FormatFactory::registerInputFormatProcessor(const String & name, InputProce target = std::move(input_creator); } +void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker) +{ + auto & target = dict[name].non_trivial_prefix_and_suffix_checker; + if (target) + throw Exception("FormatFactory: Non trivial prefix and suffix checker " + name + " is already registered", ErrorCodes::LOGICAL_ERROR); + target = std::move(non_trivial_prefix_and_suffix_checker); +} + void FormatFactory::registerOutputFormatProcessor(const String & name, OutputProcessorCreator output_creator) { auto & target = dict[name].output_processor_creator; diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 6edbafd2d64..e935eb4d761 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -93,6 +93,11 @@ private: const RowOutputFormatParams & params, const FormatSettings & settings)>; + /// Some input formats can have non trivial readPrefix() and readSuffix(), + /// so in some cases there is no possibility to use parallel parsing. + /// The checker should return true if parallel parsing should be disabled. 
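+    /// For example, JSONEachRow data may be wrapped in a top-level JSON array
+    /// ('[' ... ']'); its checker (see JSONEachRowUtils.cpp in this patch) peeks at
+    /// the first non-whitespace byte and returns true when the input is empty or
+    /// starts with '['.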
+ using NonTrivialPrefixAndSuffixChecker = std::function; + struct Creators { InputCreator input_creator; @@ -102,6 +107,7 @@ private: FileSegmentationEngine file_segmentation_engine; bool supports_parallel_formatting{false}; bool is_column_oriented{false}; + NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker; }; using FormatsDictionary = std::unordered_map; @@ -166,6 +172,8 @@ public: void registerOutputFormat(const String & name, OutputCreator output_creator); void registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine); + void registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker); + void registerInputFormatProcessor(const String & name, InputProcessorCreator input_creator); void registerOutputFormatProcessor(const String & name, OutputProcessorCreator output_creator); diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp index 28ba625d9fb..a7c68c4ce00 100644 --- a/src/Formats/JSONEachRowUtils.cpp +++ b/src/Formats/JSONEachRowUtils.cpp @@ -83,4 +83,11 @@ std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, D return {loadAtPosition(in, memory, pos), number_of_rows}; } +bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf) +{ + /// For JSONEachRow we can safely skip whitespace characters + skipWhitespaceIfAny(buf); + return buf.eof() || *buf.position() == '['; +} + } diff --git a/src/Formats/JSONEachRowUtils.h b/src/Formats/JSONEachRowUtils.h index adf85f37a22..79dd6c6c192 100644 --- a/src/Formats/JSONEachRowUtils.h +++ b/src/Formats/JSONEachRowUtils.h @@ -5,4 +5,6 @@ namespace DB std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); +bool nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl(ReadBuffer & buf); + } diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 89fb7c6cc02..8e625df4b3f 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -79,6 +79,9 @@ void registerInputFormatProcessorJSONAsString(FormatFactory & factory); void registerInputFormatProcessorLineAsString(FormatFactory & factory); void registerInputFormatProcessorCapnProto(FormatFactory & factory); +/// Non trivial prefix and suffix checkers for disabling parallel parsing. +void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory); +void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factory); void registerFormats() { @@ -152,6 +155,9 @@ void registerFormats() #if !defined(ARCADIA_BUILD) registerInputFormatProcessorCapnProto(factory); #endif + + registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(factory); + registerNonTrivialPrefixAndSuffixCheckerJSONAsString(factory); } } diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index bc57803152f..e968c187bef 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -34,6 +34,35 @@ void JSONAsStringRowInputFormat::resetParser() buf.reset(); } +void JSONAsStringRowInputFormat::readPrefix() +{ + /// In this format, BOM at beginning of stream cannot be confused with value, so it is safe to skip it. 
+ skipBOMIfExists(buf); + + skipWhitespaceIfAny(buf); + if (!buf.eof() && *buf.position() == '[') + { + ++buf.position(); + data_in_square_brackets = true; + } +} + +void JSONAsStringRowInputFormat::readSuffix() +{ + skipWhitespaceIfAny(buf); + if (data_in_square_brackets) + { + assertChar(']', buf); + skipWhitespaceIfAny(buf); + } + if (!buf.eof() && *buf.position() == ';') + { + ++buf.position(); + skipWhitespaceIfAny(buf); + } + assertEOF(buf); +} + void JSONAsStringRowInputFormat::readJSONObject(IColumn & column) { PeekableReadBufferCheckpoint checkpoint{buf}; @@ -113,7 +142,23 @@ void JSONAsStringRowInputFormat::readJSONObject(IColumn & column) bool JSONAsStringRowInputFormat::readRow(MutableColumns & columns, RowReadExtension &) { + if (!allow_new_rows) + return false; + skipWhitespaceIfAny(buf); + if (!buf.eof()) + { + if (!data_in_square_brackets && *buf.position() == ';') + { + /// ';' means the end of query, but it cannot be before ']'. + return allow_new_rows = false; + } + else if (data_in_square_brackets && *buf.position() == ']') + { + /// ']' means the end of query. + return allow_new_rows = false; + } + } if (!buf.eof()) readJSONObject(*columns[0]); @@ -143,4 +188,9 @@ void registerFileSegmentationEngineJSONAsString(FormatFactory & factory) factory.registerFileSegmentationEngine("JSONAsString", &fileSegmentationEngineJSONEachRowImpl); } +void registerNonTrivialPrefixAndSuffixCheckerJSONAsString(FormatFactory & factory) +{ + factory.registerNonTrivialPrefixAndSuffixChecker("JSONAsString", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); +} + } diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h index c15a769343c..c70d9efb178 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.h @@ -22,10 +22,17 @@ public: String getName() const override { return "JSONAsStringRowInputFormat"; } void resetParser() override; + void readPrefix() override; + void readSuffix() override; + private: void readJSONObject(IColumn & column); PeekableReadBuffer buf; + + /// This flag is needed to know if data is in square brackets. 
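+    /// When set, the whole input is one JSON array: readPrefix() has consumed the
+    /// opening '[' and readSuffix() must find the matching closing ']'.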
+ bool data_in_square_brackets = false; + bool allow_new_rows = true; }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index e0f6514295b..61034a8e01b 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -359,4 +359,10 @@ void registerFileSegmentationEngineJSONEachRow(FormatFactory & factory) factory.registerFileSegmentationEngine("JSONStringsEachRow", &fileSegmentationEngineJSONEachRowImpl); } +void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory) +{ + factory.registerNonTrivialPrefixAndSuffixChecker("JSONEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); + factory.registerNonTrivialPrefixAndSuffixChecker("JSONStringsEachRow", nonTrivialPrefixAndSuffixCheckerJSONEachRowImpl); +} + } diff --git a/tests/queries/0_stateless/01232_json_as_string_format.sh b/tests/queries/0_stateless/01232_json_as_string_format.sh index ed8c5d37cac..a20e23867da 100755 --- a/tests/queries/0_stateless/01232_json_as_string_format.sh +++ b/tests/queries/0_stateless/01232_json_as_string_format.sh @@ -28,21 +28,59 @@ echo ' "array" : [3, 2, 1], "map" : { "z" : 1, - "y" : 2, + "y" : 2, "x" : 3 } } { - "id" : 3, - "date" : "01.03.2020", - "string" : "one more string", - "array" : [3,1,2], + "id" : 3, + "date" : "01.03.2020", + "string" : "one more string", + "array" : [3,1,2], "map" : { - "{" : 1, + "{" : 1, "}}" : 2 } }' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; +echo ' +[ + { + "id" : 1, + "date" : "01.01.2020", + "string" : "123{{{\"\\", + "array" : [1, 2, 3], + "map": { + "a" : 1, + "b" : 2, + "c" : 3 + } + }, + { + "id" : 2, + "date" : "01.02.2020", + "string" : "{another\" + string}}", + "array" : [3, 2, 1], + "map" : { + "z" : 1, + "y" : 2, + "x" : 3 + } + } + { + "id" : 3, + "date" : "01.03.2020", + "string" : "one more string", + "array" : [3,1,2], + "map" : { + "{" : 1, + "}}" : 2 + } + } +]' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString"; + + $CLICKHOUSE_CLIENT --query="SELECT * FROM json_as_string"; $CLICKHOUSE_CLIENT --query="DROP TABLE json_as_string" diff --git a/tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.reference b/tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.reference new file mode 100644 index 00000000000..4e4852ed726 --- /dev/null +++ b/tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.reference @@ -0,0 +1,2 @@ +{"id": 1, "name": "name1"} +{"id": 2, "name": "name2"} diff --git a/tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.sql b/tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.sql new file mode 100644 index 00000000000..c568cf92118 --- /dev/null +++ b/tests/queries/0_stateless/01925_json_as_string_data_in_square_brackets.sql @@ -0,0 +1,9 @@ +DROP TABLE IF EXISTS json_square_brackets; +CREATE TABLE json_square_brackets (field String) ENGINE = Memory; +INSERT INTO json_square_brackets FORMAT JSONAsString [{"id": 1, "name": "name1"}, {"id": 2, "name": "name2"}]; +INSERT INTO json_square_brackets FORMAT JSONAsString[]; +INSERT INTO json_square_brackets FORMAT JSONAsString [ ] ; +INSERT INTO json_square_brackets FORMAT JSONEachRow ; + +SELECT * FROM json_square_brackets; +DROP TABLE IF EXISTS json_square_brackets; diff --git a/tests/queries/0_stateless/01926_json_as_string_array.reference 
b/tests/queries/0_stateless/01926_json_as_string_array.reference new file mode 100644 index 00000000000..749fce669df --- /dev/null +++ b/tests/queries/0_stateless/01926_json_as_string_array.reference @@ -0,0 +1 @@ +1000000 diff --git a/tests/queries/0_stateless/01926_json_as_string_array.sh b/tests/queries/0_stateless/01926_json_as_string_array.sh new file mode 100755 index 00000000000..25259a0f511 --- /dev/null +++ b/tests/queries/0_stateless/01926_json_as_string_array.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -e + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_LOCAL} --query "SELECT '[' || arrayStringConcat(arrayMap(x -> '{\"id\": 1, \"name\": \"name1\"}', range(1000000)), ',') || ']'" | ${CLICKHOUSE_LOCAL} --query "SELECT count() FROM table" --input-format JSONAsString --structure 'field String' From bb404a311e6ad00c273fa6e67ab9d66e7b933e73 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Mon, 28 Jun 2021 12:07:38 +0300 Subject: [PATCH 14/86] Update reference in test --- .../0_stateless/01232_json_as_string_format.reference | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01232_json_as_string_format.reference b/tests/queries/0_stateless/01232_json_as_string_format.reference index 19d50bde85a..43420b71165 100644 --- a/tests/queries/0_stateless/01232_json_as_string_format.reference +++ b/tests/queries/0_stateless/01232_json_as_string_format.reference @@ -1,3 +1,6 @@ {\n "id" : 1,\n "date" : "01.01.2020",\n "string" : "123{{{\\"\\\\",\n "array" : [1, 2, 3],\n "map": {\n "a" : 1,\n "b" : 2,\n "c" : 3\n }\n} -{\n "id" : 2,\n "date" : "01.02.2020",\n "string" : "{another\\"\n string}}",\n "array" : [3, 2, 1],\n "map" : {\n "z" : 1,\n "y" : 2, \n "x" : 3\n }\n} -{\n "id" : 3, \n "date" : "01.03.2020", \n "string" : "one more string", \n "array" : [3,1,2], \n "map" : {\n "{" : 1, \n "}}" : 2\n }\n} +{\n "id" : 2,\n "date" : "01.02.2020",\n "string" : "{another\\"\n string}}",\n "array" : [3, 2, 1],\n "map" : {\n "z" : 1,\n "y" : 2,\n "x" : 3\n }\n} +{\n "id" : 3,\n "date" : "01.03.2020",\n "string" : "one more string",\n "array" : [3,1,2],\n "map" : {\n "{" : 1,\n "}}" : 2\n }\n} +{\n "id" : 1,\n "date" : "01.01.2020",\n "string" : "123{{{\\"\\\\",\n "array" : [1, 2, 3],\n "map": {\n "a" : 1,\n "b" : 2,\n "c" : 3\n }\n } +{\n "id" : 2,\n "date" : "01.02.2020",\n "string" : "{another\\"\n string}}",\n "array" : [3, 2, 1],\n "map" : {\n "z" : 1,\n "y" : 2,\n "x" : 3\n }\n } +{\n "id" : 3,\n "date" : "01.03.2020",\n "string" : "one more string",\n "array" : [3,1,2],\n "map" : {\n "{" : 1,\n "}}" : 2\n }\n } From 3fb93dcdb038974fe9e8bf4d8d7710099cd3f431 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Wed, 30 Jun 2021 12:48:07 +0300 Subject: [PATCH 15/86] Fix test --- .../01232_json_as_string_format.reference | 6 +++--- .../0_stateless/01232_json_as_string_format.sh | 2 +- .../0_stateless/test_v9ndmp/nested_arrays.orc | Bin 0 -> 1352 bytes .../0_stateless/test_v9ndmp/nullable_arrays.orc | Bin 0 -> 720 bytes .../test_v9ndmp/parquet_all_types_1.parquet | Bin 0 -> 10593 bytes .../test_v9ndmp/parquet_all_types_2.parquet | Bin 0 -> 3913 bytes .../test_v9ndmp/parquet_all_types_5.parquet | Bin 0 -> 2561 bytes .../test_v9ndmp/parquet_decimal0.parquet | Bin 0 -> 8849 bytes tests/queries/0_stateless/test_v9ndmp/tuples | Bin 0 -> 3908 bytes 9 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 
tests/queries/0_stateless/test_v9ndmp/nested_arrays.orc
 create mode 100644 tests/queries/0_stateless/test_v9ndmp/nullable_arrays.orc
 create mode 100644 tests/queries/0_stateless/test_v9ndmp/parquet_all_types_1.parquet
 create mode 100644 tests/queries/0_stateless/test_v9ndmp/parquet_all_types_2.parquet
 create mode 100644 tests/queries/0_stateless/test_v9ndmp/parquet_all_types_5.parquet
 create mode 100644 tests/queries/0_stateless/test_v9ndmp/parquet_decimal0.parquet
 create mode 100644 tests/queries/0_stateless/test_v9ndmp/tuples

diff --git a/tests/queries/0_stateless/01232_json_as_string_format.reference b/tests/queries/0_stateless/01232_json_as_string_format.reference
index 43420b71165..d644c12bb3f 100644
--- a/tests/queries/0_stateless/01232_json_as_string_format.reference
+++ b/tests/queries/0_stateless/01232_json_as_string_format.reference
@@ -1,6 +1,6 @@
-{\n "id" : 1,\n "date" : "01.01.2020",\n "string" : "123{{{\\"\\\\",\n "array" : [1, 2, 3],\n "map": {\n "a" : 1,\n "b" : 2,\n "c" : 3\n }\n}
-{\n "id" : 2,\n "date" : "01.02.2020",\n "string" : "{another\\"\n string}}",\n "array" : [3, 2, 1],\n "map" : {\n "z" : 1,\n "y" : 2,\n "x" : 3\n }\n}
-{\n "id" : 3,\n "date" : "01.03.2020",\n "string" : "one more string",\n "array" : [3,1,2],\n "map" : {\n "{" : 1,\n "}}" : 2\n }\n}
 {\n "id" : 1,\n "date" : "01.01.2020",\n "string" : "123{{{\\"\\\\",\n "array" : [1, 2, 3],\n "map": {\n "a" : 1,\n "b" : 2,\n "c" : 3\n }\n }
 {\n "id" : 2,\n "date" : "01.02.2020",\n "string" : "{another\\"\n string}}",\n "array" : [3, 2, 1],\n "map" : {\n "z" : 1,\n "y" : 2,\n "x" : 3\n }\n }
 {\n "id" : 3,\n "date" : "01.03.2020",\n "string" : "one more string",\n "array" : [3,1,2],\n "map" : {\n "{" : 1,\n "}}" : 2\n }\n }
+{\n "id" : 1,\n "date" : "01.01.2020",\n "string" : "123{{{\\"\\\\",\n "array" : [1, 2, 3],\n "map": {\n "a" : 1,\n "b" : 2,\n "c" : 3\n }\n}
+{\n "id" : 2,\n "date" : "01.02.2020",\n "string" : "{another\\"\n string}}",\n "array" : [3, 2, 1],\n "map" : {\n "z" : 1,\n "y" : 2,\n "x" : 3\n }\n}
+{\n "id" : 3,\n "date" : "01.03.2020",\n "string" : "one more string",\n "array" : [3,1,2],\n "map" : {\n "{" : 1,\n "}}" : 2\n }\n}
diff --git a/tests/queries/0_stateless/01232_json_as_string_format.sh b/tests/queries/0_stateless/01232_json_as_string_format.sh
index a20e23867da..d4476476f60 100755
--- a/tests/queries/0_stateless/01232_json_as_string_format.sh
+++ b/tests/queries/0_stateless/01232_json_as_string_format.sh
@@ -81,6 +81,6 @@ echo '
 ]' | $CLICKHOUSE_CLIENT --query="INSERT INTO json_as_string FORMAT JSONAsString";
 
 
-$CLICKHOUSE_CLIENT --query="SELECT * FROM json_as_string";
+$CLICKHOUSE_CLIENT --query="SELECT * FROM json_as_string ORDER BY field";
 
 $CLICKHOUSE_CLIENT --query="DROP TABLE json_as_string"
diff --git a/tests/queries/0_stateless/test_v9ndmp/nested_arrays.orc b/tests/queries/0_stateless/test_v9ndmp/nested_arrays.orc
new file mode 100644
GIT binary patch
literal 1352 (base85 binary data not shown)

diff --git a/tests/queries/0_stateless/test_v9ndmp/nullable_arrays.orc b/tests/queries/0_stateless/test_v9ndmp/nullable_arrays.orc
new file mode 100644
GIT binary patch
literal 720 (base85 binary data not shown)

diff --git a/tests/queries/0_stateless/test_v9ndmp/parquet_all_types_1.parquet b/tests/queries/0_stateless/test_v9ndmp/parquet_all_types_1.parquet
new file mode 100644
GIT binary patch
literal 10593 (base85 binary data not shown)

diff --git a/tests/queries/0_stateless/test_v9ndmp/parquet_all_types_2.parquet b/tests/queries/0_stateless/test_v9ndmp/parquet_all_types_2.parquet
new file mode 100644
GIT binary patch
literal 3913 (base85 binary data not shown)

diff --git a/tests/queries/0_stateless/test_v9ndmp/parquet_all_types_5.parquet b/tests/queries/0_stateless/test_v9ndmp/parquet_all_types_5.parquet
new file mode 100644
GIT binary patch
literal 2561 (base85 binary data not shown)

diff --git a/tests/queries/0_stateless/test_v9ndmp/parquet_decimal0.parquet b/tests/queries/0_stateless/test_v9ndmp/parquet_decimal0.parquet
new file mode 100644
GIT binary patch
literal 8849 (base85 binary data not shown)

diff --git a/tests/queries/0_stateless/test_v9ndmp/tuples b/tests/queries/0_stateless/test_v9ndmp/tuples
new file mode 100644
GIT binary patch
literal 3908 (base85 binary data not shown)

From e3a96cc870c2f9b34f06b88e65ad109d8334ef2c Mon Sep 17 00:00:00 2001
From: Jakub Kuklis
Date: Thu, 29 Jul 2021 14:30:07 +0200
Subject: [PATCH 16/86] Checking whether all deterministic parts can be matched in the right order in sequenceMatch

---
 .../AggregateFunctionSequenceMatch.h          | 82 ++++++++++++++++++-
 1 file changed, 79 insertions(+), 3 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h
index d05a4ca314d..29fae66e291 100644
--- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h
+++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h
@@ -518,6 +518,77 @@ protected:
         return action_it == action_end;
     }
 
+    /// Splits the pattern into deterministic parts separated by non-deterministic fragments
+    /// (time constraints and Kleene stars), and tries to match the deterministic parts in their specified order,
+    /// ignoring the non-deterministic fragments.
+    /// This function can quickly check that a full match is not possible if some deterministic fragment is missing.
+ template + bool couldMatchDeterministicParts(const EventEntry events_begin, const EventEntry events_end, bool limit_iterations = true) const + { + size_t events_processed = 0; + auto events_it = events_begin; + std::vector det_part; + + auto find_deterministic_part = [&events_it, &events_end, &events_processed, &det_part, limit_iterations]() + { + auto events_it_init = events_it; + const auto det_part_begin = std::begin(det_part); + const auto det_part_end = std::end(det_part); + auto det_part_it = det_part_begin; + + while (det_part_it != det_part_end && events_it != events_end) + { + /// matching any event + if (*det_part_it == 0) + ++events_it, ++det_part_it; + + /// matching specific event + else { + if (events_it->second.test(*det_part_it - 1)) + ++events_it, ++det_part_it; + + else + { + events_it = ++events_it_init; + det_part_it = det_part_begin; + } + } + + if (limit_iterations && ++events_processed > sequence_match_max_iterations) { + throw Exception{"Pattern application proves too difficult, exceeding max iterations (" + toString(sequence_match_max_iterations) + ")", + ErrorCodes::TOO_SLOW}; + } + } + + det_part.clear(); + return det_part_it == det_part_end; + }; + + for (auto action : actions) { + switch(action.type) { + /// mark AnyEvent action with 0 and SpecificEvent with positive numbers corresponding to the events + case PatternActionType::SpecificEvent: + det_part.push_back(action.extra + 1); + break; + case PatternActionType::AnyEvent: + det_part.push_back(0); + break; + case PatternActionType::KleeneStar: + case PatternActionType::TimeLessOrEqual: + case PatternActionType::TimeLess: + case PatternActionType::TimeGreaterOrEqual: + case PatternActionType::TimeGreater: + case PatternActionType::TimeEqual: + if (!find_deterministic_part()) + return false; + default: + throw Exception{"Unknown PatternActionType", ErrorCodes::LOGICAL_ERROR}; + } + } + + return find_deterministic_part(); + } + private: enum class DFATransition : char { @@ -592,7 +663,8 @@ public: const auto events_end = std::end(data_ref.events_list); auto events_it = events_begin; - bool match = this->pattern_has_time ? this->backtrackingMatch(events_it, events_end) : this->dfaMatch(events_it, events_end); + bool couldMatch = this->couldMatchDeterministicParts(events_begin, events_end, this->pattern_has_time); + bool match = couldMatch && (this->pattern_has_time ? 
this->backtrackingMatch(events_it, events_end) : this->dfaMatch(events_it, events_end)); assert_cast(to).getData().push_back(match); } }; @@ -628,8 +700,12 @@ private: auto events_it = events_begin; size_t count = 0; - while (events_it != events_end && this->backtrackingMatch(events_it, events_end)) - ++count; + // check if there is a chance of matching the sequence at least once + bool couldMatch = this->couldMatchDeterministicParts(events_begin, events_end); + if (couldMatch) { + while (events_it != events_end && this->backtrackingMatch(events_it, events_end)) + ++count; + } return count; } From 2fad1dd8c51c726dbe77a15b9d2c552b92f4916a Mon Sep 17 00:00:00 2001 From: Jakub Kuklis Date: Thu, 29 Jul 2021 14:39:51 +0200 Subject: [PATCH 17/86] Adding a break before default in switch --- src/AggregateFunctions/AggregateFunctionSequenceMatch.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 29fae66e291..4959a716c9c 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -581,6 +581,7 @@ protected: case PatternActionType::TimeEqual: if (!find_deterministic_part()) return false; + break; default: throw Exception{"Unknown PatternActionType", ErrorCodes::LOGICAL_ERROR}; } From 6a39546e5b566d43271a8bee9dd8b6f495edf733 Mon Sep 17 00:00:00 2001 From: Jakub Kuklis Date: Thu, 29 Jul 2021 16:28:22 +0200 Subject: [PATCH 18/86] Removing default placed after an exhaustive sweep through enum values --- src/AggregateFunctions/AggregateFunctionSequenceMatch.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 4959a716c9c..08c7c7a6a6a 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -581,9 +581,6 @@ protected: case PatternActionType::TimeEqual: if (!find_deterministic_part()) return false; - break; - default: - throw Exception{"Unknown PatternActionType", ErrorCodes::LOGICAL_ERROR}; } } From 24db6494de4b913bff0471b8165001a99cbd9655 Mon Sep 17 00:00:00 2001 From: Jakub Kuklis Date: Mon, 2 Aug 2021 16:23:23 +0200 Subject: [PATCH 19/86] Events conditions met using bitsets --- .../AggregateFunctionSequenceMatch.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 08c7c7a6a6a..2a34b40741e 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -48,6 +48,7 @@ struct AggregateFunctionSequenceMatchData final bool sorted = true; PODArrayWithStackMemory events_list; + std::bitset conditions_met; void add(const Timestamp timestamp, const Events & events) { @@ -56,6 +57,7 @@ struct AggregateFunctionSequenceMatchData final { events_list.emplace_back(timestamp, events); sorted = false; + conditions_met |= events; } } @@ -87,6 +89,8 @@ struct AggregateFunctionSequenceMatchData final } sorted = true; + + conditions_met |= other.conditions_met; } void sort() @@ -290,6 +294,7 @@ private: dfa_states.back().transition = DFATransition::SpecificEvent; dfa_states.back().event = event_number - 1; dfa_states.emplace_back(); + conditions_in_pattern.set(event_number - 1); } if (!match(")")) @@ -627,6 +632,7 @@ private: 
protected: /// `True` if the parsed pattern contains time assertions (?t...), `false` otherwise. bool pattern_has_time; + std::bitset conditions_in_pattern; private: std::string pattern; @@ -653,6 +659,11 @@ public: void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { + if ((this->conditions_in_pattern & this->data(place).conditions_met) != this->conditions_in_pattern) + { + assert_cast(to).getData().push_back(false); + return; + } this->data(place).sort(); const auto & data_ref = this->data(place); @@ -684,6 +695,11 @@ public: void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { + if ((this->conditions_in_pattern & this->data(place).conditions_met) != this->conditions_in_pattern) + { + assert_cast(to).getData().push_back(0); + return; + } this->data(place).sort(); assert_cast(to).getData().push_back(count(place)); } From 5abf24df9e464271d0cae27e6642d3917e3ed222 Mon Sep 17 00:00:00 2001 From: Jakub Kuklis Date: Tue, 3 Aug 2021 09:47:20 +0200 Subject: [PATCH 20/86] No sorting if unnecessary --- .../AggregateFunctionSequenceMatch.h | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 2a34b40741e..16d5f49de1e 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -66,30 +66,8 @@ struct AggregateFunctionSequenceMatchData final if (other.events_list.empty()) return; - const auto size = events_list.size(); - events_list.insert(std::begin(other.events_list), std::end(other.events_list)); - - /// either sort whole container or do so partially merging ranges afterwards - if (!sorted && !other.sorted) - std::sort(std::begin(events_list), std::end(events_list), Comparator{}); - else - { - const auto begin = std::begin(events_list); - const auto middle = std::next(begin, size); - const auto end = std::end(events_list); - - if (!sorted) - std::sort(begin, middle, Comparator{}); - - if (!other.sorted) - std::sort(middle, end, Comparator{}); - - std::inplace_merge(begin, middle, end, Comparator{}); - } - - sorted = true; - + sorted = false; conditions_met |= other.conditions_met; } From 3f813e700db75bae0012bfd0e70979e32864fd33 Mon Sep 17 00:00:00 2001 From: Jakub Kuklis Date: Wed, 4 Aug 2021 10:58:59 +0200 Subject: [PATCH 21/86] Code clearance --- src/AggregateFunctions/AggregateFunctionSequenceMatch.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 16d5f49de1e..c136512bc78 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -530,6 +530,7 @@ protected: if (events_it->second.test(*det_part_it - 1)) ++events_it, ++det_part_it; + /// abandon current matching, try to match the deterministic fragment further in the list else { events_it = ++events_it_init; @@ -650,8 +651,9 @@ public: const auto events_end = std::end(data_ref.events_list); auto events_it = events_begin; - bool couldMatch = this->couldMatchDeterministicParts(events_begin, events_end, this->pattern_has_time); - bool match = couldMatch && (this->pattern_has_time ? this->backtrackingMatch(events_it, events_end) : this->dfaMatch(events_it, events_end)); + bool match = (this->pattern_has_time ? 
+ (this->couldMatchDeterministicParts(events_begin, events_end) && this->backtrackingMatch(events_it, events_end)) : + this->dfaMatch(events_it, events_end)); assert_cast(to).getData().push_back(match); } }; @@ -693,8 +695,7 @@ private: size_t count = 0; // check if there is a chance of matching the sequence at least once - bool couldMatch = this->couldMatchDeterministicParts(events_begin, events_end); - if (couldMatch) { + if (this->couldMatchDeterministicParts(events_begin, events_end)) { while (events_it != events_end && this->backtrackingMatch(events_it, events_end)) ++count; } From e1eb9057c63bfcf584d415f05c76fea3b26c1439 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Wed, 4 Aug 2021 16:57:59 +0300 Subject: [PATCH 22/86] Delete trash --- .../0_stateless/test_v9ndmp/nested_arrays.orc | Bin 1352 -> 0 bytes .../0_stateless/test_v9ndmp/nullable_arrays.orc | Bin 720 -> 0 bytes .../test_v9ndmp/parquet_all_types_1.parquet | Bin 10593 -> 0 bytes .../test_v9ndmp/parquet_all_types_2.parquet | Bin 3913 -> 0 bytes .../test_v9ndmp/parquet_all_types_5.parquet | Bin 2561 -> 0 bytes .../test_v9ndmp/parquet_decimal0.parquet | Bin 8849 -> 0 bytes tests/queries/0_stateless/test_v9ndmp/tuples | Bin 3908 -> 0 bytes 7 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/queries/0_stateless/test_v9ndmp/nested_arrays.orc delete mode 100644 tests/queries/0_stateless/test_v9ndmp/nullable_arrays.orc delete mode 100644 tests/queries/0_stateless/test_v9ndmp/parquet_all_types_1.parquet delete mode 100644 tests/queries/0_stateless/test_v9ndmp/parquet_all_types_2.parquet delete mode 100644 tests/queries/0_stateless/test_v9ndmp/parquet_all_types_5.parquet delete mode 100644 tests/queries/0_stateless/test_v9ndmp/parquet_decimal0.parquet delete mode 100644 tests/queries/0_stateless/test_v9ndmp/tuples diff --git a/tests/queries/0_stateless/test_v9ndmp/nested_arrays.orc b/tests/queries/0_stateless/test_v9ndmp/nested_arrays.orc deleted file mode 100644 index c0cde3db0572953d4964470233e8e775e544c0ac..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1352 zcmcIjy>1gh5T5pZ42te!jUI&m0{exZEiKXtWh8aHY$T+(gyTn<2f`2N$+z zjFq+Vi(%B3XVC>a%__IJphF*l4OM(cOUKW6S2bIvQ6my8v~1J;$82elxSj8IGF z(rfZ7=E@u5+ABm+j!l1(fN?hM^l)?0%o~& zjgTo(DbY;u82UnhevL!AnJl53bnK5&570Iv@OE8_Jc57*C zTvwW=EifWc4F)0!Ga7HuwOE|#UB`Pjn^7xj^+l}y2-JWvn~Hc9n$gXuhGyxHMt?j^ zD560RQwljq&UlL*o8!s?$+7UX0O_`RArpn0(S&ba0mkv!{l?KKO6`^$BCiC<&_Wjo za@3|=9{Ej&YdZQw&hu$001GYov3$?w_~gMNi4r59WLd~5=|zx<&A};}miAlcE0MNa z{y2D#eI=YS_L3x7h#;HlTuzxb;2~`kyj!hB>69{axwCnV1Wx5Y0W`xIVWNkKqA#U* zpS_WWN)o1?-O$`=S=Q?j>vjZgErGL35^|dC8Wm*_XyNtM`g8fFRWG6SL z>baY3^lPuc)XNgxH9v>eg3(p`(Sg?lJgtQ7;_0Olle;*}X-AF0rSMl7?M#&lC6gy+ ze8>8J3Ty+Nq>Q)tu&yh?bY*$MG_uRS=xFzecJCg&R}SBo;~veaM+xTHqk}6p{{Cj{ z?;xDvj4;uo6$wfO>@zE2F808BR(i^W`zEe*P(u#AN_b`_oC8);MQhR1d#daB7uB_X z@6u8^V@&h}Y$l#COPm8`-^X`lf5aMyz%+wV##dXRLdgZ@I)|&gsT9o|?fRzkh%;Lm z97}z-Fuy>+-bzT=C9YYVcqn517=gR5!sH4f_A3c$Rq}gwYtKd{uugKuTY)xSA!De1 zq`kM3Gf^zRTmh4>@mTJfrnK$ZkUQ^f+m6};r(ME4o==Ms%X9d;v+1kfG0bpAWDk`qqyqKQMtS&x8Oy~D@Mkq1 zy@J)|XV0M9=qJeTj4;FL5xdiuQo(+qPO=9wmh-jns0Pu4^K)p;kKUid?J2}`#+c{{ z%&~fj8O&Jv>fzxg+L>aaONElj6UpY7v7CEr){Ny!3Z@%gn2hC*w`a{*9&Lsjn~RpQ z)c7|lEmbyQs1X@UjacpBtuhIS1u~YgEpT-UUu}g7Xp$tpjAgJ1p1#i))HSaF;|LyD zOnnBWXc}8`iM$dZ&-S=L{29xoJ@DiM^ogA3(^3EyK7=2O`vm2v7+Zld@S#NUXDq)o z!(g-R6gjaF>oaHk*2(J38N20=Bd!F=)qRpAkg<&Jh0A-%DPo>aixSIo_~;X#o;!eM zI3rB-P`T(!sX%@9kvx3CjAg0??ziC4D_Ct|?5Hm2R&InwY;)9e*u7H0{_lRt9>`dp zw8BVh7Tr5Pht|2Hr?M{gnh}_zpGeGL#xi~Y?i`?fP-bIe#K4$hjfoI40( 
zpS~~|%g=4IW-KFz;KHGzWh{06LqtoL z%UG(>tOC!SThJ|@jD-XOcB);%=Ezviw!`q}M4Vr%lHapiD{Ga&I>{Mt1=`IcGKP9H zmd711+`(hHYnsxwCuolRQ?Y}d-0v_NT^3RM|V+zHo1@ z?2}M!%0)ufg)cQL7T+452=|p1%>Ts{g#AhhLU=I*VZYLWqQ7{7V!s}NP+x37_^%|O z_%9A1)N(%*yB-houByXxsC`@aORXrzCF^e4a!tb!6>=3D=3k43u&c-rLM<4=uHr(` zYpqc1iYSCy%Y^W&kWl>>z+TE2kr3lC8I@&JVTr5=j?tsCb3-5H+4H(WbNWp-1L(zP`! zPqv@>>U3vsb;rq*m0x$B>ghUhys~yf!-m@G*1e4_txfy($0zW=%L@F}8u9;8TJWDM F{|5@@9=-qo diff --git a/tests/queries/0_stateless/test_v9ndmp/parquet_all_types_2.parquet b/tests/queries/0_stateless/test_v9ndmp/parquet_all_types_2.parquet deleted file mode 100644 index 9abefffeb7ad8c21623bbf863c29b114be4468b9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3913 zcmchaL1-LR7{}k6%uXih)Qy|G@ok-^Yt4T2ut|5f$xOqND(X^JV`0FUP>*j-}mOt&SYj*R@`;S?9BV# zd*Azg|M$&%FN zqPiGd4S~omP5hxt1Kk=3fihRxMDMvDaClFy9*8zP73qCrtefC@nSkZSLOYaNU7l4|)H> zZ-+ppB7qF?xy`%2(u7M_QgOvhHSI`9`4npy$^J+f3C%!lTFn3ka)-k=S$On}4x}r$ z9Ri8X$aZ8#(&DJ4G1IhY^}iSmnU(q)dWX9^9Dc~bR*q6R88>68fVc!M2tzH8rt`7f z`W92!62sADk*w0BUlde5{SrbA*xX|9qws{SGh*=K%H`!Cm$$gP&Edfq+#JJP?&v2K zY($I&V77Pd$jM(OtUr=)c2FbRZ#3&kYa4b~dSdthtX6QByI*1cJ_nCooc5%L4t`VN z=f!N0zY`_;vNoFQjny|ND@UDjIa0317Y>>_-L@)&w{kfB*Pr)<6MTGrLzT08-2Ixv z_ebDSUQK_Bk5tfgF&}`ww2;MdE;in+UkGR!%hpC2X6)Hf=1f+T>j&ayqdl;Rj2h4( zT{?F^9}nRVMY^d7whZ?k%6}BT^C*I-7aBR~HZ`6DyZy#V+$l6l7U7Sn;Ma_Q`imtUa)_P3X2)$`BsIeNyYUfTP* z2H$jB%DJQ!J8MO6z8J0sHqka4mwaMe#D3y%<2XEg0kb*jql4#^aE$!Z>w%`Hs(3C! zUwUWp3;yC9si;jCJ?uKCy4u)0Saz5gZkKA)OgQAqPiMan%{v_l=@ucO(c4-w*UEeO z`@KE*K@U!C(CcFs^x}*Jy*|`H-%lIR?^6Kubj$9;W()n_5}~L27W8`~f&ab>!2hr` zyL&m;!llXmIXA}GiQ{X_>u+DFHz&`ptrgy>uWvLiuNEp(GgFnxbFZFRJU9Q^8~OVl QV_p8SCmF^T@t^3w0Ve4D?f?J) diff --git a/tests/queries/0_stateless/test_v9ndmp/parquet_all_types_5.parquet b/tests/queries/0_stateless/test_v9ndmp/parquet_all_types_5.parquet deleted file mode 100644 index fa102c2b2f35eb79b6535957493ea482929ef530..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2561 zcmb`JJ#5oJ6vwZZi{pHdG#_Wh(g{UKe(KOxN}E`WbYrL>qMg{%)&)y6O%oR}Gch5= zgenHqiJ_`c)qxQqR53GDOo#z7RGFDPaA&)=gM}c-pai;B@|J- z$}gmb5aRbTt<6)+;~LeNjd?KF6jy%22BX;Q5~P_;;tsr<6dTxBH|_&~F8Z4Dd>o-z zI zkSu2`FU3MWXmA)5@2Um)q%%sQiU&eckY_qv`AnNLnw*xhY5*B*=Om$l4f4Iul&Q zY(UCCK-vL9dBWCIBY$3Mq_t(&}uJs7uySKcUF39x9_f- R6Nb<~9%zps)Z;&YzX52!MNOID3xJ*I7&NFF2v5s^|v#D`Ey2qE;L4J~iB!bG zk(hIa3C6 z?I;@|tyB}5c_O5xDwwXScp**1NqT)mNQ*HTi&cb>CO;%8TqQz!lYmEwvJuj4gV5|J zLRw40>tw|X=~GA-LWHOKx0 zJq7B}9NtT4QbS$c1F!bD^4&b5l6E0OmFaJ^N=>pikxmhJIq5yklJuyd{#y@I^|E|R zpHZ2<7l!&`KS_Za>H@$MD4RNX_hUkn8tRW9!F+=&q|JZpDgTp&ge3p4+$a$y2}x9q zBJOg8^rnfVM-BB(BfR=V7EiuJ|-0I4A^N32?h3cS8e`AG< zN_9}AQ^Z|PdJAnNJ!+`;PQct(vV2RQQJKCMb?~24Bn4`yw@<1A)ezR z4vr+wRm3q8;y98x{bqa-C-I*+17>^?Cu_&i1DKhPhaGG1v#$(6 zc6Gtaek}OeHH9F1mf&T#5&Y~4LXaIf*jj7NSlnsoxNME7SduYZ!Cp4Wz|U3|1ldRe zFIzz1XQKv!Y?FYO4G!?L4FN$m0bpypd20a$|y6|;d=Y_K!7t`bT7Ds_UHj1yEbmGe>?*Z@DRF41v diff --git a/tests/queries/0_stateless/test_v9ndmp/tuples b/tests/queries/0_stateless/test_v9ndmp/tuples deleted file mode 100644 index e1f7f7f3d164fdee6ff96a1e4ebc39bdbf706e35..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3908 zcmeHKOK#&v5bY*uxFOPJ`8V zZuc#lraO4ppC0)W(LLdjo)G9O9zQ-gKYQH3eP$Emr@xeTAy2Cu)xgEx{S(Jee-pDN zYpAwD%qD*1k`mWhl%*!Ms^~LnU{q|f9Rm~i7m79Zr@%${4X^`#hIkit5Ai;53I1iE zg?|NkIdB#JwPKm;u7ke;+(f(pZh==Kz72Z-dk4H-`1gQA#P@+C_z%E8gna}&2JZy% zG5n{n&k#QcUN9vg(a+%*)Ha8I6?toj3o5(L zwkC{uz8kOwmEA;M0o-DYte~~<8QzJDf=zXhk2Ig1ta}3$@eZEI` zAofgnUO!hM;g_~%j#qOhxsQiuYtMv7YNLXQ?Vl@UpWxTD_(KmrbjEu0mFeY+I$7+k 
zZjQ?6&4jKix~}QEq3f2eJG$ScCs>c5sC@cT6JZwRd>UxeM_o0ta{T_U0NnJim`>|*r`}bm+{V^6m8S@| z;m|kE!sV1Oy9ZNl*4o`?EtwM4X=?2ui4}%Z3U$X!H~@iXet1Id-*fx-P0^!J^ms~? z+Zta`Ta?FBbl8~UDUycn;W#@ljZlf~N1k4HIAe6FANV(gd*c`LAQpBmFsi-ip&rdw zs5wpL`59kUGuVsJHU7OcW+zp9CS=d|`5tNKBF$judbR@ZOsyQRMmrbvB$~m_MeJbb lYF7CAmmj{<=52@G`W_#XQKY)#-{Z^l=l5~^>h;U5e*sQRRZ0K= From edac57b08dd66aa0b0ef8fbf122876f78e2d9d56 Mon Sep 17 00:00:00 2001 From: Jakub Kuklis Date: Thu, 5 Aug 2021 09:34:30 +0200 Subject: [PATCH 23/86] Correction for style guidelines --- .../AggregateFunctionSequenceMatch.h | 55 ++++++++++--------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index c136512bc78..52a258c2f94 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -526,7 +526,8 @@ protected: ++events_it, ++det_part_it; /// matching specific event - else { + else + { if (events_it->second.test(*det_part_it - 1)) ++events_it, ++det_part_it; @@ -538,33 +539,34 @@ protected: } } - if (limit_iterations && ++events_processed > sequence_match_max_iterations) { + if (limit_iterations && ++events_processed > sequence_match_max_iterations) throw Exception{"Pattern application proves too difficult, exceeding max iterations (" + toString(sequence_match_max_iterations) + ")", ErrorCodes::TOO_SLOW}; - } } det_part.clear(); return det_part_it == det_part_end; }; - for (auto action : actions) { - switch(action.type) { - /// mark AnyEvent action with 0 and SpecificEvent with positive numbers corresponding to the events - case PatternActionType::SpecificEvent: - det_part.push_back(action.extra + 1); - break; - case PatternActionType::AnyEvent: - det_part.push_back(0); - break; - case PatternActionType::KleeneStar: - case PatternActionType::TimeLessOrEqual: - case PatternActionType::TimeLess: - case PatternActionType::TimeGreaterOrEqual: - case PatternActionType::TimeGreater: - case PatternActionType::TimeEqual: - if (!find_deterministic_part()) - return false; + for (auto action : actions) + { + switch (action.type) + { + /// mark AnyEvent action with 0 and SpecificEvent with positive numbers corresponding to the events + case PatternActionType::SpecificEvent: + det_part.push_back(action.extra + 1); + break; + case PatternActionType::AnyEvent: + det_part.push_back(0); + break; + case PatternActionType::KleeneStar: + case PatternActionType::TimeLessOrEqual: + case PatternActionType::TimeLess: + case PatternActionType::TimeGreaterOrEqual: + case PatternActionType::TimeGreater: + case PatternActionType::TimeEqual: + if (!find_deterministic_part()) + return false; } } @@ -638,9 +640,10 @@ public: void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { + auto & output = assert_cast(to).getData(); if ((this->conditions_in_pattern & this->data(place).conditions_met) != this->conditions_in_pattern) { - assert_cast(to).getData().push_back(false); + output.push_back(false); return; } this->data(place).sort(); @@ -654,7 +657,7 @@ public: bool match = (this->pattern_has_time ? 
(this->couldMatchDeterministicParts(events_begin, events_end) && this->backtrackingMatch(events_it, events_end)) : this->dfaMatch(events_it, events_end)); - assert_cast(to).getData().push_back(match); + output.push_back(match); } }; @@ -675,13 +678,14 @@ public: void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { + auto & output = assert_cast(to).getData(); if ((this->conditions_in_pattern & this->data(place).conditions_met) != this->conditions_in_pattern) { - assert_cast(to).getData().push_back(0); + output.push_back(0); return; } this->data(place).sort(); - assert_cast(to).getData().push_back(count(place)); + output.push_back(count(place)); } private: @@ -695,7 +699,8 @@ private: size_t count = 0; // check if there is a chance of matching the sequence at least once - if (this->couldMatchDeterministicParts(events_begin, events_end)) { + if (this->couldMatchDeterministicParts(events_begin, events_end)) + { while (events_it != events_end && this->backtrackingMatch(events_it, events_end)) ++count; } From b9bb2b577b4a04830168645231e5d1fb5585f033 Mon Sep 17 00:00:00 2001 From: Jakub Kuklis Date: Thu, 5 Aug 2021 11:34:39 +0200 Subject: [PATCH 24/86] Simplifying couldMatchDeterministicParts --- .../AggregateFunctionSequenceMatch.h | 44 +++++++------------ 1 file changed, 15 insertions(+), 29 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 52a258c2f94..1f0227a9b6f 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -510,25 +510,26 @@ protected: { size_t events_processed = 0; auto events_it = events_begin; - std::vector det_part; - auto find_deterministic_part = [&events_it, &events_end, &events_processed, &det_part, limit_iterations]() + const auto actions_end = std::end(actions); + auto actions_it = std::begin(actions); + auto det_part_begin = actions_it; + + auto match_deterministic_part = [&events_it, events_end, &events_processed, det_part_begin, actions_it, limit_iterations]() { auto events_it_init = events_it; - const auto det_part_begin = std::begin(det_part); - const auto det_part_end = std::end(det_part); auto det_part_it = det_part_begin; - while (det_part_it != det_part_end && events_it != events_end) + while (det_part_it != actions_it && events_it != events_end) { /// matching any event - if (*det_part_it == 0) + if (det_part_it->type == PatternActionType::AnyEvent) ++events_it, ++det_part_it; /// matching specific event else { - if (events_it->second.test(*det_part_it - 1)) + if (events_it->second.test(det_part_it->extra)) ++events_it, ++det_part_it; /// abandon current matching, try to match the deterministic fragment further in the list @@ -544,33 +545,18 @@ protected: ErrorCodes::TOO_SLOW}; } - det_part.clear(); - return det_part_it == det_part_end; + return det_part_it == actions_it; }; - for (auto action : actions) - { - switch (action.type) + for (; actions_it != actions_end; ++actions_it) + if (actions_it->type != PatternActionType::SpecificEvent && actions_it->type != PatternActionType::AnyEvent) { - /// mark AnyEvent action with 0 and SpecificEvent with positive numbers corresponding to the events - case PatternActionType::SpecificEvent: - det_part.push_back(action.extra + 1); - break; - case PatternActionType::AnyEvent: - det_part.push_back(0); - break; - case PatternActionType::KleeneStar: - case PatternActionType::TimeLessOrEqual: - case 
PatternActionType::TimeLess:
-                case PatternActionType::TimeGreaterOrEqual:
-                case PatternActionType::TimeGreater:
-                case PatternActionType::TimeEqual:
-                    if (!find_deterministic_part())
-                        return false;
+                if (!match_deterministic_part())
+                    return false;
+                det_part_begin = std::next(actions_it);
             }
-        }
 
-        return find_deterministic_part();
+        return match_deterministic_part();
     }
 
 private:

From 0f6696922a8f63899d6272e75e5235dad6b20cea Mon Sep 17 00:00:00 2001
From: kssenii
Date: Fri, 6 Aug 2021 19:43:12 +0000
Subject: [PATCH 25/86] Fix assertion !hasPendingData()

---
 src/IO/ReadIndirectBufferFromWebServer.cpp | 30 ++++++++++++----------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/IO/ReadIndirectBufferFromWebServer.cpp b/src/IO/ReadIndirectBufferFromWebServer.cpp
index f0a68a8a578..0cf2de14be9 100644
--- a/src/IO/ReadIndirectBufferFromWebServer.cpp
+++ b/src/IO/ReadIndirectBufferFromWebServer.cpp
@@ -54,17 +54,24 @@ std::unique_ptr ReadIndirectBufferFromWebServer::initialize()
 
 bool ReadIndirectBufferFromWebServer::nextImpl()
 {
-    if (!impl)
+    bool next_result = false, successful_read = false;
+
+    if (impl)
+    {
+        impl->position() = position();
+        assert(!impl->hasPendingData());
+    }
+    else
+    {
         impl = initialize();
+        next_result = impl->hasPendingData();
+    }
 
-    bool ret = false, successful_read = false;
-    auto sleep_milliseconds = std::chrono::milliseconds(100);
-
-    for (size_t try_num = 0; try_num < max_read_tries; ++try_num)
+    for (size_t try_num = 0; (try_num < max_read_tries) && !next_result; ++try_num)
     {
         try
         {
-            ret = impl->next();
+            next_result = impl->next();
             successful_read = true;
             break;
         }
@@ -74,23 +81,20 @@ bool ReadIndirectBufferFromWebServer::nextImpl()
 
             impl.reset();
             impl = initialize();
+            next_result = impl->hasPendingData();
         }
-
-        std::this_thread::sleep_for(sleep_milliseconds);
-        sleep_milliseconds *= 2;
     }
 
     if (!successful_read)
         throw Exception(ErrorCodes::NETWORK_ERROR, "All read attempts ({}) failed for uri: {}", max_read_tries, url);
 
-    if (ret)
+    if (next_result)
     {
-        working_buffer = internal_buffer = impl->buffer();
-        pos = working_buffer.begin();
+        BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset());
         offset += working_buffer.size();
     }
 
-    return ret;
+    return next_result;
 }
 

From 9a02c6ad0581f7459b3368dc8e84919d6cbff196 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Sun, 15 Aug 2021 13:05:25 +0000
Subject: [PATCH 26/86] Catch any error if it happens on disk load when server
 is starting up

---
 .../clickhouse-web-server-exporter.cpp        | 14 +++++++++-
 src/Disks/DiskWebServer.cpp                   | 28 +++++++++++++++++--
 src/Disks/DiskWebServer.h                     |  6 ++++
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/programs/web-server-exporter/clickhouse-web-server-exporter.cpp b/programs/web-server-exporter/clickhouse-web-server-exporter.cpp
index d193fa3f5ea..d104a2834f0 100644
--- a/programs/web-server-exporter/clickhouse-web-server-exporter.cpp
+++ b/programs/web-server-exporter/clickhouse-web-server-exporter.cpp
@@ -72,6 +72,7 @@ void processTableFiles(const String & url, const fs::path & path, const String
 
 
 int mainEntryClickHouseWebServerExporter(int argc, char ** argv)
+try
 {
     using namespace DB;
     namespace po = boost::program_options;
@@ -106,7 +107,13 @@ int mainEntryClickHouseWebServerExporter(int argc, char ** argv)
     else
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "No files-prefix option passed");
 
-    fs::path fs_path = fs::canonical(metadata_path);
+    fs::path fs_path = fs::weakly_canonical(metadata_path);
+    if (!fs::exists(fs_path))
+    {
+        std::cerr << 
fmt::format("Data path ({}) does not exist", fs_path.string()); + return 1; + } + String uuid; if (!RE2::Extract(metadata_path, EXTRACT_UUID_PATTERN, "\\1", &uuid)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot extract uuid for: {}", metadata_path); @@ -120,3 +127,8 @@ int mainEntryClickHouseWebServerExporter(int argc, char ** argv) return 0; } +catch (...) +{ + std::cerr << DB::getCurrentExceptionMessage(true); + return 1; +} diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index c7f718bcd17..61322c144e5 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -10,6 +10,7 @@ #include #include +#include #include @@ -31,6 +32,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; + extern const int NETWORK_ERROR; } @@ -57,7 +59,7 @@ void DiskWebServer::Metadata::initialize(const String & uri_with_path, const Str * {prefix}-{uuid}-format_version.txt * {prefix}-{uuid}-detached-{file} * ... - */ + **/ if (RE2::FullMatch(remote_file_name, DIRECTORY_FILE_PATTERN(files_prefix), &uuid, &directory, &file)) { if (uuid != table_uuid) @@ -255,8 +257,28 @@ DiskDirectoryIteratorPtr DiskWebServer::iterateDirectory(const String & path) if (!RE2::Extract(path, EXTRACT_UUID_PATTERN, "\\1", &uuid)) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot extract uuid for: {}", path); - if (!metadata.tables_data.count(uuid)) - metadata.initialize(uri, settings->files_prefix, uuid, getContext()); + /// Do not throw if it is not a query, but disk load. + bool can_throw = false; + if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext()) + can_throw = true; + + try + { + if (!metadata.tables_data.count(uuid)) + metadata.initialize(uri, settings->files_prefix, uuid, getContext()); + } + catch (const Poco::Exception &) + { + const auto message = getCurrentExceptionMessage(false); + if (can_throw) + { + throw Exception(ErrorCodes::NETWORK_ERROR, "Cannot load disk metadata. Error: {}", message); + } + + LOG_TRACE(&Poco::Logger::get("DiskWeb"), "Cannot load disk metadata. Error: {}", message); + /// Empty iterator. + return std::make_unique>(metadata.tables_data[""], path); + } return std::make_unique>(metadata.tables_data[uuid], path); } diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index d7fb2219165..2eb0f20ebd2 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -26,6 +26,11 @@ struct DiskWebServerSettings }; +/* + * If url is not reachable on disk load when server is starting up tables, then all errors are caught. + * If in this case there were errors, tables can be reloaded (become visible) via detach table table_name -> attach table table_name. + * If metadata was successfully loaded at server startup, then tables are available straight away. 
+**/ class DiskWebServer : public IDisk, WithContext { using SettingsPtr = std::unique_ptr; @@ -64,6 +69,7 @@ public: mutable TableDirectories tables_data; Metadata() {} + void initialize(const String & uri_with_path, const String & files_prefix, const String & uuid, ContextPtr context) const; }; From 77c350cedc47b110af5e0c8ee193fb5a07691c3f Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 16 Aug 2021 14:30:06 +0000 Subject: [PATCH 27/86] Catch and reformat error with invalid url --- src/IO/ReadIndirectBufferFromWebServer.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/IO/ReadIndirectBufferFromWebServer.cpp b/src/IO/ReadIndirectBufferFromWebServer.cpp index 0cf2de14be9..fc1b877f77c 100644 --- a/src/IO/ReadIndirectBufferFromWebServer.cpp +++ b/src/IO/ReadIndirectBufferFromWebServer.cpp @@ -63,7 +63,15 @@ bool ReadIndirectBufferFromWebServer::nextImpl() } else { - impl = initialize(); + try + { + impl = initialize(); + } + catch (const Poco::Exception & e) + { + throw Exception(ErrorCodes::NETWORK_ERROR, "Unreachable url: {}. Error: {}", url, e.what()); + } + next_result = impl->hasPendingData(); } From e137ea180f132530e2fd4b4b9a62be5db5d6c30b Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 16 Aug 2021 14:31:16 +0000 Subject: [PATCH 28/86] Tool: export just files into output-dir by default, but leave test-mode for tests --- .../clickhouse-web-server-exporter.cpp | 65 ++++++++++++++----- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/programs/web-server-exporter/clickhouse-web-server-exporter.cpp b/programs/web-server-exporter/clickhouse-web-server-exporter.cpp index d104a2834f0..b3cb25af900 100644 --- a/programs/web-server-exporter/clickhouse-web-server-exporter.cpp +++ b/programs/web-server-exporter/clickhouse-web-server-exporter.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -26,10 +27,13 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +/* + * A tool to collect files on local fs as is (into current directory or into path from --output-dir option). + * If test-mode option is added, files will be put by given url via PUT request. 
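+ *
+ * Example invocations (paths are placeholders; the test-mode form mirrors the integration test):
+ *   clickhouse web-server-exporter --files-prefix data --output-dir <output-dir> --metadata-path <metadata-path>
+ *   clickhouse web-server-exporter --test-mode --files-prefix data --url http://nginx:80/test1 --metadata-path <metadata-path>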
+ */ -void processTableFiles(const String & url, const fs::path & path, const String & files_prefix, String uuid) +void processTableFiles(const fs::path & path, const String & files_prefix, String uuid, WriteBuffer & metadata_buf, std::function(const String &)> create_dst_buf) { - WriteBufferFromHTTP metadata_buf(Poco::URI(fs::path(url) / (".index-" + uuid)), Poco::Net::HTTPRequest::HTTP_PUT); fs::directory_iterator dir_end; auto process_file = [&](const String & file_name, const String & file_path) @@ -41,11 +45,11 @@ void processTableFiles(const String & url, const fs::path & path, const String & writeChar('\n', metadata_buf); auto src_buf = createReadBufferFromFileBase(file_path, fs::file_size(file_path), 0, 0, nullptr); - WriteBufferFromHTTP dst_buf(Poco::URI(fs::path(url) / remote_file_name), Poco::Net::HTTPRequest::HTTP_PUT); + auto dst_buf = create_dst_buf(remote_file_name); - copyData(*src_buf, dst_buf); - dst_buf.next(); - dst_buf.finalize(); + copyData(*src_buf, *dst_buf); + dst_buf->next(); + dst_buf->finalize(); }; for (fs::directory_iterator dir_it(path); dir_it != dir_end; ++dir_it) @@ -63,11 +67,7 @@ void processTableFiles(const String & url, const fs::path & path, const String & process_file(dir_it->path().filename(), dir_it->path()); } } - - metadata_buf.next(); - metadata_buf.finalize(); } - } @@ -81,7 +81,9 @@ try description.add_options() ("help,h", "produce help message") ("metadata-path", po::value(), "Metadata path (select data_paths from system.tables where name='table_name'") - ("url", po::value(), "Web server url") + ("test-mode", "Use test mode, which will put data on given url via PUT") + ("url", po::value(), "Web server url for test mode") + ("output-dir", po::value(), "Directory to put files in non-test mode") ("files-prefix", po::value(), "Prefix for stored files"); po::parsed_options parsed = po::command_line_parser(argc, argv).options(description).run(); @@ -118,17 +120,46 @@ try if (!RE2::Extract(metadata_path, EXTRACT_UUID_PATTERN, "\\1", &uuid)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot extract uuid for: {}", metadata_path); - if (options.count("url")) - url = options["url"].as(); - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No url option passed"); + std::shared_ptr metadata_buf; + std::function(const String &)> create_dst_buf; + String root_path; - processTableFiles(url, fs_path, files_prefix, uuid); + if (options.count("test-mode")) + { + if (options.count("url")) + url = options["url"].as(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No url option passed for test mode"); + + metadata_buf = std::make_shared(Poco::URI(fs::path(url) / (".index-" + uuid)), Poco::Net::HTTPRequest::HTTP_PUT); + + create_dst_buf = [&](const String & remote_file_name) + { + return std::make_shared(Poco::URI(fs::path(url) / remote_file_name), Poco::Net::HTTPRequest::HTTP_PUT); + }; + } + else + { + if (options.count("output-dir")) + root_path = options["output-dir"].as(); + else + root_path = fs::current_path(); + + metadata_buf = std::make_shared(fs::path(root_path) / (".index-" + uuid)); + create_dst_buf = [&](const String & remote_file_name) + { + return std::make_shared(fs::path(root_path) / remote_file_name); + }; + } + + processTableFiles(fs_path, files_prefix, uuid, *metadata_buf, create_dst_buf); + metadata_buf->next(); + metadata_buf->finalize(); return 0; } catch (...) 
{ - std::cerr << DB::getCurrentExceptionMessage(true); + std::cerr << DB::getCurrentExceptionMessage(false); return 1; } From 709bc1a013e0cb293b79f38eb166ce10d0fe5f71 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 16 Aug 2021 14:50:07 +0000 Subject: [PATCH 29/86] Continued --- src/Disks/DiskWebServer.cpp | 9 +++- src/Disks/DiskWebServer.h | 40 ++++++++++++++---- .../test_disk_over_web_server/test.py | 41 +++++++++++-------- 3 files changed, 63 insertions(+), 27 deletions(-) diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 61322c144e5..976d8f393e5 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -33,6 +34,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int LOGICAL_ERROR; extern const int NETWORK_ERROR; + extern const int NOT_IMPLEMENTED; } @@ -240,9 +242,12 @@ std::unique_ptr DiskWebServer::readFile(const String & p } -std::unique_ptr DiskWebServer::writeFile(const String &, size_t, WriteMode) +std::unique_ptr DiskWebServer::writeFile(const String & path, size_t, WriteMode) { - return std::make_unique(); + if (path.ends_with("format_version.txt")) + return std::make_unique(); + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); } diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 2eb0f20ebd2..7189c96c6fe 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -121,9 +121,15 @@ public: std::unique_ptr writeFile(const String &, size_t, WriteMode) override; - void moveFile(const String &, const String &) override {} + void moveFile(const String &, const String &) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } - void replaceFile(const String &, const String &) override {} + void replaceFile(const String &, const String &) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } void removeFile(const String &) override { @@ -145,17 +151,35 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); } - void removeSharedFile(const String &, bool) override {} + void removeSharedFile(const String &, bool) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } - void removeSharedRecursive(const String &, bool) override {} + void removeSharedRecursive(const String &, bool) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } - void clearDirectory(const String &) override {} + void clearDirectory(const String &) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } - void moveDirectory(const String &, const String &) override {} + void moveDirectory(const String &, const String &) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } - void removeDirectory(const String &) override {} + void removeDirectory(const String &) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } - void setLastModified(const String &, const Poco::Timestamp &) override {} + void setLastModified(const String &, const Poco::Timestamp &) override + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Disk {} is read-only", getName()); + } /// Create part diff --git a/tests/integration/test_disk_over_web_server/test.py 
b/tests/integration/test_disk_over_web_server/test.py
index 4a64d34c6ba..2d949f93ad7 100644
--- a/tests/integration/test_disk_over_web_server/test.py
+++ b/tests/integration/test_disk_over_web_server/test.py
@@ -2,6 +2,7 @@ import pytest
 
 from helpers.cluster import ClickHouseCluster
 
+uuids = []
 
 @pytest.fixture(scope="module")
 def cluster():
@@ -10,6 +11,25 @@ def cluster():
         cluster.add_instance("node1", main_configs=["configs/storage_conf.xml"], with_nginx=True)
         cluster.add_instance("node2", main_configs=["configs/storage_conf_web.xml"], with_nginx=True)
         cluster.start()
+
+        node1 = cluster.instances["node1"]
+        expected = ""
+        global uuids
+        for i in range(3):
+            node1.query(""" CREATE TABLE data{} (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'def';""".format(i))
+            node1.query("INSERT INTO data{} SELECT number FROM numbers(500000 * {})".format(i, i + 1))
+            expected = node1.query("SELECT * FROM data{} ORDER BY id".format(i))
+
+            metadata_path = node1.query("SELECT data_paths FROM system.tables WHERE name='data{}'".format(i))
+            metadata_path = metadata_path[metadata_path.find('/'):metadata_path.rfind('/')+1]
+            print(f'Metadata: {metadata_path}')
+
+            node1.exec_in_container(['bash', '-c',
+                                     '/usr/bin/clickhouse web-server-exporter --test-mode --files-prefix data --url http://nginx:80/test1 --metadata-path {}'.format(metadata_path)], user='root')
+            parts = metadata_path.split('/')
+            uuids.append(parts[3])
+            print(f'UUID: {parts[3]}')
+
         yield cluster
 
     finally:
@@ -18,24 +38,9 @@ def cluster():
 
 def test_usage(cluster):
     node1 = cluster.instances["node1"]
-    expected = ""
-    uuids = []
-    for i in range(3):
-        node1.query(""" CREATE TABLE data{} (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'def';""".format(i))
-        node1.query("INSERT INTO data{} SELECT number FROM numbers(500000 * {})".format(i, i + 1))
-        expected = node1.query("SELECT * FROM data{} ORDER BY id".format(i))
-
-        metadata_path = node1.query("SELECT data_paths FROM system.tables WHERE name='data{}'".format(i))
-        metadata_path = metadata_path[metadata_path.find('/'):metadata_path.rfind('/')+1]
-        print(f'Metadata: {metadata_path}')
-
-        node1.exec_in_container(['bash', '-c',
-                                 '/usr/bin/clickhouse web-server-exporter --files-prefix data --url http://nginx:80/test1 --metadata-path {}'.format(metadata_path)], user='root')
-        parts = metadata_path.split('/')
-        uuids.append(parts[3])
-        print(f'UUID: {parts[3]}')
-
     node2 = cluster.instances["node2"]
+    global uuids
+    assert(len(uuids) == 3)
     for i in range(3):
         node2.query("""
             ATTACH TABLE test{} UUID '{}'
@@ -52,4 +57,6 @@ def test_usage(cluster):
         result = node2.query("SELECT id FROM test{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(i))
         assert(result == node1.query("SELECT id FROM data{} WHERE id > 789999 AND id < 999999 ORDER BY id".format(i)))
 
+        node2.query("DROP TABLE test{}".format(i))
         print(f"Ok {i}")
+

From f120f3c8a08c168dfa27072e5833c1dd56967201 Mon Sep 17 00:00:00 2001
From: Dmitriy
Date: Sun, 22 Aug 2021 13:38:43 +0300
Subject: [PATCH 30/86] Document the getServerPort function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Documented the getServerPort function.
---
 .../functions/other-functions.md              | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 577fdd668a2..5bd2e9f5f7e 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -2236,3 +2236,38 @@ defaultRoles()
 
 Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
 
+## getServerPort {#getserverport}
+
+Returns the number of the server port. If the port is not used by the server, an exception is thrown.
+
+**Syntax**
+
+``` sql
+getServerPort(port_name)
+```
+
+**Arguments**
+
+- `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string).
+
+**Returned value**
+
+- The number of the server port.
+
+Type: [UInt16](../../sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT getServerPort('tcp_port');
+```
+
+Result:
+
+``` text
+┌─getServerPort('tcp_port')─┐
+│ 9000 │
+└───────────────────────────┘
+```

From df94b491797ae2302e603831ee41140d817d82e4 Mon Sep 17 00:00:00 2001
From: Dmitriy
Date: Sun, 22 Aug 2021 19:02:40 +0300
Subject: [PATCH 31/86] Translate to Russian
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Translated to Russian.

---
 .../functions/other-functions.md              | 13 ++++-
 .../functions/other-functions.md              | 47 +++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 5bd2e9f5f7e..d868397b899 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -2248,7 +2248,18 @@ getServerPort(port_name)
 
 **Arguments**
 
-- `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string).
+- `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string). Possible values:
+
+    - 'tcp_port'
+    - 'tcp_port_secure'
+    - 'http_port'
+    - 'https_port'
+    - 'interserver_http_port'
+    - 'interserver_https_port'
+    - 'mysql_port'
+    - 'postgresql_port'
+    - 'grpc_port'
+    - 'prometheus.port'
 
 **Returned value**
 
diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md
index c023786b788..b9649f6c103 100644
--- a/docs/ru/sql-reference/functions/other-functions.md
+++ b/docs/ru/sql-reference/functions/other-functions.md
@@ -2185,3 +2185,50 @@ defaultRoles()
 - Список ролей по умолчанию.
 
 Тип: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+
+## getServerPort {#getserverport}
+
+Возвращает номер порта сервера. Если порт не используется сервером, будет исключение.
+
+**Синтаксис**
+
+``` sql
+getServerPort(port_name)
+```
+
+**Аргументы**
+
+- `port_name` — имя порта сервера. [String](../../sql-reference/data-types/string.md#string). Возможные значения:
+
+    - 'tcp_port'
+    - 'tcp_port_secure'
+    - 'http_port'
+    - 'https_port'
+    - 'interserver_http_port'
+    - 'interserver_https_port'
+    - 'mysql_port'
+    - 'postgresql_port'
+    - 'grpc_port'
+    - 'prometheus.port'
+
+**Возвращаемое значение**
+
+- Номер порта сервера.
+
+Тип: [UInt16](../../sql-reference/data-types/int-uint.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT getServerPort('tcp_port');
+```
+
+Результат:
+
+``` text
+┌─getServerPort('tcp_port')─┐
+│ 9000 │
+└───────────────────────────┘
+```

From 93828a586fcf69ca728819973f6e7095ead7a31a Mon Sep 17 00:00:00 2001
From: Dmitriy
Date: Sun, 22 Aug 2021 20:57:21 +0300
Subject: [PATCH 32/86] Fix the translation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixed the translation.

---
 docs/ru/sql-reference/functions/other-functions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md
index b9649f6c103..fe61f59cd71 100644
--- a/docs/ru/sql-reference/functions/other-functions.md
+++ b/docs/ru/sql-reference/functions/other-functions.md
@@ -2188,7 +2188,7 @@ defaultRoles()
 
 ## getServerPort {#getserverport}
 
-Возвращает номер порта сервера. Если порт не используется сервером, будет исключение.
+Возвращает номер порта сервера. Если порт не используется сервером, будет сгенерировано исключение.
 
 **Синтаксис**
 

From caf40d77edcdfd44a71100af3bb5e03e5bf9d309 Mon Sep 17 00:00:00 2001
From: Jakub Kuklis
Date: Mon, 23 Aug 2021 11:03:59 +0200
Subject: [PATCH 33/86] Adding a performance test with queries presented in
 the PR

---
 tests/performance/sequence_match.xml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 tests/performance/sequence_match.xml

diff --git a/tests/performance/sequence_match.xml b/tests/performance/sequence_match.xml
new file mode 100644
index 00000000000..dd70c186dc7
--- /dev/null
+++ b/tests/performance/sequence_match.xml
@@ -0,0 +1,21 @@
+
+    
+        hits_100m_single
+    
+
+    SELECT COUNT(*) FROM hits_100m_single GROUP BY EventTime HAVING sequenceMatch('(?1)(?t<1)(?2)')(EventTime, Age >= 0, Age = -1)
+
+    SELECT 1 FROM hits_100m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING
+        sequenceMatch('(?1)(?t>1000)(?3)')(
+            EventTime, hasAny(RefererCategories, [9]), hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAll(RefererCategories, [1, 9]), hasAny(RefererCategories, [1, 2326, 5496]))
+    SELECT 1 FROM hits_100m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING
+        sequenceMatch('(?1)(?t<10000)(?2)')(
+            EventTime, hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAny(RefererCategories, [1, 2]))
+    SELECT 1 FROM hits_100m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING
+        sequenceMatch('(?1)(?3)(?1)(?3)')(
+            EventTime, hasAny(RefererCategories, [9]), hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAll(RefererCategories, [1, 9]), hasAny(RefererCategories, [1, 2326, 5496]))
+    SELECT 1 FROM hits_100m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING
+        sequenceMatch('(?1)(?2)(?1)(?2)(?1)')(
+            EventTime, hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAny(RefererCategories, [1, 2]))
+
\ No newline at end of file

From 5307d319246ce1b80879ddec74b8651c6fa5a2d6 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Mon, 23 Aug 2021 11:26:54 +0000
Subject: [PATCH 34/86] Throw for alter and silence for drop

---
 src/Disks/DiskWebServer.cpp                   |  2 +-
 src/Disks/DiskWebServer.h                     |  2 ++
 src/Disks/IDisk.h                             |  2 ++
 src/Disks/IDiskRemote.h                       |  2 --
 src/Disks/ReadIndirectBufferFromRemoteFS.cpp  |  2 +-
 .../ReadIndirectBufferFromWebServer.cpp       |  1 +
 .../ReadIndirectBufferFromWebServer.h         |  0
 src/Interpreters/InterpreterAlterQuery.cpp    |  3 +++
 src/Interpreters/InterpreterDropQuery.cpp     |  5 +++++ 
src/Storages/IStorage.cpp | 13 +++++++++++ src/Storages/IStorage.h | 3 +++ src/Storages/StorageMergeTree.cpp | 3 +++ .../test_disk_over_web_server/test.py | 22 +++++++++++++++++++ 13 files changed, 56 insertions(+), 4 deletions(-) rename src/{IO => Disks}/ReadIndirectBufferFromWebServer.cpp (98%) rename src/{IO => Disks}/ReadIndirectBufferFromWebServer.h (100%) diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 976d8f393e5..a541ebc58ae 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 7189c96c6fe..7ce268a2d44 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -93,6 +93,8 @@ public: const String & getPath() const final override { return metadata_path; } + bool isReadOnly() const override { return true; } + UInt64 getTotalSpace() const final override { return std::numeric_limits::max(); } UInt64 getAvailableSpace() const final override { return std::numeric_limits::max(); } diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 61c805961ae..a731d45ccba 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -216,6 +216,8 @@ public: /// Overrode in remote fs disks. virtual bool supportZeroCopyReplication() const = 0; + virtual bool isReadOnly() const { return false; } + /// Invoked when Global Context is shutdown. virtual void shutdown() {} diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index 3f2dc5fdddc..5333b14a12d 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -178,8 +178,6 @@ struct IDiskRemote::Metadata : RemoteMetadata static constexpr UInt32 VERSION_RELATIVE_PATHS = 2; static constexpr UInt32 VERSION_READ_ONLY_FLAG = 3; - using PathAndSize = std::pair; - /// Disk path. const String & disk_path; diff --git a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp index c17b40e1ff3..3bc22167f50 100644 --- a/src/Disks/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/ReadIndirectBufferFromRemoteFS.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB diff --git a/src/IO/ReadIndirectBufferFromWebServer.cpp b/src/Disks/ReadIndirectBufferFromWebServer.cpp similarity index 98% rename from src/IO/ReadIndirectBufferFromWebServer.cpp rename to src/Disks/ReadIndirectBufferFromWebServer.cpp index fc1b877f77c..89644e12e8c 100644 --- a/src/IO/ReadIndirectBufferFromWebServer.cpp +++ b/src/Disks/ReadIndirectBufferFromWebServer.cpp @@ -58,6 +58,7 @@ bool ReadIndirectBufferFromWebServer::nextImpl() if (impl) { + /// Restore correct position at the needed offset. 
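+        /// The caller may not have consumed the whole working buffer, so propagate the current
+        /// read position to the underlying buffer before asking it for more data.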
impl->position() = position(); assert(!impl->hasPendingData()); } diff --git a/src/IO/ReadIndirectBufferFromWebServer.h b/src/Disks/ReadIndirectBufferFromWebServer.h similarity index 100% rename from src/IO/ReadIndirectBufferFromWebServer.h rename to src/Disks/ReadIndirectBufferFromWebServer.h diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 76e7afb7009..cf503fd1e25 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -32,6 +32,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int INCORRECT_QUERY; extern const int NOT_IMPLEMENTED; + extern const int TABLE_IS_READ_ONLY; } @@ -62,6 +63,8 @@ BlockIO InterpreterAlterQuery::execute() } StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext()); + if (table->isReadOnly()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); auto alter_lock = table->lockForAlter(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 0e15c6be27c..608bd615e6a 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes extern const int UNKNOWN_TABLE; extern const int NOT_IMPLEMENTED; extern const int INCORRECT_QUERY; + extern const int TABLE_IS_READ_ONLY; } @@ -162,6 +163,8 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabaseP if (query.kind == ASTDropQuery::Kind::Detach) { getContext()->checkAccess(drop_storage, table_id); + if (table->isReadOnly()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); if (table->isDictionary()) { @@ -195,6 +198,8 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ASTDropQuery & query, DatabaseP throw Exception("Cannot TRUNCATE dictionary", ErrorCodes::SYNTAX_ERROR); getContext()->checkAccess(AccessType::TRUNCATE, table_id); + if (table->isReadOnly()) + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); table->checkTableCanBeDropped(); diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 0e9f82a9802..100522ef2d1 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -201,6 +201,19 @@ NameDependencies IStorage::getDependentViewsByColumn(ContextPtr context) const return name_deps; } +bool IStorage::isReadOnly() const +{ + auto storage_policy = getStoragePolicy(); + if (storage_policy) + { + for (const auto disk : storage_policy->getDisks()) + if (!disk->isReadOnly()) + return false; + return true; + } + return false; +} + std::string PrewhereInfo::dump() const { WriteBufferFromOwnString ss; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 85bfbfb1f84..1d6c8a1101d 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -523,6 +523,9 @@ public: /// Returns storage policy if storage supports it. virtual StoragePolicyPtr getStoragePolicy() const { return {}; } + /// Returns true if all disks of storage are read-only. + virtual bool isReadOnly() const; + /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. 
/// Used for: /// - Simple count() optimization diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 32c2c76dd10..6221bb66506 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -241,6 +241,9 @@ void StorageMergeTree::checkTableCanBeDropped() const void StorageMergeTree::drop() { shutdown(); + /// In case there is read-only disk we cannot allow to call dropAllData(), but dropping tables is allowed. + if (isReadOnly()) + return; dropAllData(); } diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index 2d949f93ad7..c821f42f050 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -60,3 +60,25 @@ def test_usage(cluster): node2.query("DROP TABLE test{}".format(i)) print(f"Ok {i}") + +def test_incorrect_usage(cluster): + node1 = cluster.instances["node1"] + node2 = cluster.instances["node2"] + global uuids + node2.query(""" + ATTACH TABLE test0 UUID '{}' + (id Int32) ENGINE = MergeTree() ORDER BY id + SETTINGS storage_policy = 'web'; + """.format(uuids[0])) + + result = node2.query("SELECT count() FROM test0") + assert(int(result) == 500000) + + result = node2.query_and_get_error("ALTER TABLE test0 ADD COLUMN col1 Int32 first") + assert("Table is read-only" in result) + + result = node2.query_and_get_error("TRUNCATE TABLE test0") + assert("Table is read-only" in result) + + node2.query("DROP TABLE test0") + From 14b87feb2744550aeff0d2fd4fcd528599416a5b Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 24 Aug 2021 07:30:33 +0000 Subject: [PATCH 35/86] Fix build --- src/Storages/IStorage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 4ca7fdaf62a..d3636e6c756 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -206,7 +206,7 @@ bool IStorage::isReadOnly() const auto storage_policy = getStoragePolicy(); if (storage_policy) { - for (const auto disk : storage_policy->getDisks()) + for (const auto & disk : storage_policy->getDisks()) if (!disk->isReadOnly()) return false; return true; From d78e551de9a915a0dd32692c3fdafb3c5172af36 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 24 Aug 2021 12:24:54 +0300 Subject: [PATCH 36/86] Update src/Disks/DiskWebServer.cpp Co-authored-by: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> --- src/Disks/DiskWebServer.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index a541ebc58ae..ddccefcb7fb 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -263,9 +263,7 @@ DiskDirectoryIteratorPtr DiskWebServer::iterateDirectory(const String & path) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot extract uuid for: {}", path); /// Do not throw if it is not a query, but disk load. 
- bool can_throw = false; - if (CurrentThread::isInitialized() && CurrentThread::get().getQueryContext()) - can_throw = true; + bool can_throw = CurrentThread::isInitialized() && CurrentThread::get().getQueryContext(); try { From 01f4e2e0e006e219c2a0e658ce0ce0f3dfe2ef6d Mon Sep 17 00:00:00 2001 From: OnePiece <54787696+zhongyuankai@users.noreply.github.com> Date: Tue, 24 Aug 2021 20:03:49 +0800 Subject: [PATCH 37/86] Update DatabaseCatalog.h --- src/Interpreters/DatabaseCatalog.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 74bfb814ce4..a059439d48e 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -174,6 +174,7 @@ public: void addDependency(const StorageID & from, const StorageID & where); void removeDependency(const StorageID & from, const StorageID & where); Dependencies getDependencies(const StorageID & from) const; + void getViewDependencies(ViewDependencies & view_dependencies_) const; /// For Materialized and Live View void updateDependency(const StorageID & old_from, const StorageID & old_where,const StorageID & new_from, const StorageID & new_where); From 40673aa1f59479f439f3b43cfc7fa234fdcc22e6 Mon Sep 17 00:00:00 2001 From: OnePiece <54787696+zhongyuankai@users.noreply.github.com> Date: Tue, 24 Aug 2021 20:05:29 +0800 Subject: [PATCH 38/86] Update DatabaseCatalog.cpp --- src/Interpreters/DatabaseCatalog.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index fd6b5b9a810..73884476091 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -608,6 +608,12 @@ Dependencies DatabaseCatalog::getDependencies(const StorageID & from) const return {}; return Dependencies(iter->second.begin(), iter->second.end()); } + +void DatabaseCatalog::getViewDependencies(ViewDependencies & view_dependencies_) const +{ + std::lock_guard lock{databases_mutex}; + view_dependencies_ = view_dependencies; +} void DatabaseCatalog::updateDependency(const StorageID & old_from, const StorageID & old_where, const StorageID & new_from, From ba594f41b915e7bd9078050bfab7b5b79443864a Mon Sep 17 00:00:00 2001 From: OnePiece <54787696+zhongyuankai@users.noreply.github.com> Date: Tue, 24 Aug 2021 20:07:08 +0800 Subject: [PATCH 39/86] Create StorageSystemTableViews.cpp --- .../System/StorageSystemTableViews.cpp | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 src/Storages/System/StorageSystemTableViews.cpp diff --git a/src/Storages/System/StorageSystemTableViews.cpp b/src/Storages/System/StorageSystemTableViews.cpp new file mode 100644 index 00000000000..9687ceafac4 --- /dev/null +++ b/src/Storages/System/StorageSystemTableViews.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include + +namespace DB +{ + +class Context; + +NamesAndTypesList StorageSystemTableViews::getNamesAndTypes() +{ + return { + {"database", std::make_shared()}, + {"table", std::make_shared()}, + {"view_database", std::make_shared()}, + {"view_table", std::make_shared()}, + }; +} + +void StorageSystemTableViews::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & ) const +{ + const auto access = context.getAccess(); + const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + ViewDependencies view_dependencies; + DatabaseCatalog::instance().getViewDependencies(view_dependencies); + + for (const auto & 
[storage_id, view_ids] : view_dependencies) + { + const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, storage_id.database_name); + + if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, storage_id.database_name, storage_id.table_name)) + continue; + + for (const auto & view_id : view_ids) + { + size_t col_num = 0; + res_columns[col_num++]->insert(storage_id.database_name); + res_columns[col_num++]->insert(storage_id.table_name); + res_columns[col_num++]->insert(view_id.database_name); + res_columns[col_num++]->insert(view_id.table_name); + } + } +} + +} From 29aa41577fe391da7691599ec51c7f6c98e9183f Mon Sep 17 00:00:00 2001 From: OnePiece <54787696+zhongyuankai@users.noreply.github.com> Date: Tue, 24 Aug 2021 20:07:48 +0800 Subject: [PATCH 40/86] Create StorageSystemTableViews.h --- src/Storages/System/StorageSystemTableViews.h | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 src/Storages/System/StorageSystemTableViews.h diff --git a/src/Storages/System/StorageSystemTableViews.h b/src/Storages/System/StorageSystemTableViews.h new file mode 100644 index 00000000000..daf852a1821 --- /dev/null +++ b/src/Storages/System/StorageSystemTableViews.h @@ -0,0 +1,24 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class StorageSystemTableViews final : public ext::shared_ptr_helper, public IStorageSystemOneBlock +{ + friend struct ext::shared_ptr_helper; +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override; + +public: + std::string getName() const override { return "TableViews"; } + + static NamesAndTypesList getNamesAndTypes(); + +}; + +} From d2919e98c78e74d259ca2acc76a53bd1e36d770e Mon Sep 17 00:00:00 2001 From: OnePiece <54787696+zhongyuankai@users.noreply.github.com> Date: Tue, 24 Aug 2021 20:09:04 +0800 Subject: [PATCH 41/86] Update attachSystemTables.cpp --- src/Storages/System/attachSystemTables.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 95e86487073..0f5e1b5f032 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -95,6 +95,7 @@ void attachSystemTablesLocal(IDatabase & system_database) attach(system_database, "zeros_mt", true); attach(system_database, "databases"); attach(system_database, "tables"); + attach(system_database, "table_views"); attach(system_database, "columns"); attach(system_database, "functions"); attach(system_database, "events"); From e9edabc3cb1b56fb7694f08e3c83e2a6ab92800c Mon Sep 17 00:00:00 2001 From: OnePiece <54787696+zhongyuankai@users.noreply.github.com> Date: Tue, 24 Aug 2021 20:09:36 +0800 Subject: [PATCH 42/86] Update attachSystemTables.cpp --- src/Storages/System/attachSystemTables.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 0f5e1b5f032..6bb4343e2bf 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include From adefa73b738ae4f0bf7d37f4b0698008c840d2d3 Mon Sep 17 00:00:00 2001 From: OnePiece <54787696+zhongyuankai@users.noreply.github.com> Date: Tue, 24 Aug 2021 20:13:04 +0800 Subject: [PATCH 43/86] Create 
02013_add_system_table_views.reference --- tests/queries/0_stateless/02013_add_system_table_views.reference | 1 + 1 file changed, 1 insertion(+) create mode 100644 tests/queries/0_stateless/02013_add_system_table_views.reference diff --git a/tests/queries/0_stateless/02013_add_system_table_views.reference b/tests/queries/0_stateless/02013_add_system_table_views.reference new file mode 100644 index 00000000000..3c16c8bc6d9 --- /dev/null +++ b/tests/queries/0_stateless/02013_add_system_table_views.reference @@ -0,0 +1 @@ +default table_views_test default table_views_test_view From 7f72e51a8f33b236e8e506f864a6fed2aec8e9df Mon Sep 17 00:00:00 2001 From: OnePiece <54787696+zhongyuankai@users.noreply.github.com> Date: Tue, 24 Aug 2021 20:14:01 +0800 Subject: [PATCH 44/86] Create 02013_add_system_table_views.sql --- .../queries/0_stateless/02013_add_system_table_views.sql | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/02013_add_system_table_views.sql diff --git a/tests/queries/0_stateless/02013_add_system_table_views.sql b/tests/queries/0_stateless/02013_add_system_table_views.sql new file mode 100644 index 00000000000..0d2616b106f --- /dev/null +++ b/tests/queries/0_stateless/02013_add_system_table_views.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS table_views_test; +CREATE TABLE table_views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; +DROP TABLE IF EXISTS table_views_test_view; +CREATE MATERIALIZED VIEW table_views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM table_views_test; +SELECT * FROM system.table_views WHERE view_table = 'table_views_test_view'; +DROP TABLE IF EXISTS table_views_test_view; +SELECT * FROM system.table_views WHERE view_table = 'table_views_test_view'; +DROP TABLE IF EXISTS table_views_test; From 39ba4f660b6ea569551db41d008d3e8bb388f270 Mon Sep 17 00:00:00 2001 From: jkuklis Date: Wed, 25 Aug 2021 09:26:48 +0200 Subject: [PATCH 45/86] Use smaller dataset in the performance test so that it doesn't time out in PR --- tests/performance/sequence_match.xml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/performance/sequence_match.xml b/tests/performance/sequence_match.xml index dd70c186dc7..537a92b5526 100644 --- a/tests/performance/sequence_match.xml +++ b/tests/performance/sequence_match.xml @@ -1,21 +1,21 @@ - hits_100m_single + hits_10m_single - SELECT COUNT(*) FROM hits_100m_single GROUP BY EventTime HAVING sequenceMatch('(?1)(?t<1)(?2)')(EventTime, Age >= 0, Age = -1) + SELECT COUNT(*) FROM hits_10m_single GROUP BY EventTime HAVING sequenceMatch('(?1)(?t<1)(?2)')(EventTime, Age >= 0, Age = -1) - SELECT 1 FROM hits_100m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?t>1000)(?3)')( EventTime, hasAny(RefererCategories, [9]), hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAll(RefererCategories, [1, 9]), hasAny(RefererCategories, [1, 2326, 5496])) - SELECT 1 FROM hits_100m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?t<10000)(?2)')( EventTime, hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAny(RefererCategories, [1, 2])) - SELECT 1 FROM hits_100m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + SELECT 1 FROM hits_10m_single 
WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?3)(?1)(?3)')( EventTime, hasAny(RefererCategories, [9]), hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAll(RefererCategories, [1, 9]), hasAny(RefererCategories, [1, 2326, 5496])) - SELECT 1 FROM hits_100m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?2)(?1)(?2)(?1)')( EventTime, hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAny(RefererCategories, [1, 2])) - \ No newline at end of file + From 64efa917d895228ffe98129a7c3070fc660fe7ac Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Wed, 25 Aug 2021 18:11:40 +0800 Subject: [PATCH 46/86] review system.views --- src/Interpreters/DatabaseCatalog.cpp | 7 ++- src/Interpreters/DatabaseCatalog.h | 2 +- .../System/StorageSystemTableViews.cpp | 47 ------------------- src/Storages/System/StorageSystemViews.cpp | 45 ++++++++++++++++++ ...ystemTableViews.h => StorageSystemViews.h} | 6 +-- src/Storages/System/attachSystemTables.cpp | 4 +- ...rence => 02014_add_system_views.reference} | 0 ...e_views.sql => 02014_add_system_views.sql} | 0 8 files changed, 54 insertions(+), 57 deletions(-) delete mode 100644 src/Storages/System/StorageSystemTableViews.cpp create mode 100644 src/Storages/System/StorageSystemViews.cpp rename src/Storages/System/{StorageSystemTableViews.h => StorageSystemViews.h} (57%) rename tests/queries/0_stateless/{02013_add_system_table_views.reference => 02014_add_system_views.reference} (100%) rename tests/queries/0_stateless/{02013_add_system_table_views.sql => 02014_add_system_views.sql} (100%) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 73884476091..e8a9b1870e5 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -608,11 +608,10 @@ Dependencies DatabaseCatalog::getDependencies(const StorageID & from) const return {}; return Dependencies(iter->second.begin(), iter->second.end()); } - -void DatabaseCatalog::getViewDependencies(ViewDependencies & view_dependencies_) const + +ViewDependencies DatabaseCatalog::getViewDependencies() const; { - std::lock_guard lock{databases_mutex}; - view_dependencies_ = view_dependencies; + return view_dependencies; } void diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index a059439d48e..787fd9dc512 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -174,7 +174,7 @@ public: void addDependency(const StorageID & from, const StorageID & where); void removeDependency(const StorageID & from, const StorageID & where); Dependencies getDependencies(const StorageID & from) const; - void getViewDependencies(ViewDependencies & view_dependencies_) const; + ViewDependencies getViewDependencies() const; /// For Materialized and Live View void updateDependency(const StorageID & old_from, const StorageID & old_where,const StorageID & new_from, const StorageID & new_where); diff --git a/src/Storages/System/StorageSystemTableViews.cpp b/src/Storages/System/StorageSystemTableViews.cpp deleted file mode 100644 index 9687ceafac4..00000000000 --- a/src/Storages/System/StorageSystemTableViews.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include -#include -#include -#include - -namespace DB -{ - -class Context; - -NamesAndTypesList StorageSystemTableViews::getNamesAndTypes() -{ - return { - {"database", 
std::make_shared()}, - {"table", std::make_shared()}, - {"view_database", std::make_shared()}, - {"view_table", std::make_shared()}, - }; -} - -void StorageSystemTableViews::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & ) const -{ - const auto access = context.getAccess(); - const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); - - ViewDependencies view_dependencies; - DatabaseCatalog::instance().getViewDependencies(view_dependencies); - - for (const auto & [storage_id, view_ids] : view_dependencies) - { - const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, storage_id.database_name); - - if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, storage_id.database_name, storage_id.table_name)) - continue; - - for (const auto & view_id : view_ids) - { - size_t col_num = 0; - res_columns[col_num++]->insert(storage_id.database_name); - res_columns[col_num++]->insert(storage_id.table_name); - res_columns[col_num++]->insert(view_id.database_name); - res_columns[col_num++]->insert(view_id.table_name); - } - } -} - -} diff --git a/src/Storages/System/StorageSystemViews.cpp b/src/Storages/System/StorageSystemViews.cpp new file mode 100644 index 00000000000..5cc648ffec2 --- /dev/null +++ b/src/Storages/System/StorageSystemViews.cpp @@ -0,0 +1,45 @@ +#include +#include +#include +#include + +namespace DB +{ + +class Context; + +NamesAndTypesList StorageSystemViews::getNamesAndTypes() +{ + return { + {"database", std::make_shared()}, + {"name", std::make_shared()}, + {"table", std::make_shared()}, + {"table_database", std::make_shared()}, + }; +} + +void StorageSystemViews::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const +{ + const auto access = context.getAccess(); + const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); + + for (const auto & [table_id, view_ids] : DatabaseCatalog::instance().getViewDependencies()) + { + const bool check_access_for_tables = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, table_id.database_name); + + if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, table_id.database_name, table_id.table_name)) + continue; + + size_t col_num; + for (const auto & view_id : view_ids) + { + col_num = 0; + res_columns[col_num++]->insert(table_id.database_name); + res_columns[col_num++]->insert(table_id.table_name); + res_columns[col_num++]->insert(view_id.database_name); + res_columns[col_num++]->insert(view_id.table_name); + } + } +} + +} diff --git a/src/Storages/System/StorageSystemTableViews.h b/src/Storages/System/StorageSystemViews.h similarity index 57% rename from src/Storages/System/StorageSystemTableViews.h rename to src/Storages/System/StorageSystemViews.h index daf852a1821..cfcab9566f1 100644 --- a/src/Storages/System/StorageSystemTableViews.h +++ b/src/Storages/System/StorageSystemViews.h @@ -6,16 +6,16 @@ namespace DB { -class StorageSystemTableViews final : public ext::shared_ptr_helper, public IStorageSystemOneBlock +class StorageSystemViews final : public ext::shared_ptr_helper, public IStorageSystemOneBlock { - friend struct ext::shared_ptr_helper; + friend struct ext::shared_ptr_helper; protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override; public: - std::string 
getName() const override { return "TableViews"; } + std::string getName() const override { return "SystemViews"; } static NamesAndTypesList getNamesAndTypes(); diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 6bb4343e2bf..3656a239adb 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -44,7 +44,7 @@ #include #include #include -#include +#include #include #include #include @@ -96,7 +96,7 @@ void attachSystemTablesLocal(IDatabase & system_database) attach(system_database, "zeros_mt", true); attach(system_database, "databases"); attach(system_database, "tables"); - attach(system_database, "table_views"); + attach(system_database, "views"); attach(system_database, "columns"); attach(system_database, "functions"); attach(system_database, "events"); diff --git a/tests/queries/0_stateless/02013_add_system_table_views.reference b/tests/queries/0_stateless/02014_add_system_views.reference similarity index 100% rename from tests/queries/0_stateless/02013_add_system_table_views.reference rename to tests/queries/0_stateless/02014_add_system_views.reference diff --git a/tests/queries/0_stateless/02013_add_system_table_views.sql b/tests/queries/0_stateless/02014_add_system_views.sql similarity index 100% rename from tests/queries/0_stateless/02013_add_system_table_views.sql rename to tests/queries/0_stateless/02014_add_system_views.sql From 4f81ff97b22d655531b46b6cb7d32882b0c5c55a Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Wed, 25 Aug 2021 19:22:02 +0800 Subject: [PATCH 47/86] update system.views test --- .../0_stateless/02014_add_system_views.reference | 2 +- .../0_stateless/02014_add_system_views.sql | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02014_add_system_views.reference b/tests/queries/0_stateless/02014_add_system_views.reference index 3c16c8bc6d9..fa8a1885767 100644 --- a/tests/queries/0_stateless/02014_add_system_views.reference +++ b/tests/queries/0_stateless/02014_add_system_views.reference @@ -1 +1 @@ -default table_views_test default table_views_test_view +default views_test default views_test_view diff --git a/tests/queries/0_stateless/02014_add_system_views.sql b/tests/queries/0_stateless/02014_add_system_views.sql index 0d2616b106f..93c6b815ee0 100644 --- a/tests/queries/0_stateless/02014_add_system_views.sql +++ b/tests/queries/0_stateless/02014_add_system_views.sql @@ -1,8 +1,8 @@ -DROP TABLE IF EXISTS table_views_test; -CREATE TABLE table_views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; -DROP TABLE IF EXISTS table_views_test_view; -CREATE MATERIALIZED VIEW table_views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM table_views_test; -SELECT * FROM system.table_views WHERE view_table = 'table_views_test_view'; -DROP TABLE IF EXISTS table_views_test_view; -SELECT * FROM system.table_views WHERE view_table = 'table_views_test_view'; -DROP TABLE IF EXISTS table_views_test; +DROP TABLE IF EXISTS views_test; +CREATE TABLE views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; +DROP TABLE IF EXISTS views_test_view; +CREATE MATERIALIZED VIEW views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM views_test; +SELECT * FROM system.views WHERE view_table = 'views_test_view'; +DROP TABLE IF EXISTS views_test_view; +SELECT * FROM system.views WHERE view_table = 'views_test_view'; +DROP TABLE IF EXISTS views_test; From 
3e18e22cfba2c9e114a7753a0eb7d7d099b5eb22 Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Wed, 25 Aug 2021 20:20:05 +0800 Subject: [PATCH 48/86] update api --- src/Interpreters/DatabaseCatalog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index e8a9b1870e5..de7fc96f0fc 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -609,7 +609,7 @@ Dependencies DatabaseCatalog::getDependencies(const StorageID & from) const return Dependencies(iter->second.begin(), iter->second.end()); } -ViewDependencies DatabaseCatalog::getViewDependencies() const; +ViewDependencies DatabaseCatalog::getViewDependencies() const { return view_dependencies; } From b62fb1e8cf2dc6ee9f5d0455f2e7905c5d3cd59a Mon Sep 17 00:00:00 2001 From: Dmitriy <72220289+sevirov@users.noreply.github.com> Date: Wed, 25 Aug 2021 22:30:14 +0300 Subject: [PATCH 49/86] Update docs/en/sql-reference/functions/other-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/en/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index d868397b899..a39605c4e00 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -2238,7 +2238,7 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere ## getServerPort {#getserverport} -Returns the number of the server port. When the port is not used by the server, throw an exception. +Returns the number of the server port. When the port is not used by the server, throws an exception. **Syntax** From 121104c4f4ece9728178841e956322acc59c46fd Mon Sep 17 00:00:00 2001 From: Dmitriy <72220289+sevirov@users.noreply.github.com> Date: Wed, 25 Aug 2021 22:30:40 +0300 Subject: [PATCH 50/86] Update docs/ru/sql-reference/functions/other-functions.md Co-authored-by: olgarev <56617294+olgarev@users.noreply.github.com> --- docs/ru/sql-reference/functions/other-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index fe61f59cd71..182d79787de 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -2188,7 +2188,7 @@ defaultRoles() ## getServerPort {#getserverport} -Возвращает номер порта сервера. Если порт не используется сервером, будет сгенерировано исключение. +Возвращает номер порта сервера. Если порт не используется сервером, генерируется исключение. 
**Синтаксис** From 042d17531c8944d27054637359fa98ec003a8be4 Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Thu, 26 Aug 2021 20:05:01 +0800 Subject: [PATCH 51/86] system.views add columns type --- src/Interpreters/DatabaseCatalog.cpp | 3 ++- src/Storages/System/StorageSystemViews.cpp | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index de7fc96f0fc..99ab3cabd31 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -611,7 +611,8 @@ Dependencies DatabaseCatalog::getDependencies(const StorageID & from) const ViewDependencies DatabaseCatalog::getViewDependencies() const { - return view_dependencies; + std::lock_guard lock{databases_mutex}; + return ViewDependencies(view_dependencies.begin(), view_dependencies.end()); } void diff --git a/src/Storages/System/StorageSystemViews.cpp b/src/Storages/System/StorageSystemViews.cpp index 5cc648ffec2..e0e7a75fe89 100644 --- a/src/Storages/System/StorageSystemViews.cpp +++ b/src/Storages/System/StorageSystemViews.cpp @@ -10,11 +10,17 @@ class Context; NamesAndTypesList StorageSystemViews::getNamesAndTypes() { + auto view_type_datatype = std::make_shared(DataTypeEnum8::Values{ + {"Default", static_cast(ViewType::DEFAULT)}, + {"Materialized", static_cast(ViewType::MATERIALIZED)}, + {"Live", static_cast(ViewType::LIVE)}}); + return { {"database", std::make_shared()}, {"name", std::make_shared()}, {"table", std::make_shared()}, {"table_database", std::make_shared()}, + {"type", std::move(view_type_datatype)}, }; } @@ -33,11 +39,24 @@ void StorageSystemViews::fillData(MutableColumns & res_columns, const Context & size_t col_num; for (const auto & view_id : view_ids) { + auto view_ptr = DatabaseCatalog::instance().getTable(view_id, context); + QueryViewsLogElement::ViewType type = QueryViewsLogElement::ViewType::DEFAULT; + + if (const auto * materialized_view = dynamic_cast(view_ptr.get())) + { + type = QueryViewsLogElement::ViewType::MATERIALIZED; + } + else if (const auto * live_view = dynamic_cast(view_ptr.get())) + { + type = QueryViewsLogElement::ViewType::LIVE; + } + col_num = 0; res_columns[col_num++]->insert(table_id.database_name); res_columns[col_num++]->insert(table_id.table_name); res_columns[col_num++]->insert(view_id.database_name); res_columns[col_num++]->insert(view_id.table_name); + res_columns[col_num++]->insert(type); } } } From 15cbd7470a0b93c80aff933c7571c7336487280e Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Thu, 26 Aug 2021 20:54:04 +0800 Subject: [PATCH 52/86] StorageSystemViews bug fix --- src/Storages/System/StorageSystemViews.cpp | 16 ++++++++++------ src/Storages/System/StorageSystemViews.h | 8 ++++---- .../0_stateless/02014_add_system_views.reference | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/Storages/System/StorageSystemViews.cpp b/src/Storages/System/StorageSystemViews.cpp index e0e7a75fe89..c6461fd0949 100644 --- a/src/Storages/System/StorageSystemViews.cpp +++ b/src/Storages/System/StorageSystemViews.cpp @@ -2,6 +2,10 @@ #include #include #include +#include +#include +#include +#include namespace DB { @@ -11,22 +15,22 @@ class Context; NamesAndTypesList StorageSystemViews::getNamesAndTypes() { auto view_type_datatype = std::make_shared(DataTypeEnum8::Values{ - {"Default", static_cast(ViewType::DEFAULT)}, - {"Materialized", static_cast(ViewType::MATERIALIZED)}, - {"Live", static_cast(ViewType::LIVE)}}); + {"Default", 
static_cast(QueryViewsLogElement::ViewType::DEFAULT)}, + {"Materialized", static_cast(QueryViewsLogElement::ViewType::MATERIALIZED)}, + {"Live", static_cast(QueryViewsLogElement::ViewType::LIVE)}}); return { {"database", std::make_shared()}, {"name", std::make_shared()}, {"table", std::make_shared()}, {"table_database", std::make_shared()}, - {"type", std::move(view_type_datatype)}, + {"view_type", std::move(view_type_datatype)}, }; } -void StorageSystemViews::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const +void StorageSystemViews::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { - const auto access = context.getAccess(); + const auto access = context->getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); for (const auto & [table_id, view_ids] : DatabaseCatalog::instance().getViewDependencies()) diff --git a/src/Storages/System/StorageSystemViews.h b/src/Storages/System/StorageSystemViews.h index cfcab9566f1..67fcb79067e 100644 --- a/src/Storages/System/StorageSystemViews.h +++ b/src/Storages/System/StorageSystemViews.h @@ -1,18 +1,18 @@ #pragma once -#include +#include #include namespace DB { -class StorageSystemViews final : public ext::shared_ptr_helper, public IStorageSystemOneBlock +class StorageSystemViews final : public shared_ptr_helper, public IStorageSystemOneBlock { - friend struct ext::shared_ptr_helper; + friend struct shared_ptr_helper; protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; - void fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo & query_info) const override; + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const override; public: std::string getName() const override { return "SystemViews"; } diff --git a/tests/queries/0_stateless/02014_add_system_views.reference b/tests/queries/0_stateless/02014_add_system_views.reference index fa8a1885767..dc4e25c14e7 100644 --- a/tests/queries/0_stateless/02014_add_system_views.reference +++ b/tests/queries/0_stateless/02014_add_system_views.reference @@ -1 +1 @@ -default views_test default views_test_view +default views_test default views_test_view MATERIALIZED From 103e0abe1ce298d5287caf69f82b7c34cef14c9b Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Thu, 26 Aug 2021 22:08:58 +0800 Subject: [PATCH 53/86] Reserve protocol number for ALTER PRIMARY KEY. 
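For context on the constant reserved below: replication features are gated by protocol version during part exchange, and a replica only relies on a feature once the negotiated version reaches the constant that introduced it. A minimal sketch of that check, with a hypothetical helper name (the actual negotiation lives in DataPartsExchange.cpp):

```cpp
#include <algorithm>

constexpr int REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION = 7;
constexpr int REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PRIMARY_KEY = 8; /// reserved by this patch

/// Hypothetical helper: both sides effectively agree on min(client, server),
/// so a feature applies only when that minimum reaches its version constant.
bool negotiatedVersionSupports(int client_version, int server_version, int feature_version)
{
    return std::min(client_version, server_version) >= feature_version;
}
```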
--- src/Storages/MergeTree/DataPartsExchange.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 6ff9c16dad5..3df7b6160e0 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -57,6 +57,7 @@ constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION = 4; constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID = 5; constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY = 6; constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION = 7; +constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PRIMARY_KEY = 8; std::string getEndpointId(const std::string & node_id) From c60e935830b1d8891a069b167917130b8d080e22 Mon Sep 17 00:00:00 2001 From: Jakub Kuklis Date: Thu, 26 Aug 2021 16:42:39 +0200 Subject: [PATCH 54/86] Adding Format Null to performance test queries, style improvement for the test, additional comments in the code --- .../AggregateFunctionSequenceMatch.h | 2 + tests/performance/sequence_match.xml | 39 ++++++++++++++----- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 1f0227a9b6f..5dfe820b6be 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -48,6 +48,7 @@ struct AggregateFunctionSequenceMatchData final bool sorted = true; PODArrayWithStackMemory events_list; + /// sequenceMatch conditions met at least once in events_list std::bitset conditions_met; void add(const Timestamp timestamp, const Events & events) @@ -599,6 +600,7 @@ private: protected: /// `True` if the parsed pattern contains time assertions (?t...), `false` otherwise. 
bool pattern_has_time; + /// sequenceMatch conditions met at least once in the pattern std::bitset conditions_in_pattern; private: diff --git a/tests/performance/sequence_match.xml b/tests/performance/sequence_match.xml index 537a92b5526..8f2008d30fc 100644 --- a/tests/performance/sequence_match.xml +++ b/tests/performance/sequence_match.xml @@ -3,19 +3,40 @@ hits_10m_single - SELECT COUNT(*) FROM hits_10m_single GROUP BY EventTime HAVING sequenceMatch('(?1)(?t<1)(?2)')(EventTime, Age >= 0, Age = -1) + + + SELECT 1 FROM hits_10m_single GROUP BY EventTime HAVING + sequenceMatch('(?1)(?t<1)(?2)')( + EventTime, Age >= 0, Age = -1) + FORMAT Null + - SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + + + SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?t>1000)(?3)')( - EventTime, hasAny(RefererCategories, [9]), hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAll(RefererCategories, [1, 9]), hasAny(RefererCategories, [1, 2326, 5496])) - SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + EventTime, hasAny(RefererCategories, [9]), hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAll(RefererCategories, [1, 9]), hasAny(RefererCategories, [1, 2326, 5496])) + FORMAT Null + + + SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?t<10000)(?2)')( - EventTime, hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAny(RefererCategories, [1, 2])) - SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + EventTime, hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAny(RefererCategories, [1, 2])) + FORMAT Null + + + + + SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?3)(?1)(?3)')( - EventTime, hasAny(RefererCategories, [9]), hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAll(RefererCategories, [1, 9]), hasAny(RefererCategories, [1, 2326, 5496])) - SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + EventTime, hasAny(RefererCategories, [9]), hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAll(RefererCategories, [1, 9]), hasAny(RefererCategories, [1, 2326, 5496])) + FORMAT Null + + + SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?2)(?1)(?2)(?1)')( - EventTime, hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAny(RefererCategories, [1, 2])) + EventTime, hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAny(RefererCategories, [1, 2])) + FORMAT Null + From 5976869451486c194c7626c05532473713687568 Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Fri, 27 Aug 2021 10:06:12 +0800 Subject: [PATCH 55/86] update system views test --- tests/queries/0_stateless/02014_add_system_views.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02014_add_system_views.reference b/tests/queries/0_stateless/02014_add_system_views.reference index dc4e25c14e7..637405f47ee 100644 --- a/tests/queries/0_stateless/02014_add_system_views.reference +++ b/tests/queries/0_stateless/02014_add_system_views.reference @@ -1 +1 @@ -default views_test default views_test_view MATERIALIZED +default views_test default views_test_view Materialized From 306da6f41411c07161803481387411ad21dcc1c2 Mon Sep 
17 00:00:00 2001 From: zhongyuankai Date: Fri, 27 Aug 2021 10:41:15 +0800 Subject: [PATCH 56/86] update system views test --- src/Storages/System/StorageSystemViews.cpp | 2 +- tests/queries/0_stateless/02014_add_system_views.reference | 2 +- tests/queries/0_stateless/02014_add_system_views.sql | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Storages/System/StorageSystemViews.cpp b/src/Storages/System/StorageSystemViews.cpp index c6461fd0949..4e16c7502c5 100644 --- a/src/Storages/System/StorageSystemViews.cpp +++ b/src/Storages/System/StorageSystemViews.cpp @@ -24,7 +24,7 @@ NamesAndTypesList StorageSystemViews::getNamesAndTypes() {"name", std::make_shared()}, {"table", std::make_shared()}, {"table_database", std::make_shared()}, - {"view_type", std::move(view_type_datatype)}, + {"type", std::move(view_type_datatype)}, }; } diff --git a/tests/queries/0_stateless/02014_add_system_views.reference b/tests/queries/0_stateless/02014_add_system_views.reference index 637405f47ee..f13f5755e36 100644 --- a/tests/queries/0_stateless/02014_add_system_views.reference +++ b/tests/queries/0_stateless/02014_add_system_views.reference @@ -1 +1 @@ -default views_test default views_test_view Materialized +default views_test_view default views_test Materialized diff --git a/tests/queries/0_stateless/02014_add_system_views.sql b/tests/queries/0_stateless/02014_add_system_views.sql index 93c6b815ee0..a3a75c19278 100644 --- a/tests/queries/0_stateless/02014_add_system_views.sql +++ b/tests/queries/0_stateless/02014_add_system_views.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS views_test; CREATE TABLE views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; DROP TABLE IF EXISTS views_test_view; CREATE MATERIALIZED VIEW views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM views_test; -SELECT * FROM system.views WHERE view_table = 'views_test_view'; +SELECT * FROM system.views WHERE database = 'default' and view_table = 'views_test_view'; DROP TABLE IF EXISTS views_test_view; -SELECT * FROM system.views WHERE view_table = 'views_test_view'; +SELECT * FROM system.views WHERE database = 'default' and view_table = 'views_test_view'; DROP TABLE IF EXISTS views_test; From b57dff7157e66d59d7f39c39c68764fbd444f3d9 Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Fri, 27 Aug 2021 11:32:17 +0800 Subject: [PATCH 57/86] update system views test --- tests/queries/0_stateless/02014_add_system_views.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02014_add_system_views.sql b/tests/queries/0_stateless/02014_add_system_views.sql index a3a75c19278..f4be0975b17 100644 --- a/tests/queries/0_stateless/02014_add_system_views.sql +++ b/tests/queries/0_stateless/02014_add_system_views.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS views_test; CREATE TABLE views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; DROP TABLE IF EXISTS views_test_view; CREATE MATERIALIZED VIEW views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM views_test; -SELECT * FROM system.views WHERE database = 'default' and view_table = 'views_test_view'; +SELECT * FROM system.views WHERE database = 'default' and name = 'views_test_view'; DROP TABLE IF EXISTS views_test_view; -SELECT * FROM system.views WHERE database = 'default' and view_table = 'views_test_view'; +SELECT * FROM system.views WHERE database = 'default' and name = 'views_test_view'; DROP TABLE IF EXISTS views_test; From 3c19cd729f2abc6c548495c0e511817e85f86833 Mon Sep 17 
00:00:00 2001 From: zhongyuankai Date: Fri, 27 Aug 2021 12:27:33 +0800 Subject: [PATCH 58/86] fix system views test --- tests/queries/0_stateless/02014_add_system_views.sql | 8 -------- ...m_views.reference => 02015_add_system_views.reference} | 0 tests/queries/0_stateless/02015_add_system_views.sql | 8 ++++++++ 3 files changed, 8 insertions(+), 8 deletions(-) delete mode 100644 tests/queries/0_stateless/02014_add_system_views.sql rename tests/queries/0_stateless/{02014_add_system_views.reference => 02015_add_system_views.reference} (100%) create mode 100644 tests/queries/0_stateless/02015_add_system_views.sql diff --git a/tests/queries/0_stateless/02014_add_system_views.sql b/tests/queries/0_stateless/02014_add_system_views.sql deleted file mode 100644 index f4be0975b17..00000000000 --- a/tests/queries/0_stateless/02014_add_system_views.sql +++ /dev/null @@ -1,8 +0,0 @@ -DROP TABLE IF EXISTS views_test; -CREATE TABLE views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; -DROP TABLE IF EXISTS views_test_view; -CREATE MATERIALIZED VIEW views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM views_test; -SELECT * FROM system.views WHERE database = 'default' and name = 'views_test_view'; -DROP TABLE IF EXISTS views_test_view; -SELECT * FROM system.views WHERE database = 'default' and name = 'views_test_view'; -DROP TABLE IF EXISTS views_test; diff --git a/tests/queries/0_stateless/02014_add_system_views.reference b/tests/queries/0_stateless/02015_add_system_views.reference similarity index 100% rename from tests/queries/0_stateless/02014_add_system_views.reference rename to tests/queries/0_stateless/02015_add_system_views.reference diff --git a/tests/queries/0_stateless/02015_add_system_views.sql b/tests/queries/0_stateless/02015_add_system_views.sql new file mode 100644 index 00000000000..d48f5d1040c --- /dev/null +++ b/tests/queries/0_stateless/02015_add_system_views.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS default.views_test; +CREATE TABLE default.views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; +DROP TABLE IF EXISTS default.views_test_view; +CREATE MATERIALIZED VIEW default.views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM default.views_test; +SELECT * FROM system.views WHERE database = 'default' and name = 'views_test_view'; +DROP TABLE IF EXISTS default.views_test_view; +SELECT * FROM system.views WHERE database = 'default' and name = 'views_test_view'; +DROP TABLE IF EXISTS default.views_test; From af3a1c30f7dacc3cf87d4a6c5fdffab14c7dc016 Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Fri, 27 Aug 2021 13:21:40 +0800 Subject: [PATCH 59/86] fix system views test --- .../02015_add_system_views.reference | 2 +- .../0_stateless/02015_add_system_views.sql | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02015_add_system_views.reference b/tests/queries/0_stateless/02015_add_system_views.reference index f13f5755e36..faa894d50df 100644 --- a/tests/queries/0_stateless/02015_add_system_views.reference +++ b/tests/queries/0_stateless/02015_add_system_views.reference @@ -1 +1 @@ -default views_test_view default views_test Materialized +views_test_db views_test_view views_test_db views_test Materialized diff --git a/tests/queries/0_stateless/02015_add_system_views.sql b/tests/queries/0_stateless/02015_add_system_views.sql index d48f5d1040c..5a48b1b9761 100644 --- a/tests/queries/0_stateless/02015_add_system_views.sql +++ 
b/tests/queries/0_stateless/02015_add_system_views.sql @@ -1,8 +1,10 @@ -DROP TABLE IF EXISTS default.views_test; -CREATE TABLE default.views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; -DROP TABLE IF EXISTS default.views_test_view; -CREATE MATERIALIZED VIEW default.views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM default.views_test; -SELECT * FROM system.views WHERE database = 'default' and name = 'views_test_view'; -DROP TABLE IF EXISTS default.views_test_view; -SELECT * FROM system.views WHERE database = 'default' and name = 'views_test_view'; -DROP TABLE IF EXISTS default.views_test; +CREATE DATABASE IF NOT EXISTS views_test_db; +DROP TABLE IF EXISTS views_test_db.views_test; +CREATE TABLE views_test_db.views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; +DROP TABLE IF EXISTS views_test_db.views_test_view; +CREATE MATERIALIZED VIEW views_test_db.views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM views_test_db.views_test; +SELECT * FROM system.views WHERE database = 'views_test_db' and name = 'views_test_view'; +DROP TABLE IF EXISTS views_test_db.views_test_view; +SELECT * FROM system.views WHERE database = 'views_test_db' and name = 'views_test_view'; +DROP TABLE IF EXISTS views_test_db.views_test; +DROP DATABASE IF EXISTS views_test_db; From ac0f6060136992d4c6841266444143b5e03baed2 Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Fri, 27 Aug 2021 14:14:07 +0800 Subject: [PATCH 60/86] fix system views test --- .../02015_add_system_views.reference | 3 ++- .../0_stateless/02015_add_system_views.sql | 18 ++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/tests/queries/0_stateless/02015_add_system_views.reference b/tests/queries/0_stateless/02015_add_system_views.reference index faa894d50df..b261da18d51 100644 --- a/tests/queries/0_stateless/02015_add_system_views.reference +++ b/tests/queries/0_stateless/02015_add_system_views.reference @@ -1 +1,2 @@ -views_test_db views_test_view views_test_db views_test Materialized +1 +0 diff --git a/tests/queries/0_stateless/02015_add_system_views.sql b/tests/queries/0_stateless/02015_add_system_views.sql index 5a48b1b9761..779ed9a319a 100644 --- a/tests/queries/0_stateless/02015_add_system_views.sql +++ b/tests/queries/0_stateless/02015_add_system_views.sql @@ -1,10 +1,8 @@ -CREATE DATABASE IF NOT EXISTS views_test_db; -DROP TABLE IF EXISTS views_test_db.views_test; -CREATE TABLE views_test_db.views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; -DROP TABLE IF EXISTS views_test_db.views_test_view; -CREATE MATERIALIZED VIEW views_test_db.views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM views_test_db.views_test; -SELECT * FROM system.views WHERE database = 'views_test_db' and name = 'views_test_view'; -DROP TABLE IF EXISTS views_test_db.views_test_view; -SELECT * FROM system.views WHERE database = 'views_test_db' and name = 'views_test_view'; -DROP TABLE IF EXISTS views_test_db.views_test; -DROP DATABASE IF EXISTS views_test_db; +DROP TABLE IF EXISTS views_test; +CREATE TABLE views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; +DROP TABLE IF EXISTS views_test_view; +CREATE MATERIALIZED VIEW views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM views_test; +SELECT count(*) FROM system.views WHERE name = 'views_test_view'; +DROP TABLE IF EXISTS views_test_view; +SELECT count(*) FROM system.views WHERE name = 'views_test_view'; +DROP TABLE IF EXISTS views_test; \ No newline at end of 
file From 19458a0c61b9477e8cad28ac1c3f9bd4e01afa5e Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 27 Aug 2021 10:02:45 +0300 Subject: [PATCH 61/86] Fix removing of parts in a Temporary state Parts in the Temporary state do not exist in data_parts_by_info, so do not search for them there and throw LOGICAL_ERROR on a miss:
``` 02:45:49.037546 [ 5890 ] {} test_iy9rta.concurrent_kill_4 (526aa7c8-db2a-4f0e-926a-a7c8db2a9f0e): Code: 40. DB::Exception: Part all_0_0_0_1 from r11 has different columns hash. (CHECKSUM_DOESNT_MATCH) (version 21.10.1.7910 (official build)). Data after mutation is not byte-identical to data on another replicas. We will download merged part from replica to force byte-identical result. 02:45:49.049422 [ 5890 ] {} test_iy9rta.concurrent_kill_4 (526aa7c8-db2a-4f0e-926a-a7c8db2a9f0e): Trying to immediately remove part all_0_0_0_1 (state Temporary) 02:45:49.060210 [ 5890 ] {} : Logical error: 'Part all_0_0_0_1 doesn't exist'. 02:47:01.572508 [ 29208 ] {} BaseDaemon: (version 21.10.1.7910 (official build), build id: 9309CECED9A0D32CBB00BB8EC77B91456679868E) (from thread 5890) (no query) Received signal Aborted (6) ... 02:47:06.815000 [ 29208 ] {} BaseDaemon: 8. ./obj-x86_64-linux-gnu/../src/Storages/MergeTree/MergeTreeData.cpp:2593: DB::MergeTreeData::tryRemovePartImmediately(std::__1::shared_ptr&&) @ 0x1f959e0d in /usr/bin/clickhouse 02:47:06.816309 [ 29209 ] {} BaseDaemon: 8. ./obj-x86_64-linux-gnu/../src/Storages/MergeTree/MergeTreeData.cpp:2593: DB::MergeTreeData::tryRemovePartImmediately(std::__1::shared_ptr&&) @ 0x1f959e0d in /usr/bin/clickhouse 02:47:09.455665 [ 29208 ] {} BaseDaemon: 9. ./obj-x86_64-linux-gnu/../src/Storages/StorageReplicatedMergeTree.cpp:1939: DB::StorageReplicatedMergeTree::tryExecutePartMutation(DB::ReplicatedMergeTreeLogEntry const&) @ 0x1f5f2bf6 in /usr/bin/clickhouse 02:47:09.468738 [ 29209 ] {} BaseDaemon: 9. ./obj-x86_64-linux-gnu/../src/Storages/StorageReplicatedMergeTree.cpp:1939: DB::StorageReplicatedMergeTree::tryExecutePartMutation(DB::ReplicatedMergeTreeLogEntry const&) @ 0x1f5f2bf6 in /usr/bin/clickhouse 02:47:11.776857 [ 29208 ] {} BaseDaemon: 10. ./obj-x86_64-linux-gnu/../src/Storages/StorageReplicatedMergeTree.cpp:1581: DB::StorageReplicatedMergeTree::executeLogEntry(DB::ReplicatedMergeTreeLogEntry&) @ 0x1f5e484c in /usr/bin/clickhouse 02:47:11.904232 [ 29209 ] {} BaseDaemon: 10. ./obj-x86_64-linux-gnu/../src/Storages/StorageReplicatedMergeTree.cpp:1581: DB::StorageReplicatedMergeTree::executeLogEntry(DB::ReplicatedMergeTreeLogEntry&) @ 0x1f5e484c in /usr/bin/clickhouse 02:47:13.941811 [ 29208 ] {} BaseDaemon: 11. ./obj-x86_64-linux-gnu/../src/Storages/StorageReplicatedMergeTree.cpp:3176: DB::StorageReplicatedMergeTree::processQueueEntry(std::__1::shared_ptr)::$_18::operator() shared_ptr&) const @ 0x1f65faa5 in /usr/bin/clickhouse 02:47:14.477158 [ 29209 ] {} BaseDaemon: 11. ./obj-x86_64-linux-gnu/../src/Storages/StorageReplicatedMergeTree.cpp:3176: DB::StorageReplicatedMergeTree::processQueueEntry(std::__1::shared_ptr)::$_18::operator() shared_ptr&) const @ 0x1f65faa5 in /usr/bin/clickhouse 02:47:16.475373 [ 29208 ] {} BaseDaemon: 12. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/type_traits:3676: decltype(std::__1::forward))(std::__1::forward&>(fp0))) std::__1::__invoke)::$_18&, std::__1::shared_ptr&>(DB::StorageReplicatedMergeTree::processQueueEntry(std::__1::shared_ptr)::$_18&, std::__1::shared_ptr&) @ 0x1f65fa32 in /usr/bin/clickhouse 02:47:16.970325 [ 29209 ] {} BaseDaemon: 12. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/type_traits:3676: decltype(std::__1::forward))(std::__1::forward&>(fp0))) std::__1::__invoke)::$_18&, std::__1::shared_ptr&>(DB::StorageReplicatedMergeTree::processQueueEntry(std::__1::shared_ptr)::$_18&, std::__1::shared_ptr&) @ 0x1f65fa32 in /usr/bin/clickhouse 02:47:18.979481 [ 29208 ] {} BaseDaemon: 13. 
./obj-x86_64-linux-gnu/../contrib/libcxx/include/__functional_base:317: bool std::__1::__invoke_void_return_wrapper::__call)::$_18&, std::__1::shared_ptr&>(DB::StorageReplicatedMergeTree::processQueueEntry(std::__1::shared_ptr)::$_18&, std::__1::shared_ptr BaseDaemon: 13. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/__functional_base:317: bool std::__1::__invoke_void_return_wrapper::__call)::$_18&, std::__1::shared_ptr&>(DB::StorageReplicatedMergeTree::processQueueEntry(std::__1::shared_ptr)::$_18&, std::__1::shared_ptr BaseDaemon: 14. ./obj-x86_64-linux-gnu/../contrib/libcxx/include/functional:1608: std::__1::__function::__default_alloc_func)::$_18, bool (std::__1::shared_ptr&)>::operator()(std::__1::shared_ptr&) @ 0x1f65f9b0 in /usr/bin/clickhouse 02:47:23.546946 [ 413 ] {} Application: Child process was terminated by signal 6. ```
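In short: a part still in the Temporary state was never registered in data_parts_by_info (parts enter that index only when they are committed), so looking it up there and throwing LOGICAL_ERROR on a miss is wrong; such parts must be deleted directly. A simplified sketch of the intended control flow, with the types reduced to hypothetical stubs:

```cpp
#include <memory>
#include <stdexcept>
#include <utility>

enum class DataPartState { Temporary, Outdated, Deleting };

/// Hypothetical stand-ins for IMergeTreeDataPart and the parts index.
struct Part { DataPartState state = DataPartState::Temporary; };
std::shared_ptr<Part> lookUpInPartsIndex(const std::shared_ptr<Part> &) { return nullptr; }

void tryRemovePartImmediately(std::shared_ptr<Part> && part)
{
    std::shared_ptr<Part> part_to_delete;

    if (part->state != DataPartState::Temporary)
    {
        /// Only committed parts are expected in the index; a miss here is a bug.
        part_to_delete = lookUpInPartsIndex(part);
        if (!part_to_delete)
            throw std::logic_error("Part doesn't exist");
    }
    else
    {
        /// A Temporary part never reached the index, so delete it directly.
        part_to_delete = std::move(part);
    }

    /// ... physically remove part_to_delete ...
}
```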
CI: https://clickhouse-test-reports.s3.yandex.net/0/4a8b82232c11512232df3ecdf4ffaec287116ad5/stress_test_(debug).html#fail1 --- src/Storages/MergeTree/MergeTreeData.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 764f5d7adf7..86cbd9f1484 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2588,17 +2588,25 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part) LOG_TRACE(log, "Trying to immediately remove part {}", part->getNameWithState()); - auto it = data_parts_by_info.find(part->info); - if (it == data_parts_by_info.end() || (*it).get() != part.get()) - throw Exception("Part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR); + if (part->getState() != DataPartState::Temporary) + { + auto it = data_parts_by_info.find(part->info); + if (it == data_parts_by_info.end() || (*it).get() != part.get()) + throw Exception("Part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR); - part.reset(); + part.reset(); - if (!((*it)->getState() == DataPartState::Outdated && it->unique())) - return; + if (!((*it)->getState() == DataPartState::Outdated && it->unique())) + return; - modifyPartState(it, DataPartState::Deleting); - part_to_delete = *it; + modifyPartState(it, DataPartState::Deleting); + + part_to_delete = *it; + } + else + { + part_to_delete = std::move(part); + } } try From a7ee7621dd9a488496c5a9199dd228ea0de54792 Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Fri, 27 Aug 2021 15:44:50 +0800 Subject: [PATCH 62/86] optimize code --- src/Storages/System/StorageSystemViews.cpp | 10 +++++----- .../0_stateless/02015_add_system_views.reference | 3 +-- tests/queries/0_stateless/02015_add_system_views.sql | 6 +++--- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/Storages/System/StorageSystemViews.cpp b/src/Storages/System/StorageSystemViews.cpp index 4e16c7502c5..91f8dfeb258 100644 --- a/src/Storages/System/StorageSystemViews.cpp +++ b/src/Storages/System/StorageSystemViews.cpp @@ -22,8 +22,8 @@ NamesAndTypesList StorageSystemViews::getNamesAndTypes() return { {"database", std::make_shared()}, {"name", std::make_shared()}, - {"table", std::make_shared()}, {"table_database", std::make_shared()}, + {"table", std::make_shared()}, {"type", std::move(view_type_datatype)}, }; } @@ -46,20 +46,20 @@ void StorageSystemViews::fillData(MutableColumns & res_columns, ContextPtr conte auto view_ptr = DatabaseCatalog::instance().getTable(view_id, context); QueryViewsLogElement::ViewType type = QueryViewsLogElement::ViewType::DEFAULT; - if (const auto * materialized_view = dynamic_cast(view_ptr.get())) + if (typeid_cast(view_ptr.get())) { type = QueryViewsLogElement::ViewType::MATERIALIZED; } - else if (const auto * live_view = dynamic_cast(view_ptr.get())) + else if (typeid_cast(view_ptr.get())) { type = QueryViewsLogElement::ViewType::LIVE; } col_num = 0; - res_columns[col_num++]->insert(table_id.database_name); - res_columns[col_num++]->insert(table_id.table_name); res_columns[col_num++]->insert(view_id.database_name); res_columns[col_num++]->insert(view_id.table_name); + res_columns[col_num++]->insert(table_id.database_name); + res_columns[col_num++]->insert(table_id.table_name); res_columns[col_num++]->insert(type); } } diff --git a/tests/queries/0_stateless/02015_add_system_views.reference b/tests/queries/0_stateless/02015_add_system_views.reference index 
b261da18d51..3dd3295e43f 100644 --- a/tests/queries/0_stateless/02015_add_system_views.reference +++ b/tests/queries/0_stateless/02015_add_system_views.reference @@ -1,2 +1 @@ -1 -0 +default views_test_view default views_test Materialized diff --git a/tests/queries/0_stateless/02015_add_system_views.sql b/tests/queries/0_stateless/02015_add_system_views.sql index 779ed9a319a..933f77f58d2 100644 --- a/tests/queries/0_stateless/02015_add_system_views.sql +++ b/tests/queries/0_stateless/02015_add_system_views.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS views_test; CREATE TABLE views_test (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; DROP TABLE IF EXISTS views_test_view; CREATE MATERIALIZED VIEW views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM views_test; -SELECT count(*) FROM system.views WHERE name = 'views_test_view'; +SELECT * FROM system.views WHERE name = 'views_test_view'; DROP TABLE IF EXISTS views_test_view; -SELECT count(*) FROM system.views WHERE name = 'views_test_view'; -DROP TABLE IF EXISTS views_test; \ No newline at end of file +SELECT * FROM system.views WHERE name = 'views_test_view'; +DROP TABLE IF EXISTS views_test; From 7e3e0500034f6d75d8589ae6fb26edf421613661 Mon Sep 17 00:00:00 2001 From: Jakub Kuklis Date: Fri, 27 Aug 2021 10:14:17 +0200 Subject: [PATCH 63/86] Changing the tests to use mainly test.hits dataset, as hits_10[0]m_single datasets have a slightly different schema --- tests/performance/sequence_match.xml | 47 +++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/tests/performance/sequence_match.xml b/tests/performance/sequence_match.xml index 8f2008d30fc..35a2734df60 100644 --- a/tests/performance/sequence_match.xml +++ b/tests/performance/sequence_match.xml @@ -1,25 +1,62 @@ hits_10m_single + test.hits - + + + SELECT 1 FROM hits_10m_single GROUP BY EventTime HAVING + sequenceMatch('(?1)(?t<1)(?2)')( + EventTime, Age BETWEEN 20 AND 30, Age BETWEEN 35 AND 50) + FORMAT Null + + + SELECT 1 FROM test.hits GROUP BY EventTime HAVING + sequenceMatch('(?1)(?t<1)(?2)')( + EventTime, Age BETWEEN 20 AND 30, Age BETWEEN 35 AND 50) + FORMAT Null + + + + + SELECT 1 FROM hits_10m_single GROUP BY EventTime HAVING + sequenceMatch('(?1)(?t<1)(?2)')( + EventTime, Age BETWEEN 20 AND 30, Age BETWEEN 35 AND 50, Age >= 0) + FORMAT Null + + + SELECT 1 FROM test.hits GROUP BY EventTime HAVING + sequenceMatch('(?1)(?t<1)(?2)')( + EventTime, Age BETWEEN 20 AND 30, Age BETWEEN 35 AND 50, Age >= 0) + FORMAT Null + + + SELECT 1 FROM hits_10m_single GROUP BY EventTime HAVING sequenceMatch('(?1)(?t<1)(?2)')( EventTime, Age >= 0, Age = -1) FORMAT Null + + SELECT 1 FROM test.hits GROUP BY EventTime HAVING + sequenceMatch('(?1)(?t<1)(?2)')( + EventTime, Age >= 0, Age = -1) + FORMAT Null + + + - SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + SELECT 1 FROM test.hits WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?t>1000)(?3)')( EventTime, hasAny(RefererCategories, [9]), hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAll(RefererCategories, [1, 9]), hasAny(RefererCategories, [1, 2326, 5496])) FORMAT Null - SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + SELECT 1 FROM test.hits WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?t<10000)(?2)')( EventTime, hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAny(RefererCategories, [1, 2])) FORMAT 
Null @@ -27,13 +64,13 @@ - SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + SELECT 1 FROM test.hits WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?3)(?1)(?3)')( EventTime, hasAny(RefererCategories, [9]), hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAll(RefererCategories, [1, 9]), hasAny(RefererCategories, [1, 2326, 5496])) FORMAT Null - SELECT 1 FROM hits_10m_single WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING + SELECT 1 FROM test.hits WHERE RefererCategories != [] GROUP BY ClientIP, RequestNum HAVING sequenceMatch('(?1)(?2)(?1)(?2)(?1)')( EventTime, hasAny(RefererCategories, [3849, 2, 3, 4, 5, 6, 7]), hasAny(RefererCategories, [1, 2])) FORMAT Null From 7ddb4a9ccc1a26c6ac521268974123638dd66df2 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 26 Aug 2021 18:00:27 +0300 Subject: [PATCH 64/86] maybe fix livelock in zookeeper client --- src/Common/ConcurrentBoundedQueue.h | 126 +++++++++++++++---------- src/Common/ZooKeeper/ZooKeeperImpl.cpp | 28 +++--- src/Common/ZooKeeper/ZooKeeperImpl.h | 6 +- 3 files changed, 90 insertions(+), 70 deletions(-) diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index cb29efc3349..bc9d55ff8f5 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -2,11 +2,21 @@ #include #include +#include #include #include #include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} +} /** A very simple thread-safe queue of limited size. * If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty. @@ -17,9 +27,41 @@ class ConcurrentBoundedQueue { private: std::queue queue; - Poco::FastMutex mutex; + mutable Poco::FastMutex mutex; Poco::Semaphore fill_count; Poco::Semaphore empty_count; + std::atomic_bool closed = false; + + template + bool tryEmplaceImpl(Args &&... args) + { + bool emplaced = true; + + { + Poco::ScopedLock lock(mutex); + if (closed) + emplaced = false; + else + queue.emplace(std::forward(args)...); + } + + if (emplaced) + fill_count.set(); + else + empty_count.set(); + + return emplaced; + } + + void popImpl(T & x) + { + { + Poco::ScopedLock lock(mutex); + detail::moveOrCopyIfThrow(std::move(queue.front()), x); + queue.pop(); + } + empty_count.set(); + } public: explicit ConcurrentBoundedQueue(size_t max_fill) @@ -30,91 +72,75 @@ public: void push(const T & x) { empty_count.wait(); - { - Poco::ScopedLock lock(mutex); - queue.push(x); - } - fill_count.set(); + if (!tryEmplaceImpl(x)) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "tryPush/tryEmplace must be used with close()"); } template void emplace(Args &&... 
args) { empty_count.wait(); - { - Poco::ScopedLock lock(mutex); - queue.emplace(std::forward(args)...); - } - fill_count.set(); + if (!tryEmplaceImpl(std::forward(args)...)) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "tryPush/tryEmplace must be used with close()"); } void pop(T & x) { fill_count.wait(); - { - Poco::ScopedLock lock(mutex); - detail::moveOrCopyIfThrow(std::move(queue.front()), x); - queue.pop(); - } - empty_count.set(); + popImpl(x); } bool tryPush(const T & x, UInt64 milliseconds = 0) { - if (empty_count.tryWait(milliseconds)) - { - { - Poco::ScopedLock lock(mutex); - queue.push(x); - } - fill_count.set(); - return true; - } - return false; + if (!empty_count.tryWait(milliseconds)) + return false; + + return tryEmplaceImpl(x); } template bool tryEmplace(UInt64 milliseconds, Args &&... args) { - if (empty_count.tryWait(milliseconds)) - { - { - Poco::ScopedLock lock(mutex); - queue.emplace(std::forward(args)...); - } - fill_count.set(); - return true; - } - return false; + if (!empty_count.tryWait(milliseconds)) + return false; + + return tryEmplaceImpl(std::forward(args)...); } bool tryPop(T & x, UInt64 milliseconds = 0) { - if (fill_count.tryWait(milliseconds)) - { - { - Poco::ScopedLock lock(mutex); - detail::moveOrCopyIfThrow(std::move(queue.front()), x); - queue.pop(); - } - empty_count.set(); - return true; - } - return false; + if (!fill_count.tryWait(milliseconds)) + return false; + + popImpl(x); + return true; } - size_t size() + size_t size() const { Poco::ScopedLock lock(mutex); return queue.size(); } - size_t empty() + size_t empty() const { Poco::ScopedLock lock(mutex); return queue.empty(); } + /// Forbids to push new elements to queue. + /// Returns false if queue was not closed before call, returns true if queue was already closed. + bool close() + { + Poco::ScopedLock lock(mutex); + return closed.exchange(true); + } + + bool isClosed() const + { + return closed.load(); + } + void clear() { while (fill_count.tryWait(0)) diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index 5f15a3b8b75..a883d4d7b76 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -539,7 +539,7 @@ void ZooKeeper::sendThread() try { - while (!expired) + while (!requests_queue.isClosed()) { auto prev_bytes_sent = out->count(); @@ -571,7 +571,7 @@ void ZooKeeper::sendThread() info.request->has_watch = true; } - if (expired) + if (requests_queue.isClosed()) { break; } @@ -616,7 +616,7 @@ void ZooKeeper::receiveThread() try { Int64 waited = 0; - while (!expired) + while (!requests_queue.isClosed()) { auto prev_bytes_received = in->count(); @@ -639,7 +639,7 @@ void ZooKeeper::receiveThread() if (in->poll(max_wait)) { - if (expired) + if (requests_queue.isClosed()) break; receiveEvent(); @@ -839,12 +839,10 @@ void ZooKeeper::finalize(bool error_send, bool error_receive) auto expire_session_if_not_expired = [&] { - std::lock_guard lock(push_request_mutex); - if (!expired) - { - expired = true; + /// No new requests will appear in queue after close() + bool was_already_closed = requests_queue.close(); + if (!was_already_closed) active_session_metric_increment.destroy(); - } }; try @@ -1017,17 +1015,15 @@ void ZooKeeper::pushRequest(RequestInfo && info) } } - /// We must serialize 'pushRequest' and 'finalize' (from sendThread, receiveThread) calls - /// to avoid forgotten operations in the queue when session is expired. 
- /// Invariant: when expired, no new operations will be pushed to the queue in 'pushRequest' - /// and the queue will be drained in 'finalize'. - std::lock_guard lock(push_request_mutex); - - if (expired) + if (requests_queue.isClosed()) throw Exception("Session expired", Error::ZSESSIONEXPIRED); if (!requests_queue.tryPush(std::move(info), operation_timeout.totalMilliseconds())) + { + if (requests_queue.isClosed()) + throw Exception("Session expired", Error::ZSESSIONEXPIRED); throw Exception("Cannot push request to queue within operation timeout", Error::ZOPERATIONTIMEOUT); + } } catch (...) { diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 8f0f64ceafa..9dd71bc554a 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -121,7 +121,7 @@ public: /// If expired, you can only destroy the object. All other methods will throw exception. - bool isExpired() const override { return expired; } + bool isExpired() const override { return requests_queue.isClosed(); } /// Useful to check owner of ephemeral node. int64_t getSessionID() const override { return session_id; } @@ -205,11 +205,9 @@ private: int64_t session_id = 0; std::atomic next_xid {1}; - std::atomic expired {false}; /// Mark session finalization start. Used to avoid simultaneous /// finalization from different threads. One-shot flag. std::atomic finalization_started {false}; - std::mutex push_request_mutex; using clock = std::chrono::steady_clock; @@ -223,7 +221,7 @@ private: using RequestsQueue = ConcurrentBoundedQueue; - RequestsQueue requests_queue{1}; + RequestsQueue requests_queue{1024}; void pushRequest(RequestInfo && info); using Operations = std::map; From ce96b2c35a9f86a1bc1beb19d8dc0c9f65bd34e4 Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Fri, 27 Aug 2021 20:17:58 +0800 Subject: [PATCH 65/86] bump --- src/Storages/MergeTree/DataPartsExchange.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 3df7b6160e0..32bcd4cff6e 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -57,7 +57,8 @@ constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION = 4; constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID = 5; constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY = 6; constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION = 7; -constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PRIMARY_KEY = 8; +// Reserved for ALTER PRIMARY KEY +// constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PRIMARY_KEY = 8; std::string getEndpointId(const std::string & node_id) From ef1994f42007f8d73852e012ca368e42a97d9d03 Mon Sep 17 00:00:00 2001 From: Zhichang Yu Date: Sat, 28 Aug 2021 09:16:48 +0800 Subject: [PATCH 66/86] set env LIBHDFS3_CONF, refers to ClickHouse#8159 --- .../table-engines/integrations/hdfs.md | 19 ++++++++++++++++++- src/Storages/HDFS/HDFSCommon.cpp | 6 ++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 677354b4f97..81dd29741aa 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -184,9 +184,10 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us |hadoop\_kerberos\_keytab | "" | 
|hadoop\_kerberos\_principal | "" | |hadoop\_kerberos\_kinit\_command | kinit | +|libhdfs3\_conf | "" | ### Limitations {#limitations} - * hadoop\_security\_kerberos\_ticket\_cache\_path can be global only, not user specific + * hadoop\_security\_kerberos\_ticket\_cache\_path and libhdfs3\_conf can be global only, not user specific ## Kerberos support {#kerberos-support} @@ -198,6 +199,22 @@ security approach). Use tests/integration/test\_storage\_kerberized\_hdfs/hdfs_c If hadoop\_kerberos\_keytab, hadoop\_kerberos\_principal or hadoop\_kerberos\_kinit\_command is specified, kinit will be invoked. hadoop\_kerberos\_keytab and hadoop\_kerberos\_principal are mandatory in this case. kinit tool and krb5 configuration files are required. +## HDFS Namenode HA support{#namenode-ha} + +libhdfs3 support HDFS namenode HA. + +- Copy `hdfs-site.xml` from an HDFS node to `/etc/clickhouse-server/`. +- Add following piece to ClickHouse config file: + +``` xml + + /etc/clickhouse-server/hdfs-site.xml + +``` + +- Then use `dfs.nameservices` tag value of `hdfs-site.xml` as the namenode address in the HDFS URI. For example, replace `hdfs://appadmin@192.168.101.11:8020/abc/` with `hdfs://appadmin@my_nameservice/abc/`. + + ## Virtual Columns {#virtual-columns} - `_path` — Path to the file. diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index d7e57a0f9eb..da56dafbdc3 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -123,6 +123,12 @@ HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::A if (host.empty()) throw Exception("Illegal HDFS URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS); + // Shall set env LIBHDFS3_CONF *before* HDFSBuilderWrapper construction. + const String & libhdfs3_conf = config.getString(HDFSBuilderWrapper::CONFIG_PREFIX + ".libhdfs3_conf", ""); + if (!libhdfs3_conf.empty()) + { + setenv("LIBHDFS3_CONF", libhdfs3_conf.c_str(), 1); + } HDFSBuilderWrapper builder; if (builder.get() == nullptr) throw Exception("Unable to create builder to connect to HDFS: " + From addce5c1327b154839e1fd55faed67742c7d4fe2 Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Sat, 28 Aug 2021 12:33:35 +0800 Subject: [PATCH 67/86] fix system views test --- .../0_stateless/02015_add_system_views.reference | 1 - .../queries/0_stateless/02015_add_system_views.sql | 8 -------- .../0_stateless/02015_system_views.reference | 1 + tests/queries/0_stateless/02015_system_views.sql | 14 ++++++++++++++ 4 files changed, 15 insertions(+), 9 deletions(-) delete mode 100644 tests/queries/0_stateless/02015_add_system_views.reference delete mode 100644 tests/queries/0_stateless/02015_add_system_views.sql create mode 100644 tests/queries/0_stateless/02015_system_views.reference create mode 100644 tests/queries/0_stateless/02015_system_views.sql diff --git a/tests/queries/0_stateless/02015_add_system_views.reference b/tests/queries/0_stateless/02015_add_system_views.reference deleted file mode 100644 index 3dd3295e43f..00000000000 --- a/tests/queries/0_stateless/02015_add_system_views.reference +++ /dev/null @@ -1 +0,0 @@ -default views_test_view default views_test Materialized diff --git a/tests/queries/0_stateless/02015_add_system_views.sql b/tests/queries/0_stateless/02015_add_system_views.sql deleted file mode 100644 index 933f77f58d2..00000000000 --- a/tests/queries/0_stateless/02015_add_system_views.sql +++ /dev/null @@ -1,8 +0,0 @@ -DROP TABLE IF EXISTS views_test; -CREATE TABLE views_test (a UInt8, s String) ENGINE = MergeTree() 
ORDER BY a; -DROP TABLE IF EXISTS views_test_view; -CREATE MATERIALIZED VIEW views_test_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM views_test; -SELECT * FROM system.views WHERE name = 'views_test_view'; -DROP TABLE IF EXISTS views_test_view; -SELECT * FROM system.views WHERE name = 'views_test_view'; -DROP TABLE IF EXISTS views_test; diff --git a/tests/queries/0_stateless/02015_system_views.reference b/tests/queries/0_stateless/02015_system_views.reference new file mode 100644 index 00000000000..a1b1b2a9fd3 --- /dev/null +++ b/tests/queries/0_stateless/02015_system_views.reference @@ -0,0 +1 @@ +02015_db materialized_view 02015_db view_source_tb Materialized diff --git a/tests/queries/0_stateless/02015_system_views.sql b/tests/queries/0_stateless/02015_system_views.sql new file mode 100644 index 00000000000..a6375dcb591 --- /dev/null +++ b/tests/queries/0_stateless/02015_system_views.sql @@ -0,0 +1,14 @@ +DROP DATABASE IF EXISTS 02015_db; +CREATE DATABASE IF NOT EXISTS 02015_db; + +DROP TABLE IF EXISTS 02015_db.view_source_tb; +CREATE TABLE IF NOT EXISTS 02015_db.view_source_tb (a UInt8, s String) ENGINE = MergeTree() ORDER BY a; + +DROP TABLE IF EXISTS 02015_db.materialized_view; +CREATE MATERIALIZED VIEW IF NOT EXISTS 02015_db.materialized_view ENGINE = ReplacingMergeTree() ORDER BY a AS SELECT * FROM 02015_db.view_source_tb; + +SELECT * FROM system.views WHERE database='02015_db' and name = 'materialized_view'; + +DROP TABLE IF EXISTS 02015_db.materialized_view; +DROP TABLE IF EXISTS 02015_db.view_source_tb; +DROP DATABASE IF EXISTS 02015_db; From d7c98293c17f602dc0b07001bd059aaca035a6c4 Mon Sep 17 00:00:00 2001 From: zhongyuankai Date: Sat, 28 Aug 2021 14:55:49 +0800 Subject: [PATCH 68/86] delete 02015_system_views parallel test --- tests/queries/skip_list.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 335ed370b9b..0143cc78dbe 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -512,6 +512,7 @@ "01532_execute_merges_on_single_replica", /// static zk path "01530_drop_database_atomic_sync", /// creates database "02001_add_default_database_to_system_users", ///create user - "02002_row_level_filter_bug" ///create user + "02002_row_level_filter_bug", ///create user + "02015_system_views" ] } From fb311199ce847b4366b8509caa91f101aff39167 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sat, 28 Aug 2021 19:19:21 +0300 Subject: [PATCH 69/86] test/stress: fix patterns for filtering out Raft messages --- docker/test/stress/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 87d127ab946..b6677e9d217 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -58,7 +58,7 @@ function start() echo "Cannot start clickhouse-server" cat /var/log/clickhouse-server/stdout.log tail -n1000 /var/log/clickhouse-server/stderr.log - tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | grep -F -v ' RaftInstance:' -e ' RaftInstance' | tail -n1000 + tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | grep -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n1000 break fi # use root to match with current uid From f27f519aa20fe6a560464be724fcce40f183fcdf Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 28 Aug 2021 20:21:05 +0000 Subject: [PATCH 70/86] Fix build and add example --- programs/CMakeLists.txt | 22 ++++++++--------- programs/config_tools.h.in | 2 
+- programs/main.cpp | 8 +++---- .../static-files-disk-uploader/CMakeLists.txt | 10 ++++++++ ...clickhouse-static-files-disk-uploader.cpp} | 11 ++++----- programs/web-server-exporter/CMakeLists.txt | 9 ------- src/Disks/DiskWebServer.h | 24 +++++++++++++++++++ .../test_disk_over_web_server/test.py | 2 +- 8 files changed, 55 insertions(+), 33 deletions(-) create mode 100644 programs/static-files-disk-uploader/CMakeLists.txt rename programs/{web-server-exporter/clickhouse-web-server-exporter.cpp => static-files-disk-uploader/clickhouse-static-files-disk-uploader.cpp} (94%) delete mode 100644 programs/web-server-exporter/CMakeLists.txt diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index fc0b4e0e9e7..b7560cd4a00 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -47,7 +47,7 @@ option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Li # https://presentations.clickhouse.tech/matemarketing_2020/ option (ENABLE_CLICKHOUSE_GIT_IMPORT "A tool to analyze Git repositories" ${ENABLE_CLICKHOUSE_ALL}) -option (ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER "A tool to put table data files to a web server" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER "A tool to put table data files to a web server" ${ENABLE_CLICKHOUSE_ALL}) option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_CLICKHOUSE_ALL}) @@ -227,7 +227,7 @@ add_subdirectory (obfuscator) add_subdirectory (install) add_subdirectory (git-import) add_subdirectory (bash-completion) -add_subdirectory (web-server-exporter) +add_subdirectory (static-files-disk-uploader) if (ENABLE_CLICKHOUSE_KEEPER) add_subdirectory (keeper) @@ -260,7 +260,7 @@ if (CLICKHOUSE_ONE_SHARED) ${CLICKHOUSE_ODBC_BRIDGE_SOURCES} ${CLICKHOUSE_KEEPER_SOURCES} ${CLICKHOUSE_KEEPER_CONVERTER_SOURCES} - ${CLICKHOUSE_WEB_SERVER_EXPORTER_SOURCES}) + ${CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_SOURCES}) target_link_libraries(clickhouse-lib ${CLICKHOUSE_SERVER_LINK} @@ -276,7 +276,7 @@ if (CLICKHOUSE_ONE_SHARED) ${CLICKHOUSE_ODBC_BRIDGE_LINK} ${CLICKHOUSE_KEEPER_LINK} ${CLICKHOUSE_KEEPER_CONVERTER_LINK} - ${CLICKHOUSE_WEB_SERVER_EXPORTER_LINK}) + ${CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_LINK}) target_include_directories(clickhouse-lib ${CLICKHOUSE_SERVER_INCLUDE} @@ -309,7 +309,7 @@ if (CLICKHOUSE_SPLIT_BINARY) clickhouse-obfuscator clickhouse-git-import clickhouse-copier - clickhouse-web-server-exporter + clickhouse-static-files-disk-uploader ) if (ENABLE_CLICKHOUSE_ODBC_BRIDGE) @@ -375,8 +375,8 @@ else () if (ENABLE_CLICKHOUSE_GIT_IMPORT) clickhouse_target_link_split_lib(clickhouse git-import) endif () - if (ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER) - clickhouse_target_link_split_lib(clickhouse web-server-exporter) + if (ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER) + clickhouse_target_link_split_lib(clickhouse static-files-disk-uploader) endif () if (ENABLE_CLICKHOUSE_KEEPER) clickhouse_target_link_split_lib(clickhouse keeper) @@ -439,10 +439,10 @@ else () install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-git-import" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) list(APPEND CLICKHOUSE_BUNDLE clickhouse-git-import) endif () - if (ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER) - add_custom_target (clickhouse-web-server-exporter ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-web-server-exporter DEPENDS clickhouse) - install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-web-server-exporter" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) - list(APPEND 
CLICKHOUSE_BUNDLE clickhouse-web-server-exporter) + if (ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER) + add_custom_target (clickhouse-static-files-disk-uploader ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-static-files-disk-uploader DEPENDS clickhouse) + install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-static-files-disk-uploader" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + list(APPEND CLICKHOUSE_BUNDLE clickhouse-static-files-disk-uploader) endif () if (ENABLE_CLICKHOUSE_KEEPER) add_custom_target (clickhouse-keeper ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper DEPENDS clickhouse) diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in index 2eea141a5e2..b97eb63b535 100644 --- a/programs/config_tools.h.in +++ b/programs/config_tools.h.in @@ -18,4 +18,4 @@ #cmakedefine01 ENABLE_CLICKHOUSE_LIBRARY_BRIDGE #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER #cmakedefine01 ENABLE_CLICKHOUSE_KEEPER_CONVERTER -#cmakedefine01 ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER +#cmakedefine01 ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER diff --git a/programs/main.cpp b/programs/main.cpp index 4375f78e1db..1ae96bffecd 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -62,8 +62,8 @@ int mainEntryClickHouseKeeper(int argc, char ** argv); #if ENABLE_CLICKHOUSE_KEEPER int mainEntryClickHouseKeeperConverter(int argc, char ** argv); #endif -#if ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER -int mainEntryClickHouseWebServerExporter(int argc, char ** argv); +#if ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER +int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv); #endif #if ENABLE_CLICKHOUSE_INSTALL int mainEntryClickHouseInstall(int argc, char ** argv); @@ -135,8 +135,8 @@ std::pair clickhouse_applications[] = {"status", mainEntryClickHouseStatus}, {"restart", mainEntryClickHouseRestart}, #endif -#if ENABLE_CLICKHOUSE_WEB_SERVER_EXPORTER - {"web-server-exporter", mainEntryClickHouseWebServerExporter}, +#if ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER + {"static-files-disk-uploader", mainEntryClickHouseStaticFilesDiskUploader}, #endif {"hash-binary", mainEntryClickHouseHashBinary}, }; diff --git a/programs/static-files-disk-uploader/CMakeLists.txt b/programs/static-files-disk-uploader/CMakeLists.txt new file mode 100644 index 00000000000..5590da8024b --- /dev/null +++ b/programs/static-files-disk-uploader/CMakeLists.txt @@ -0,0 +1,10 @@ +set (CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_SOURCES clickhouse-static-files-disk-uploader.cpp) + +set (CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_LINK + PRIVATE + boost::program_options + common + dbms +) + +clickhouse_program_add(static-files-disk-uploader) diff --git a/programs/web-server-exporter/clickhouse-web-server-exporter.cpp b/programs/static-files-disk-uploader/clickhouse-static-files-disk-uploader.cpp similarity index 94% rename from programs/web-server-exporter/clickhouse-web-server-exporter.cpp rename to programs/static-files-disk-uploader/clickhouse-static-files-disk-uploader.cpp index b3cb25af900..2314c2731f1 100644 --- a/programs/web-server-exporter/clickhouse-web-server-exporter.cpp +++ b/programs/static-files-disk-uploader/clickhouse-static-files-disk-uploader.cpp @@ -28,14 +28,14 @@ namespace ErrorCodes } /* - * A tool to collect files on local fs as is (into current directory or into path from --output-dir option). + * A tool to collect table data files on local fs as is (into current directory or into path from --output-dir option). 
* If test-mode option is added, files will be put by given url via PUT request. */ -void processTableFiles(const fs::path & path, const String & files_prefix, String uuid, WriteBuffer & metadata_buf, std::function(const String &)> create_dst_buf) +void processTableFiles(const fs::path & path, const String & files_prefix, String uuid, + WriteBuffer & metadata_buf, std::function(const String &)> create_dst_buf) { fs::directory_iterator dir_end; - auto process_file = [&](const String & file_name, const String & file_path) { auto remote_file_name = files_prefix + "-" + uuid + "-" + file_name; @@ -58,9 +58,7 @@ void processTableFiles(const fs::path & path, const String & files_prefix, Strin { fs::directory_iterator files_end; for (fs::directory_iterator file_it(dir_it->path()); file_it != files_end; ++file_it) - { process_file(dir_it->path().filename().string() + "-" + file_it->path().filename().string(), file_it->path()); - } } else { @@ -70,8 +68,7 @@ void processTableFiles(const fs::path & path, const String & files_prefix, Strin } } - -int mainEntryClickHouseWebServerExporter(int argc, char ** argv) +int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv) try { using namespace DB; diff --git a/programs/web-server-exporter/CMakeLists.txt b/programs/web-server-exporter/CMakeLists.txt deleted file mode 100644 index 91c585049a7..00000000000 --- a/programs/web-server-exporter/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -set (CLICKHOUSE_WEB_SERVER_EXPORTER_SOURCES clickhouse-web-server-exporter.cpp) - -set (CLICKHOUSE_WEB_SERVER_EXPORTER_LINK - PRIVATE - boost::program_options - dbms -) - -clickhouse_program_add(web-server-exporter) diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index 7ce268a2d44..d9fd6486edd 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -27,6 +27,30 @@ struct DiskWebServerSettings /* + * Quick ready test - you can try this disk, by using these queries (disk has two tables) and this endpoint: + * + * ATTACH TABLE contributors UUID 'a563f7d8-fb00-4d50-a563-f7d8fb007d50' (good_person_name String) engine=MergeTree() order by good_person_name settings storage_policy='web'; + * ATTACH TABLE test UUID '11c7a2f9-a949-4c88-91c7-a2f9a949ec88' (a Int32) engine=MergeTree() order by a settings storage_policy='web'; + * + * + * + * + * web + * https://clickhouse-datasets.s3.yandex.net/kssenii-static-files-disk-test/kssenii-disk-tests/test1/ + * data + * + * + * + * + * + *
+ * <disk>web</disk>
+ * </main>
+ * </volumes>
+ * </web>
+ * </policies>
+ * </storage_configuration>
+ * * If url is not reachable on disk load when server is starting up tables, then all errors are caught. * If in this case there were errors, tables can be reloaded (become visible) via detach table table_name -> attach table table_name. * If metadata was successfully loaded at server startup, then tables are available straight away. diff --git a/tests/integration/test_disk_over_web_server/test.py b/tests/integration/test_disk_over_web_server/test.py index fd31c48625d..78e50dee1bf 100644 --- a/tests/integration/test_disk_over_web_server/test.py +++ b/tests/integration/test_disk_over_web_server/test.py @@ -26,7 +26,7 @@ def cluster(): print(f'Metadata: {metadata_path}') node1.exec_in_container(['bash', '-c', - '/usr/bin/clickhouse web-server-exporter --test-mode --files-prefix data --url http://nginx:80/test1 --metadata-path {}'.format(metadata_path)], user='root') + '/usr/bin/clickhouse static-files-disk-uploader --test-mode --files-prefix data --url http://nginx:80/test1 --metadata-path {}'.format(metadata_path)], user='root') parts = metadata_path.split('/') uuids.append(parts[3]) print(f'UUID: {parts[3]}') From 3e7f022d668975b2e96209f4147515da1f8dc18c Mon Sep 17 00:00:00 2001 From: kssenii Date: Sat, 28 Aug 2021 20:51:20 +0000 Subject: [PATCH 71/86] Extend usage comment explanation a little --- src/Disks/DiskWebServer.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index d9fd6486edd..b90733bf5e9 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -51,6 +51,10 @@ struct DiskWebServerSettings * * * + * To get files for upload run: + * clickhouse static-files-disk-uploader --metadata-path --output-dir --files-prefix data + * (--metadata-path can be found in query: `select data_paths from system.tables where name='';`) + * * If url is not reachable on disk load when server is starting up tables, then all errors are caught. * If in this case there were errors, tables can be reloaded (become visible) via detach table table_name -> attach table table_name. * If metadata was successfully loaded at server startup, then tables are available straight away. 
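Taken together, the usage comment above amounts to the following workflow. This is a minimal sketch: the table name `hits`, the abbreviated store path, and the web root `/var/www/clickhouse-web-disk` are illustrative assumptions, not values from this patch; the flags themselves are the ones defined by the tool earlier in this series.

``` bash
# Find the table's metadata path to pass as --metadata-path (assumes a table named "hits").
clickhouse-client --query "SELECT data_paths FROM system.tables WHERE name = 'hits'"

# Export the data files and the .index-<uuid> listing into a directory
# that a static web server (e.g. nginx) will serve.
clickhouse static-files-disk-uploader \
    --metadata-path /var/lib/clickhouse/store/... \
    --output-dir /var/www/clickhouse-web-disk \
    --files-prefix data

# Alternatively, in test mode the files are PUT directly to the web server,
# matching the invocation used in the integration test above.
clickhouse static-files-disk-uploader --test-mode \
    --url http://nginx:80/test1 \
    --metadata-path /var/lib/clickhouse/store/... \
    --files-prefix data
```

After that, the table can be attached on another server with `settings storage_policy='web'`, as in the ATTACH TABLE examples in the header comment.
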
From fb6ce3b64e9b6b44624136d66c62c0ce2cefb91b Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 29 Aug 2021 08:53:04 +0000 Subject: [PATCH 72/86] Fix build check --- programs/static-files-disk-uploader/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/static-files-disk-uploader/CMakeLists.txt b/programs/static-files-disk-uploader/CMakeLists.txt index 5590da8024b..dc315300991 100644 --- a/programs/static-files-disk-uploader/CMakeLists.txt +++ b/programs/static-files-disk-uploader/CMakeLists.txt @@ -3,7 +3,7 @@ set (CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_SOURCES clickhouse-static-files-disk- set (CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_LINK PRIVATE boost::program_options - common + clickhouse_common_io dbms ) From 9e7ae5dd6d6abb692773ed421b512eaaaaaa4d4f Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Sat, 28 Aug 2021 21:48:37 +0800 Subject: [PATCH 73/86] Use real tmp file instead of predefined one --- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 00a599af9c3..271d22de3bd 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -783,7 +783,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor auto compression_codec = data.getCompressionCodecForPart(merge_entry->total_size_bytes_compressed, new_data_part->ttl_infos, time_of_merge); auto tmp_disk = context->getTemporaryVolume()->getDisk(); - String rows_sources_file_path; + std::unique_ptr rows_sources_file; std::unique_ptr rows_sources_uncompressed_write_buf; std::unique_ptr rows_sources_write_buf; std::optional column_sizes; @@ -792,9 +792,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor if (chosen_merge_algorithm == MergeAlgorithm::Vertical) { - tmp_disk->createDirectories(new_part_tmp_path); - rows_sources_file_path = new_part_tmp_path + "rows_sources"; - rows_sources_uncompressed_write_buf = tmp_disk->writeFile(rows_sources_file_path); + rows_sources_file = createTemporaryFile(tmp_disk->getPath()); + rows_sources_uncompressed_write_buf = tmp_disk->writeFile(fileName(rows_sources_file->path())); rows_sources_write_buf = std::make_unique(*rows_sources_uncompressed_write_buf); MergeTreeData::DataPart::ColumnToSize merged_column_to_size; @@ -1030,7 +1029,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor + ") differs from number of bytes written to rows_sources file (" + toString(rows_sources_count) + "). 
It is a bug.", ErrorCodes::LOGICAL_ERROR); - CompressedReadBufferFromFile rows_sources_read_buf(tmp_disk->readFile(rows_sources_file_path)); + CompressedReadBufferFromFile rows_sources_read_buf(tmp_disk->readFile(fileName(rows_sources_file->path()))); IMergedBlockOutputStream::WrittenOffsetColumns written_offset_columns; for (size_t column_num = 0, gathering_column_names_size = gathering_column_names.size(); @@ -1101,8 +1100,6 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merge_entry->bytes_written_uncompressed += column_gathered_stream.getProfileInfo().bytes; merge_entry->progress.store(progress_before + column_sizes->columnWeight(column_name), std::memory_order_relaxed); } - - tmp_disk->removeFile(rows_sources_file_path); } for (const auto & part : parts) From a9308235182b013e5d7bc344f2bbc9f9e5642f46 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 29 Aug 2021 14:18:04 +0000 Subject: [PATCH 74/86] Fix build --- programs/CMakeLists.txt | 2 +- .../static-files-disk-uploader/CMakeLists.txt | 3 +- .../clickhouse-static-files-disk-uploader.cpp | 164 +----------------- .../static-files-disk-uploader.cpp | 162 +++++++++++++++++ 4 files changed, 166 insertions(+), 165 deletions(-) create mode 100644 programs/static-files-disk-uploader/static-files-disk-uploader.cpp diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index b7560cd4a00..005de886a5c 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -47,7 +47,7 @@ option (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE "HTTP-server working like a proxy to Li # https://presentations.clickhouse.tech/matemarketing_2020/ option (ENABLE_CLICKHOUSE_GIT_IMPORT "A tool to analyze Git repositories" ${ENABLE_CLICKHOUSE_ALL}) -option (ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER "A tool to put table data files to a web server" ${ENABLE_CLICKHOUSE_ALL}) +option (ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER "A tool to export table data files to be later put to a static files web server" ${ENABLE_CLICKHOUSE_ALL}) option (ENABLE_CLICKHOUSE_KEEPER "ClickHouse alternative to ZooKeeper" ${ENABLE_CLICKHOUSE_ALL}) diff --git a/programs/static-files-disk-uploader/CMakeLists.txt b/programs/static-files-disk-uploader/CMakeLists.txt index dc315300991..1f76dd7e8b2 100644 --- a/programs/static-files-disk-uploader/CMakeLists.txt +++ b/programs/static-files-disk-uploader/CMakeLists.txt @@ -1,9 +1,8 @@ -set (CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_SOURCES clickhouse-static-files-disk-uploader.cpp) +set (CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_SOURCES static-files-disk-uploader.cpp) set (CLICKHOUSE_STATIC_FILES_DISK_UPLOADER_LINK PRIVATE boost::program_options - clickhouse_common_io dbms ) diff --git a/programs/static-files-disk-uploader/clickhouse-static-files-disk-uploader.cpp b/programs/static-files-disk-uploader/clickhouse-static-files-disk-uploader.cpp index 2314c2731f1..063604b10b1 100644 --- a/programs/static-files-disk-uploader/clickhouse-static-files-disk-uploader.cpp +++ b/programs/static-files-disk-uploader/clickhouse-static-files-disk-uploader.cpp @@ -1,162 +1,2 @@ -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace fs = std::filesystem; - -#define UUID_PATTERN "[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12}" -#define EXTRACT_UUID_PATTERN fmt::format(".*/({})/.*", UUID_PATTERN) - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -/* - * A tool to collect table data files on local fs as is (into 
current directory or into path from --output-dir option). - * If test-mode option is added, files will be put by given url via PUT request. - */ - -void processTableFiles(const fs::path & path, const String & files_prefix, String uuid, - WriteBuffer & metadata_buf, std::function(const String &)> create_dst_buf) -{ - fs::directory_iterator dir_end; - auto process_file = [&](const String & file_name, const String & file_path) - { - auto remote_file_name = files_prefix + "-" + uuid + "-" + file_name; - writeText(remote_file_name, metadata_buf); - writeChar('\t', metadata_buf); - writeIntText(fs::file_size(file_path), metadata_buf); - writeChar('\n', metadata_buf); - - auto src_buf = createReadBufferFromFileBase(file_path, fs::file_size(file_path), 0, 0, nullptr); - auto dst_buf = create_dst_buf(remote_file_name); - - copyData(*src_buf, *dst_buf); - dst_buf->next(); - dst_buf->finalize(); - }; - - for (fs::directory_iterator dir_it(path); dir_it != dir_end; ++dir_it) - { - if (dir_it->is_directory()) - { - fs::directory_iterator files_end; - for (fs::directory_iterator file_it(dir_it->path()); file_it != files_end; ++file_it) - process_file(dir_it->path().filename().string() + "-" + file_it->path().filename().string(), file_it->path()); - } - else - { - process_file(dir_it->path().filename(), dir_it->path()); - } - } -} -} - -int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv) -try -{ - using namespace DB; - namespace po = boost::program_options; - - po::options_description description("Allowed options", getTerminalWidth()); - description.add_options() - ("help,h", "produce help message") - ("metadata-path", po::value(), "Metadata path (select data_paths from system.tables where name='table_name'") - ("test-mode", "Use test mode, which will put data on given url via PUT") - ("url", po::value(), "Web server url for test mode") - ("output-dir", po::value(), "Directory to put files in non-test mode") - ("files-prefix", po::value(), "Prefix for stored files"); - - po::parsed_options parsed = po::command_line_parser(argc, argv).options(description).run(); - po::variables_map options; - po::store(parsed, options); - po::notify(options); - - if (options.empty() || options.count("help")) - { - std::cout << description << std::endl; - exit(0); - } - - String url, metadata_path, files_prefix; - - if (options.count("metadata-path")) - metadata_path = options["metadata-path"].as(); - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No metadata-path option passed"); - - if (options.count("files-prefix")) - files_prefix = options["files-prefix"].as(); - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No files-prefix option passed"); - - fs::path fs_path = fs::weakly_canonical(metadata_path); - if (!fs::exists(fs_path)) - { - std::cerr << fmt::format("Data path ({}) does not exist", fs_path.string()); - return 1; - } - - String uuid; - if (!RE2::Extract(metadata_path, EXTRACT_UUID_PATTERN, "\\1", &uuid)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot extract uuid for: {}", metadata_path); - - std::shared_ptr metadata_buf; - std::function(const String &)> create_dst_buf; - String root_path; - - if (options.count("test-mode")) - { - if (options.count("url")) - url = options["url"].as(); - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "No url option passed for test mode"); - - metadata_buf = std::make_shared(Poco::URI(fs::path(url) / (".index-" + uuid)), Poco::Net::HTTPRequest::HTTP_PUT); - - create_dst_buf = [&](const String & remote_file_name) - { - return 
std::make_shared(Poco::URI(fs::path(url) / remote_file_name), Poco::Net::HTTPRequest::HTTP_PUT); - }; - } - else - { - if (options.count("output-dir")) - root_path = options["output-dir"].as(); - else - root_path = fs::current_path(); - - metadata_buf = std::make_shared(fs::path(root_path) / (".index-" + uuid)); - create_dst_buf = [&](const String & remote_file_name) - { - return std::make_shared(fs::path(root_path) / remote_file_name); - }; - } - - processTableFiles(fs_path, files_prefix, uuid, *metadata_buf, create_dst_buf); - metadata_buf->next(); - metadata_buf->finalize(); - - return 0; -} -catch (...) -{ - std::cerr << DB::getCurrentExceptionMessage(false); - return 1; -} +int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv); +int main(int argc_, char ** argv_) { return mainEntryClickHouseStaticFilesDiskUploader(argc_, argv_); } diff --git a/programs/static-files-disk-uploader/static-files-disk-uploader.cpp b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp new file mode 100644 index 00000000000..2314c2731f1 --- /dev/null +++ b/programs/static-files-disk-uploader/static-files-disk-uploader.cpp @@ -0,0 +1,162 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace fs = std::filesystem; + +#define UUID_PATTERN "[\\w]{8}-[\\w]{4}-[\\w]{4}-[\\w]{4}-[\\w]{12}" +#define EXTRACT_UUID_PATTERN fmt::format(".*/({})/.*", UUID_PATTERN) + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +/* + * A tool to collect table data files on local fs as is (into current directory or into path from --output-dir option). + * If test-mode option is added, files will be put by given url via PUT request. + */ + +void processTableFiles(const fs::path & path, const String & files_prefix, String uuid, + WriteBuffer & metadata_buf, std::function(const String &)> create_dst_buf) +{ + fs::directory_iterator dir_end; + auto process_file = [&](const String & file_name, const String & file_path) + { + auto remote_file_name = files_prefix + "-" + uuid + "-" + file_name; + writeText(remote_file_name, metadata_buf); + writeChar('\t', metadata_buf); + writeIntText(fs::file_size(file_path), metadata_buf); + writeChar('\n', metadata_buf); + + auto src_buf = createReadBufferFromFileBase(file_path, fs::file_size(file_path), 0, 0, nullptr); + auto dst_buf = create_dst_buf(remote_file_name); + + copyData(*src_buf, *dst_buf); + dst_buf->next(); + dst_buf->finalize(); + }; + + for (fs::directory_iterator dir_it(path); dir_it != dir_end; ++dir_it) + { + if (dir_it->is_directory()) + { + fs::directory_iterator files_end; + for (fs::directory_iterator file_it(dir_it->path()); file_it != files_end; ++file_it) + process_file(dir_it->path().filename().string() + "-" + file_it->path().filename().string(), file_it->path()); + } + else + { + process_file(dir_it->path().filename(), dir_it->path()); + } + } +} +} + +int mainEntryClickHouseStaticFilesDiskUploader(int argc, char ** argv) +try +{ + using namespace DB; + namespace po = boost::program_options; + + po::options_description description("Allowed options", getTerminalWidth()); + description.add_options() + ("help,h", "produce help message") + ("metadata-path", po::value(), "Metadata path (select data_paths from system.tables where name='table_name'") + ("test-mode", "Use test mode, which will put data on given url via PUT") + ("url", po::value(), "Web server url for test mode") + ("output-dir", po::value(), "Directory to put files 
in non-test mode") + ("files-prefix", po::value(), "Prefix for stored files"); + + po::parsed_options parsed = po::command_line_parser(argc, argv).options(description).run(); + po::variables_map options; + po::store(parsed, options); + po::notify(options); + + if (options.empty() || options.count("help")) + { + std::cout << description << std::endl; + exit(0); + } + + String url, metadata_path, files_prefix; + + if (options.count("metadata-path")) + metadata_path = options["metadata-path"].as(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No metadata-path option passed"); + + if (options.count("files-prefix")) + files_prefix = options["files-prefix"].as(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No files-prefix option passed"); + + fs::path fs_path = fs::weakly_canonical(metadata_path); + if (!fs::exists(fs_path)) + { + std::cerr << fmt::format("Data path ({}) does not exist", fs_path.string()); + return 1; + } + + String uuid; + if (!RE2::Extract(metadata_path, EXTRACT_UUID_PATTERN, "\\1", &uuid)) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot extract uuid for: {}", metadata_path); + + std::shared_ptr metadata_buf; + std::function(const String &)> create_dst_buf; + String root_path; + + if (options.count("test-mode")) + { + if (options.count("url")) + url = options["url"].as(); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No url option passed for test mode"); + + metadata_buf = std::make_shared(Poco::URI(fs::path(url) / (".index-" + uuid)), Poco::Net::HTTPRequest::HTTP_PUT); + + create_dst_buf = [&](const String & remote_file_name) + { + return std::make_shared(Poco::URI(fs::path(url) / remote_file_name), Poco::Net::HTTPRequest::HTTP_PUT); + }; + } + else + { + if (options.count("output-dir")) + root_path = options["output-dir"].as(); + else + root_path = fs::current_path(); + + metadata_buf = std::make_shared(fs::path(root_path) / (".index-" + uuid)); + create_dst_buf = [&](const String & remote_file_name) + { + return std::make_shared(fs::path(root_path) / remote_file_name); + }; + } + + processTableFiles(fs_path, files_prefix, uuid, *metadata_buf, create_dst_buf); + metadata_buf->next(); + metadata_buf->finalize(); + + return 0; +} +catch (...) +{ + std::cerr << DB::getCurrentExceptionMessage(false); + return 1; +} From d78dd54e16ccda935be2d7d7c8cf7a2be5704d12 Mon Sep 17 00:00:00 2001 From: olgarev Date: Sun, 29 Aug 2021 18:17:41 +0000 Subject: [PATCH 75/86] Initial --- docs/en/interfaces/mysql.md | 6 ++++++ docs/ru/interfaces/mysql.md | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index ee9af925671..a74fec72d25 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -44,4 +44,10 @@ Restrictions: - some data types are sent as strings +To cancel the long query use `KILL QUERY connection_id` statement (it is replaced with `KILL QUERY WHERE query_id = connection_id` while proceeding). 
For example: + +``` bash +$ mysql --protocol tcp -h mysql_server -P 9004 default -u default --password=123 -e "KILL QUERY 123456;" +``` + [Original article](https://clickhouse.tech/docs/en/interfaces/mysql/) diff --git a/docs/ru/interfaces/mysql.md b/docs/ru/interfaces/mysql.md index 925b1113109..34150e0f96e 100644 --- a/docs/ru/interfaces/mysql.md +++ b/docs/ru/interfaces/mysql.md @@ -43,3 +43,9 @@ mysql> - не поддерживаются подготовленные запросы - некоторые типы данных отправляются как строки + +Чтобы прервать долго выполняемый запрос, используйте запрос `KILL QUERY connection_id` (во время выполнения он будет заменен на `KILL QUERY WHERE query_id = connection_id`). Например: + +``` bash +$ mysql --protocol tcp -h mysql_server -P 9004 default -u default --password=123 -e "KILL QUERY 123456;" +``` \ No newline at end of file From 60d85d36bd32ce6e7ebad12542186367e920ca4a Mon Sep 17 00:00:00 2001 From: olgarev <56617294+olgarev@users.noreply.github.com> Date: Sun, 29 Aug 2021 21:24:52 +0300 Subject: [PATCH 76/86] Update docs/en/interfaces/mysql.md --- docs/en/interfaces/mysql.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index a74fec72d25..619e2deab31 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -44,7 +44,7 @@ Restrictions: - some data types are sent as strings -To cancel the long query use `KILL QUERY connection_id` statement (it is replaced with `KILL QUERY WHERE query_id = connection_id` while proceeding). For example: +To cancel a long query use `KILL QUERY connection_id` statement (it is replaced with `KILL QUERY WHERE query_id = connection_id` while proceeding). For example: ``` bash $ mysql --protocol tcp -h mysql_server -P 9004 default -u default --password=123 -e "KILL QUERY 123456;" From de9e59161956d7a427ef8b5eaac59615863cfbca Mon Sep 17 00:00:00 2001 From: Amos Bird Date: Mon, 30 Aug 2021 02:03:33 +0800 Subject: [PATCH 77/86] Fast load by delaying table startup --- src/Databases/DatabaseAtomic.cpp | 5 +++-- src/Databases/DatabaseAtomic.h | 2 +- src/Databases/DatabaseLazy.cpp | 6 +++--- src/Databases/DatabaseLazy.h | 4 +--- src/Databases/DatabaseOrdinary.cpp | 17 +++++++++++++---- src/Databases/DatabaseOrdinary.h | 6 ++++-- src/Databases/DatabaseReplicated.cpp | 5 +++-- src/Databases/DatabaseReplicated.h | 2 +- src/Databases/IDatabase.h | 10 +++++++++- .../MySQL/DatabaseMaterializedMySQL.cpp | 7 ++++--- src/Databases/MySQL/DatabaseMaterializedMySQL.h | 2 +- src/Databases/MySQL/DatabaseMySQL.cpp | 2 +- src/Databases/MySQL/DatabaseMySQL.h | 2 +- .../DatabaseMaterializedPostgreSQL.cpp | 6 +++--- .../PostgreSQL/DatabaseMaterializedPostgreSQL.h | 6 +++--- src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 2 +- src/Databases/PostgreSQL/DatabasePostgreSQL.h | 2 +- src/Interpreters/DatabaseCatalog.cpp | 9 +++++++++ src/Interpreters/InterpreterCreateQuery.cpp | 3 ++- src/Interpreters/InterpreterCreateQuery.h | 6 ++++++ src/Interpreters/loadMetadata.cpp | 1 + 21 files changed, 71 insertions(+), 34 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index b55277594be..2dbcd652004 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -416,7 +416,8 @@ UUID DatabaseAtomic::tryGetTableUUID(const String & table_name) const return UUIDHelpers::Nil; } -void DatabaseAtomic::loadStoredObjects(ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach) +void 
DatabaseAtomic::loadStoredObjects( + ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) { /// Recreate symlinks to table data dirs in case of force restore, because some of them may be broken if (has_force_restore_data_flag) @@ -433,7 +434,7 @@ void DatabaseAtomic::loadStoredObjects(ContextMutablePtr local_context, bool has } } - DatabaseOrdinary::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach); + DatabaseOrdinary::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach, skip_startup_tables); if (has_force_restore_data_flag) { diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 21e841841bd..8be009cd6ca 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -47,7 +47,7 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; - void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach) override; + void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; /// Atomic database cannot be detached if there is detached table which still in use void assertCanBeDetached(bool cleanup) override; diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 5babb6c2975..7e0e1b7aa43 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -36,9 +36,7 @@ DatabaseLazy::DatabaseLazy(const String & name_, const String & metadata_path_, void DatabaseLazy::loadStoredObjects( - ContextMutablePtr local_context, - bool /* has_force_restore_data_flag */, - bool /*force_attach*/) + ContextMutablePtr local_context, bool /* has_force_restore_data_flag */, bool /*force_attach*/, bool /* skip_startup_tables */) { iterateMetadataFiles(local_context, [this](const String & file_name) { @@ -246,6 +244,8 @@ StoragePtr DatabaseLazy::loadTable(const String & table_name) const if (!ast || !endsWith(table->getName(), "Log")) throw Exception("Only *Log tables can be used with Lazy database engine.", ErrorCodes::LOGICAL_ERROR); + + table->startup(); { std::lock_guard lock(mutex); auto it = tables_cache.find(table_name); diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index ab8fbfc5a25..bc79a49b2fe 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -26,9 +26,7 @@ public: bool canContainDistributedTables() const override { return false; } - void loadStoredObjects( - ContextMutablePtr context, - bool has_force_restore_data_flag, bool force_attach) override; + void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; void createTable( ContextPtr context, diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 7758841afc2..bfe5de4c95f 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -83,7 +83,8 @@ DatabaseOrdinary::DatabaseOrdinary( { } -void DatabaseOrdinary::loadStoredObjects(ContextMutablePtr local_context, bool has_force_restore_data_flag, bool /*force_attach*/) +void DatabaseOrdinary::loadStoredObjects( + ContextMutablePtr local_context, bool has_force_restore_data_flag, bool /*force_attach*/, bool skip_startup_tables) { /** Tables load faster if they are loaded in sorted (by name) order. 
* Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, @@ -201,12 +202,20 @@ void DatabaseOrdinary::loadStoredObjects(ContextMutablePtr local_context, bool h pool.wait(); - /// After all tables was basically initialized, startup them. - startupTables(pool); + if (!skip_startup_tables) + { + /// After all tables was basically initialized, startup them. + startupTablesImpl(pool); + } } +void DatabaseOrdinary::startupTables() +{ + ThreadPool pool; + startupTablesImpl(pool); +} -void DatabaseOrdinary::startupTables(ThreadPool & thread_pool) +void DatabaseOrdinary::startupTablesImpl(ThreadPool & thread_pool) { LOG_INFO(log, "Starting up tables."); diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index 4d68890cc2b..7832377ccae 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -20,7 +20,9 @@ public: String getEngineName() const override { return "Ordinary"; } - void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach) override; + void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; + + void startupTables() override; void alterTable( ContextPtr context, @@ -35,7 +37,7 @@ protected: const String & statement, ContextPtr query_context); - void startupTables(ThreadPool & thread_pool); + void startupTablesImpl(ThreadPool & thread_pool); }; } diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 8e8fb4e2d6d..da03eb6aba6 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -305,11 +305,12 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt createEmptyLogEntry(current_zookeeper); } -void DatabaseReplicated::loadStoredObjects(ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach) +void DatabaseReplicated::loadStoredObjects( + ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) { tryConnectToZooKeeperAndInitDatabase(force_attach); - DatabaseAtomic::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach); + DatabaseAtomic::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach, skip_startup_tables); ddl_worker = std::make_unique(this, getContext()); ddl_worker->startup(); diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 41b1bf13e5f..1e0daeed07e 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -57,7 +57,7 @@ public: void drop(ContextPtr /*context*/) override; - void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach) override; + void loadStoredObjects(ContextMutablePtr context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; void shutdown() override; friend struct DatabaseReplicatedTask; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 387c6882eab..bd9605dca71 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -123,7 +123,15 @@ public: /// Load a set of existing tables. /// You can call only once, right after the object is created. 
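+    /// If skip_startup_tables is set, startup is deferred: the caller must invoke
+    /// startupTables() itself once every database has been loaded.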
- virtual void loadStoredObjects(ContextMutablePtr /*context*/, bool /*has_force_restore_data_flag*/, bool /*force_attach*/ = false) {} + virtual void loadStoredObjects( + ContextMutablePtr /*context*/, + bool /*has_force_restore_data_flag*/, + bool /*force_attach*/ = false, + bool /* skip_startup_tables */ = false) + { + } + + virtual void startupTables() {} /// Check the existence of the table. virtual bool isTableExist(const String & name, ContextPtr context) const = 0; diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index ba9b30425dd..0d81a4e1a98 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -93,10 +93,11 @@ void DatabaseMaterializedMySQL::setException(const std::exception_ptr & ex exception = exception_; } -template -void DatabaseMaterializedMySQL::loadStoredObjects(ContextMutablePtr context_, bool has_force_restore_data_flag, bool force_attach) +template +void DatabaseMaterializedMySQL::loadStoredObjects( + ContextMutablePtr context_, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) { - Base::loadStoredObjects(context_, has_force_restore_data_flag, force_attach); + Base::loadStoredObjects(context_, has_force_restore_data_flag, force_attach, skip_startup_tables); if (!force_attach) materialize_thread.assertMySQLAvailable(); diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index 812a0fb64c8..292edc97878 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -43,7 +43,7 @@ protected: public: String getEngineName() const override { return "MaterializedMySQL"; } - void loadStoredObjects(ContextMutablePtr context_, bool has_force_restore_data_flag, bool force_attach) override; + void loadStoredObjects(ContextMutablePtr context_, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) override; void createTable(ContextPtr context_, const String & name, const StoragePtr & table, const ASTPtr & query) override; diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index 1ebde547e6b..793b4f15d60 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -405,7 +405,7 @@ String DatabaseMySQL::getMetadataPath() const return metadata_path; } -void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, bool, bool /*force_attach*/) +void DatabaseMySQL::loadStoredObjects(ContextMutablePtr, bool, bool /*force_attach*/, bool /* skip_startup_tables */) { std::lock_guard lock{mutex}; diff --git a/src/Databases/MySQL/DatabaseMySQL.h b/src/Databases/MySQL/DatabaseMySQL.h index ac3417bf6a4..363557fbacb 100644 --- a/src/Databases/MySQL/DatabaseMySQL.h +++ b/src/Databases/MySQL/DatabaseMySQL.h @@ -75,7 +75,7 @@ public: void createTable(ContextPtr, const String & table_name, const StoragePtr & storage, const ASTPtr & create_query) override; - void loadStoredObjects(ContextMutablePtr, bool, bool force_attach) override; + void loadStoredObjects(ContextMutablePtr, bool, bool force_attach, bool skip_startup_tables) override; StoragePtr detachTable(const String & table_name) override; diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index fdd181373df..c9ea8d12ef2 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ 
b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -109,9 +109,10 @@ void DatabaseMaterializedPostgreSQL::startSynchronization() } -void DatabaseMaterializedPostgreSQL::loadStoredObjects(ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach) +void DatabaseMaterializedPostgreSQL::loadStoredObjects( + ContextMutablePtr local_context, bool has_force_restore_data_flag, bool force_attach, bool skip_startup_tables) { - DatabaseAtomic::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach); + DatabaseAtomic::loadStoredObjects(local_context, has_force_restore_data_flag, force_attach, skip_startup_tables); try { @@ -124,7 +125,6 @@ void DatabaseMaterializedPostgreSQL::loadStoredObjects(ContextMutablePtr local_c if (!force_attach) throw; } - } diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index dd8b4dc438a..915bf44f1f2 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -43,10 +43,10 @@ public: String getMetadataPath() const override { return metadata_path; } - void loadStoredObjects(ContextMutablePtr, bool, bool force_attach) override; + void loadStoredObjects(ContextMutablePtr, bool, bool force_attach, bool skip_startup_tables) override; - DatabaseTablesIteratorPtr getTablesIterator( - ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr + getTablesIterator(ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override; StoragePtr tryGetTable(const String & name, ContextPtr context) const override; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index 7db42b1971f..8dad3aa3a5c 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -280,7 +280,7 @@ void DatabasePostgreSQL::drop(ContextPtr /*context*/) } -void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, bool, bool /*force_attach*/) +void DatabasePostgreSQL::loadStoredObjects(ContextMutablePtr /* context */, bool, bool /*force_attach*/, bool /* skip_startup_tables */) { { std::lock_guard lock{mutex}; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.h b/src/Databases/PostgreSQL/DatabasePostgreSQL.h index f863fdcbf3e..629f9eadf2d 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h @@ -48,7 +48,7 @@ public: bool empty() const override; - void loadStoredObjects(ContextMutablePtr, bool, bool force_attach) override; + void loadStoredObjects(ContextMutablePtr, bool, bool force_attach, bool skip_startup_tables) override; DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index fd6b5b9a810..373b8835831 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -157,6 +157,15 @@ void DatabaseCatalog::loadDatabases() /// Another background thread which drops temporary LiveViews. /// We should start it after loadMarkedAsDroppedTables() to avoid race condition. TemporaryLiveViewCleaner::instance().startup(); + + /// Start up tables after all databases are loaded. 
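+    /// Their startup was skipped during loadStoredObjects() (see skip_startup_tables),
+    /// so it happens here instead; the temporary database holds no stored tables and is skipped.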
+ for (const auto & [database_name, database] : databases)
+ {
+ if (database_name == DatabaseCatalog::TEMPORARY_DATABASE)
+ continue;
+
+ database->startupTables();
+ }
 }
 void DatabaseCatalog::shutdownImpl()
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index a1313a84c36..7e061662534 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -272,7 +272,8 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
 }
 /// We use global context here, because storages lifetime is bigger than query context lifetime
- database->loadStoredObjects(getContext()->getGlobalContext(), has_force_restore_data_flag, create.attach && force_attach); //-V560
+ database->loadStoredObjects(
+ getContext()->getGlobalContext(), has_force_restore_data_flag, create.attach && force_attach, skip_startup_tables); //-V560
 }
 catch (...)
 {
diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h
index 92f2929ea7b..1ef5e0470fc 100644
--- a/src/Interpreters/InterpreterCreateQuery.h
+++ b/src/Interpreters/InterpreterCreateQuery.h
@@ -52,6 +52,11 @@ public:
 force_attach = force_attach_;
 }
+ void setSkipStartupTables(bool skip_startup_tables_)
+ {
+ skip_startup_tables = skip_startup_tables_;
+ }
+
 /// Obtain information about columns, their types, default values and column comments,
 /// for case when columns in CREATE query is specified explicitly.
 static ColumnsDescription getColumnsDescription(const ASTExpressionList & columns, ContextPtr context, bool attach);
@@ -94,6 +99,7 @@ private:
 /// Is this an internal query - not from the user.
 bool internal = false;
 bool force_attach = false;
+ bool skip_startup_tables = false;
 mutable String as_database_saved;
 mutable String as_table_saved;
diff --git a/src/Interpreters/loadMetadata.cpp b/src/Interpreters/loadMetadata.cpp
index 43f9727c355..458e17ac16b 100644
--- a/src/Interpreters/loadMetadata.cpp
+++ b/src/Interpreters/loadMetadata.cpp
@@ -43,6 +43,7 @@ static void executeCreateQuery(
 interpreter.setInternal(true);
 interpreter.setForceAttach(true);
 interpreter.setForceRestoreData(has_force_restore_data_flag);
+ interpreter.setSkipStartupTables(true);
 interpreter.execute();
 }
From ff776fd38d68615332553654230240deac600ba8 Mon Sep 17 00:00:00 2001
From: kssenii 
Date: Mon, 30 Aug 2021 05:36:25 +0000
Subject: [PATCH 78/86] Ping CI
From 378f4854daad3453e6066aa7bab2ae9fb90bca05 Mon Sep 17 00:00:00 2001
From: kssenii 
Date: Mon, 30 Aug 2021 05:50:08 +0000
Subject: [PATCH 79/86] Rename
---
 src/Storages/System/StorageSystemViews.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/Storages/System/StorageSystemViews.cpp b/src/Storages/System/StorageSystemViews.cpp
index 91f8dfeb258..0bb2724b358 100644
--- a/src/Storages/System/StorageSystemViews.cpp
+++ b/src/Storages/System/StorageSystemViews.cpp
@@ -22,9 +22,9 @@ NamesAndTypesList StorageSystemViews::getNamesAndTypes()
 return {
 {"database", std::make_shared<DataTypeString>()},
 {"name", std::make_shared<DataTypeString>()},
- {"table_database", std::make_shared<DataTypeString>()},
- {"table", std::make_shared<DataTypeString>()},
- {"type", std::move(view_type_datatype)},
+ {"main_dependency_database", std::make_shared<DataTypeString>()},
+ {"main_dependency_table", std::make_shared<DataTypeString>()},
+ {"view_type", std::move(view_type_datatype)},
 };
 }
From 8acf0fa16aa7e7f9fc7cec4fe0a0085102006468 Mon Sep 17 00:00:00 2001
From: Dmitrii Kovalkov 
Date: Mon, 30 Aug 2021 10:57:40 +0300
Subject: [PATCH 80/86] run generate-ya-make
---
src/AggregateFunctions/ya.make | 1 + src/Common/ya.make | 1 - src/Compression/ya.make | 6 ++++++ src/Core/ya.make | 2 ++ src/DataStreams/ya.make | 1 + src/Disks/ya.make | 1 + src/Functions/ya.make | 4 ++++ src/Interpreters/ya.make | 2 ++ src/Parsers/ya.make | 7 +++++++ src/Processors/ya.make | 2 ++ src/Storages/ya.make | 3 +++ 11 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/AggregateFunctions/ya.make b/src/AggregateFunctions/ya.make index d41c0444467..f0374fb69bc 100644 --- a/src/AggregateFunctions/ya.make +++ b/src/AggregateFunctions/ya.make @@ -48,6 +48,7 @@ SRCS( AggregateFunctionSequenceNextNode.cpp AggregateFunctionSimpleLinearRegression.cpp AggregateFunctionSimpleState.cpp + AggregateFunctionSingleValueOrNull.cpp AggregateFunctionState.cpp AggregateFunctionStatistics.cpp AggregateFunctionStatisticsSimple.cpp diff --git a/src/Common/ya.make b/src/Common/ya.make index c3be16c6e07..bf9d3627661 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -118,7 +118,6 @@ SRCS( isLocalAddress.cpp isValidUTF8.cpp malloc.cpp - memory.cpp new_delete.cpp parseAddress.cpp parseGlobs.cpp diff --git a/src/Compression/ya.make b/src/Compression/ya.make index 2dafbda262a..420ada412e0 100644 --- a/src/Compression/ya.make +++ b/src/Compression/ya.make @@ -32,8 +32,14 @@ SRCS( CompressionCodecT64.cpp CompressionCodecZSTD.cpp CompressionFactory.cpp + CompressionFactoryAdditions.cpp ICompressionCodec.cpp LZ4_decompress_faster.cpp + fuzzers/compressed_buffer_fuzzer.cpp + fuzzers/delta_decompress_fuzzer.cpp + fuzzers/double_delta_decompress_fuzzer.cpp + fuzzers/encrypted_decompress_fuzzer.cpp + fuzzers/lz4_decompress_fuzzer.cpp getCompressionCodecForFile.cpp ) diff --git a/src/Core/ya.make b/src/Core/ya.make index 6946d7a47bb..3c42163c37b 100644 --- a/src/Core/ya.make +++ b/src/Core/ya.make @@ -37,12 +37,14 @@ SRCS( PostgreSQL/insertPostgreSQLValue.cpp PostgreSQLProtocol.cpp QueryProcessingStage.cpp + ServerUUID.cpp Settings.cpp SettingsEnums.cpp SettingsFields.cpp SettingsQuirks.cpp SortDescription.cpp UUID.cpp + fuzzers/names_and_types_fuzzer.cpp iostream_debug_helpers.cpp ) diff --git a/src/DataStreams/ya.make b/src/DataStreams/ya.make index c16db808a5b..7f16e2360c9 100644 --- a/src/DataStreams/ya.make +++ b/src/DataStreams/ya.make @@ -44,6 +44,7 @@ SRCS( SquashingTransform.cpp TTLAggregationAlgorithm.cpp TTLBlockInputStream.cpp + TTLCalcInputStream.cpp TTLColumnAlgorithm.cpp TTLDeleteAlgorithm.cpp TTLUpdateInfoAlgorithm.cpp diff --git a/src/Disks/ya.make b/src/Disks/ya.make index 925dfd2a0ce..5b96df7f77c 100644 --- a/src/Disks/ya.make +++ b/src/Disks/ya.make @@ -23,6 +23,7 @@ SRCS( ReadIndirectBufferFromRemoteFS.cpp SingleDiskVolume.cpp StoragePolicy.cpp + TemporaryFileOnDisk.cpp VolumeJBOD.cpp VolumeRAID1.cpp WriteIndirectBufferFromRemoteFS.cpp diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 5721505b398..fbfff751314 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -290,6 +290,7 @@ SRCS( geohashesInBox.cpp getMacro.cpp getScalar.cpp + getServerPort.cpp getSetting.cpp getSizeOfEnumType.cpp globalVariable.cpp @@ -444,6 +445,7 @@ SRCS( registerFunctionsNull.cpp registerFunctionsRandom.cpp registerFunctionsReinterpret.cpp + registerFunctionsSnowflake.cpp registerFunctionsString.cpp registerFunctionsStringRegexp.cpp registerFunctionsStringSearch.cpp @@ -477,12 +479,14 @@ SRCS( s2RectIntersection.cpp s2RectUnion.cpp s2ToGeo.cpp + serverUUID.cpp sigmoid.cpp sign.cpp sin.cpp sinh.cpp sleep.cpp sleepEachRow.cpp + snowflake.cpp sqrt.cpp startsWith.cpp stem.cpp 
diff --git a/src/Interpreters/ya.make b/src/Interpreters/ya.make index 119e3cc5322..e0c0c475dad 100644 --- a/src/Interpreters/ya.make +++ b/src/Interpreters/ya.make @@ -70,6 +70,7 @@ SRCS( InJoinSubqueriesPreprocessor.cpp InternalTextLogsQueue.cpp InterpreterAlterQuery.cpp + InterpreterBackupQuery.cpp InterpreterCheckQuery.cpp InterpreterCreateFunctionQuery.cpp InterpreterCreateQuery.cpp @@ -146,6 +147,7 @@ SRCS( RewriteSumIfFunctionVisitor.cpp RowRefs.cpp SelectIntersectExceptQueryVisitor.cpp + Session.cpp Set.cpp SetVariants.cpp SortedBlocksWriter.cpp diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index e169b812dff..5368f396dca 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -11,6 +11,7 @@ PEERDIR( SRCS( ASTAlterQuery.cpp ASTAsterisk.cpp + ASTBackupQuery.cpp ASTColumnDeclaration.cpp ASTColumnsMatcher.cpp ASTColumnsTransformers.cpp @@ -51,6 +52,7 @@ SRCS( ASTRolesOrUsersSet.cpp ASTRowPolicyName.cpp ASTSampleRatio.cpp + ASTSelectIntersectExceptQuery.cpp ASTSelectQuery.cpp ASTSelectWithUnionQuery.cpp ASTSetQuery.cpp @@ -89,6 +91,7 @@ SRCS( MySQL/ASTDeclareSubPartition.cpp MySQL/ASTDeclareTableOptions.cpp ParserAlterQuery.cpp + ParserBackupQuery.cpp ParserCase.cpp ParserCheckQuery.cpp ParserCreateFunctionQuery.cpp @@ -142,6 +145,10 @@ SRCS( TokenIterator.cpp formatAST.cpp formatSettingName.cpp + fuzzers/create_parser_fuzzer.cpp + fuzzers/lexer_fuzzer.cpp + fuzzers/select_parser_fuzzer.cpp + getInsertQuery.cpp iostream_debug_helpers.cpp makeASTForLogicalFunction.cpp obfuscateQueries.cpp diff --git a/src/Processors/ya.make b/src/Processors/ya.make index db0ae80c742..ffca7a2e8af 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -118,6 +118,7 @@ SRCS( QueryPlan/IQueryPlanStep.cpp QueryPlan/ISourceStep.cpp QueryPlan/ITransformingStep.cpp + QueryPlan/IntersectOrExceptStep.cpp QueryPlan/JoinStep.cpp QueryPlan/LimitByStep.cpp QueryPlan/LimitStep.cpp @@ -165,6 +166,7 @@ SRCS( Transforms/FillingTransform.cpp Transforms/FilterTransform.cpp Transforms/FinishSortingTransform.cpp + Transforms/IntersectOrExceptTransform.cpp Transforms/JoiningTransform.cpp Transforms/LimitByTransform.cpp Transforms/LimitsCheckingTransform.cpp diff --git a/src/Storages/ya.make b/src/Storages/ya.make index b3494849441..e6b8756c7f8 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -213,6 +213,7 @@ SRCS( System/StorageSystemTables.cpp System/StorageSystemUserDirectories.cpp System/StorageSystemUsers.cpp + System/StorageSystemViews.cpp System/StorageSystemWarnings.cpp System/StorageSystemZeros.cpp System/StorageSystemZooKeeper.cpp @@ -220,6 +221,8 @@ SRCS( TTLDescription.cpp VirtualColumnUtils.cpp extractKeyExpressionList.cpp + fuzzers/columns_description_fuzzer.cpp + fuzzers/mergetree_checksum_fuzzer.cpp getStructureOfRemoteTable.cpp registerStorages.cpp transformQueryForExternalDatabase.cpp From 9871ad70ff06f8d7e4ccb498a34828921a1014f9 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 30 Aug 2021 11:12:25 +0300 Subject: [PATCH 81/86] Exclude fuzzers --- src/Access/ya.make.in | 2 +- src/AggregateFunctions/ya.make.in | 2 +- src/Bridge/ya.make.in | 2 +- src/Client/ya.make.in | 2 +- src/Columns/ya.make.in | 2 +- src/Common/ya.make.in | 2 +- src/Compression/ya.make | 5 ----- src/Compression/ya.make.in | 2 +- src/Core/ya.make | 1 - src/Core/ya.make.in | 2 +- src/DataTypes/ya.make.in | 2 +- src/Disks/ya.make.in | 2 +- src/Formats/ya.make.in | 2 +- src/IO/ya.make.in | 2 +- src/Parsers/ya.make | 3 --- src/Parsers/ya.make.in | 2 +- src/Processors/ya.make.in | 2 +- 
src/Storages/ya.make | 2 -- src/Storages/ya.make.in | 2 +- 19 files changed, 15 insertions(+), 26 deletions(-) diff --git a/src/Access/ya.make.in b/src/Access/ya.make.in index 5fa69cec4bb..7d91972194d 100644 --- a/src/Access/ya.make.in +++ b/src/Access/ya.make.in @@ -8,7 +8,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/AggregateFunctions/ya.make.in b/src/AggregateFunctions/ya.make.in index bc7823509af..78d82238168 100644 --- a/src/AggregateFunctions/ya.make.in +++ b/src/AggregateFunctions/ya.make.in @@ -8,7 +8,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/Bridge/ya.make.in b/src/Bridge/ya.make.in index 5fa69cec4bb..7d91972194d 100644 --- a/src/Bridge/ya.make.in +++ b/src/Bridge/ya.make.in @@ -8,7 +8,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/Client/ya.make.in b/src/Client/ya.make.in index 0ea90f75a6c..ccf46673c66 100644 --- a/src/Client/ya.make.in +++ b/src/Client/ya.make.in @@ -9,7 +9,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/Columns/ya.make.in b/src/Columns/ya.make.in index 3c0df961a79..5d2b0f3d06a 100644 --- a/src/Columns/ya.make.in +++ b/src/Columns/ya.make.in @@ -17,7 +17,7 @@ PEERDIR( ) SRCS( - + ) END() diff --git a/src/Common/ya.make.in b/src/Common/ya.make.in index fd6a805891e..9fd6c4b4708 100644 --- a/src/Common/ya.make.in +++ b/src/Common/ya.make.in @@ -24,7 +24,7 @@ INCLUDE(${ARCADIA_ROOT}/clickhouse/cmake/yandex/ya.make.versions.inc) SRCS( - + ) END() diff --git a/src/Compression/ya.make b/src/Compression/ya.make index 420ada412e0..8ba48696225 100644 --- a/src/Compression/ya.make +++ b/src/Compression/ya.make @@ -35,11 +35,6 @@ SRCS( CompressionFactoryAdditions.cpp ICompressionCodec.cpp LZ4_decompress_faster.cpp - fuzzers/compressed_buffer_fuzzer.cpp - fuzzers/delta_decompress_fuzzer.cpp - fuzzers/double_delta_decompress_fuzzer.cpp - fuzzers/encrypted_decompress_fuzzer.cpp - fuzzers/lz4_decompress_fuzzer.cpp getCompressionCodecForFile.cpp ) diff --git a/src/Compression/ya.make.in b/src/Compression/ya.make.in index 65fe0637600..ec4bf556901 100644 --- a/src/Compression/ya.make.in +++ b/src/Compression/ya.make.in @@ -15,7 +15,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/Core/ya.make b/src/Core/ya.make index 3c42163c37b..2362f88e5f6 100644 --- a/src/Core/ya.make +++ b/src/Core/ya.make @@ -44,7 +44,6 @@ SRCS( SettingsQuirks.cpp SortDescription.cpp UUID.cpp - fuzzers/names_and_types_fuzzer.cpp iostream_debug_helpers.cpp ) diff --git a/src/Core/ya.make.in b/src/Core/ya.make.in index 6b89dc18482..2938fb13e9a 100644 --- a/src/Core/ya.make.in +++ b/src/Core/ya.make.in @@ -10,7 +10,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/DataTypes/ya.make.in b/src/DataTypes/ya.make.in index 1d46c197d64..91a24812170 100644 --- a/src/DataTypes/ya.make.in +++ b/src/DataTypes/ya.make.in @@ -9,7 +9,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/Disks/ya.make.in b/src/Disks/ya.make.in index b803294afbe..303a2118784 100644 --- a/src/Disks/ya.make.in +++ b/src/Disks/ya.make.in @@ -7,7 +7,7 @@ PEERDIR( ) SRCS( - + ) END() diff --git a/src/Formats/ya.make.in b/src/Formats/ya.make.in index 525e582755e..9a257d7846e 100644 --- a/src/Formats/ya.make.in +++ b/src/Formats/ya.make.in @@ -10,7 +10,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/IO/ya.make.in b/src/IO/ya.make.in index 0b579b0df37..3c5460a5378 100644 --- a/src/IO/ya.make.in +++ b/src/IO/ya.make.in @@ -18,7 +18,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/Parsers/ya.make b/src/Parsers/ya.make index 5368f396dca..d17b73104d6 100644 --- a/src/Parsers/ya.make +++ b/src/Parsers/ya.make @@ -145,9 +145,6 @@ SRCS( 
TokenIterator.cpp formatAST.cpp formatSettingName.cpp - fuzzers/create_parser_fuzzer.cpp - fuzzers/lexer_fuzzer.cpp - fuzzers/select_parser_fuzzer.cpp getInsertQuery.cpp iostream_debug_helpers.cpp makeASTForLogicalFunction.cpp diff --git a/src/Parsers/ya.make.in b/src/Parsers/ya.make.in index 168fe7ceb34..600c2bd9715 100644 --- a/src/Parsers/ya.make.in +++ b/src/Parsers/ya.make.in @@ -8,7 +8,7 @@ PEERDIR( SRCS( - + ) END() diff --git a/src/Processors/ya.make.in b/src/Processors/ya.make.in index 7160e80bcce..ee87d25eafc 100644 --- a/src/Processors/ya.make.in +++ b/src/Processors/ya.make.in @@ -16,7 +16,7 @@ ADDINCL( CFLAGS(-DUSE_ARROW=1) SRCS( - + ) END() diff --git a/src/Storages/ya.make b/src/Storages/ya.make index e6b8756c7f8..b132b8c81cf 100644 --- a/src/Storages/ya.make +++ b/src/Storages/ya.make @@ -221,8 +221,6 @@ SRCS( TTLDescription.cpp VirtualColumnUtils.cpp extractKeyExpressionList.cpp - fuzzers/columns_description_fuzzer.cpp - fuzzers/mergetree_checksum_fuzzer.cpp getStructureOfRemoteTable.cpp registerStorages.cpp transformQueryForExternalDatabase.cpp diff --git a/src/Storages/ya.make.in b/src/Storages/ya.make.in index f7efe5870d3..9a31eef3836 100644 --- a/src/Storages/ya.make.in +++ b/src/Storages/ya.make.in @@ -10,7 +10,7 @@ PEERDIR( SRCS( - + ) END() From 2c8e2c6995e44d264279890f32df69d5bc1cca75 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov Date: Mon, 30 Aug 2021 12:19:13 +0300 Subject: [PATCH 82/86] Fix arcadia build --- programs/ya.make | 1 + src/Backups/ya.make | 27 +++++++++++++++++++++++++++ src/Backups/ya.make.in | 14 ++++++++++++++ src/Interpreters/Session.cpp | 2 +- src/ya.make | 1 + 5 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 src/Backups/ya.make create mode 100644 src/Backups/ya.make.in diff --git a/programs/ya.make b/programs/ya.make index 2de3052f1d2..66a0b076442 100644 --- a/programs/ya.make +++ b/programs/ya.make @@ -23,6 +23,7 @@ SRCS( client/QueryFuzzer.cpp client/ConnectionParameters.cpp client/Suggest.cpp + client/TestHint.cpp extract-from-config/ExtractFromConfig.cpp server/Server.cpp server/MetricsTransmitter.cpp diff --git a/src/Backups/ya.make b/src/Backups/ya.make new file mode 100644 index 00000000000..96217a56ef0 --- /dev/null +++ b/src/Backups/ya.make @@ -0,0 +1,27 @@ +# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it. +OWNER(g:clickhouse) + +LIBRARY() + +PEERDIR( + clickhouse/src/Common +) + + +SRCS( + BackupEntryConcat.cpp + BackupEntryFromAppendOnlyFile.cpp + BackupEntryFromImmutableFile.cpp + BackupEntryFromMemory.cpp + BackupEntryFromSmallFile.cpp + BackupFactory.cpp + BackupInDirectory.cpp + BackupRenamingConfig.cpp + BackupSettings.cpp + BackupUtils.cpp + hasCompatibleDataToRestoreTable.cpp + renameInCreateQuery.cpp + +) + +END() diff --git a/src/Backups/ya.make.in b/src/Backups/ya.make.in new file mode 100644 index 00000000000..7d91972194d --- /dev/null +++ b/src/Backups/ya.make.in @@ -0,0 +1,14 @@ +OWNER(g:clickhouse) + +LIBRARY() + +PEERDIR( + clickhouse/src/Common +) + + +SRCS( + +) + +END() diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 58a0407be00..4d41ba21026 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -288,7 +288,7 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So #if defined(ARCADIA_BUILD) /// This is harmful field that is used only in foreign "Arcadia" build. 
 if (const auto * basic_credentials = dynamic_cast<const BasicCredentials *>(&credentials_))
- session_client_info->current_password = basic_credentials->getPassword();
+ prepared_client_info->current_password = basic_credentials->getPassword();
 #endif
 }
diff --git a/src/ya.make b/src/ya.make
index 6537f67d66f..fb6f077502b 100644
--- a/src/ya.make
+++ b/src/ya.make
@@ -5,6 +5,7 @@ LIBRARY()
 PEERDIR(
 clickhouse/src/Access
 clickhouse/src/AggregateFunctions
+ clickhouse/src/Backups
 clickhouse/src/Bridge
 clickhouse/src/Client
 clickhouse/src/Columns
From 2e33072469ff12fe7a4d95ed4f6a365cfb042ba7 Mon Sep 17 00:00:00 2001
From: Anton Ivashkin 
Date: Mon, 30 Aug 2021 13:31:46 +0300
Subject: [PATCH 83/86] Fix test_storage_s3/test_put_get_with_globs (cleanup after test)
---
 tests/integration/test_storage_s3/test.py | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py
index 5908def8297..626b1f967b2 100644
--- a/tests/integration/test_storage_s3/test.py
+++ b/tests/integration/test_storage_s3/test.py
@@ -326,6 +326,10 @@ def test_put_get_with_globs(started_cluster):
 assert run_query(instance, query).splitlines() == [
 "450\t450\t900\t0.csv\t{bucket}/{max_path}".format(bucket=bucket, max_path=max_path)]
+ minio = started_cluster.minio_client
+ for obj in list(minio.list_objects(started_cluster.minio_bucket, prefix='{}/'.format(unique_prefix), recursive=True)):
+ minio.remove_object(started_cluster.minio_bucket, obj.object_name)
+
 # Test multipart put.
 @pytest.mark.parametrize("maybe_auth,positive", [
From 87b026aa43bc888191f6bbec975074cc515bd666 Mon Sep 17 00:00:00 2001
From: Onehr7 <38950109+Onehr7@users.noreply.github.com>
Date: Mon, 30 Aug 2021 20:08:33 +0800
Subject: [PATCH 84/86] Update in.md
---
 docs/zh/sql-reference/operators/in.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/zh/sql-reference/operators/in.md b/docs/zh/sql-reference/operators/in.md
index f39bd02c309..d76971b0487 100644
--- a/docs/zh/sql-reference/operators/in.md
+++ b/docs/zh/sql-reference/operators/in.md
@@ -5,7 +5,7 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
 # IN 操作符 {#select-in-operators}
-该 `IN`, `NOT IN`, `GLOBAL IN`,和 `GLOBAL NOT IN` 运算符是单独复盖的,因为它们的功能相当丰富。
+该 `IN`, `NOT IN`, `GLOBAL IN`,和 `GLOBAL NOT IN` 运算符是单独考虑的,因为它们的功能相当丰富。
 运算符的左侧是单列或元组。
From 54eeeb7cf255970e9af4d667b689e3379b126a5d Mon Sep 17 00:00:00 2001
From: Dmitrii Kovalkov 
Date: Mon, 30 Aug 2021 16:58:40 +0300
Subject: [PATCH 85/86] Add compat between SinkToStorage and BlockOutputStream
---
 src/Processors/Sources/SinkToOutputStream.cpp | 2 +-
 src/Processors/Sources/SinkToOutputStream.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/Processors/Sources/SinkToOutputStream.cpp b/src/Processors/Sources/SinkToOutputStream.cpp
index 7612ba10fb3..f8a890a0d9d 100644
--- a/src/Processors/Sources/SinkToOutputStream.cpp
+++ b/src/Processors/Sources/SinkToOutputStream.cpp
@@ -6,7 +6,7 @@ namespace DB
 {
 SinkToOutputStream::SinkToOutputStream(BlockOutputStreamPtr stream_)
- : ISink(stream_->getHeader())
+ : SinkToStorage(stream_->getHeader())
 , stream(std::move(stream_))
 {
 stream->writePrefix();
diff --git a/src/Processors/Sources/SinkToOutputStream.h b/src/Processors/Sources/SinkToOutputStream.h
index 5362608551f..946a53b685e 100644
--- a/src/Processors/Sources/SinkToOutputStream.h
+++ b/src/Processors/Sources/SinkToOutputStream.h
@@ -1,5 +1,5 @@
 #pragma once
-#include <Processors/ISink.h>
+#include <Processors/Sinks/SinkToStorage.h>
 namespace DB
 {
@@ -9,7 +9,7 @@ using BlockOutputStreamPtr = 
std::shared_ptr<IBlockOutputStream>;
 /// Sink which writes data to IBlockOutputStream.
 /// It's a temporary wrapper.
-class SinkToOutputStream : public ISink
+class SinkToOutputStream : public SinkToStorage
 {
 public:
 explicit SinkToOutputStream(BlockOutputStreamPtr stream);
From 2198548ead984e0d73626494244a6a743e6f41e0 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov 
Date: Tue, 31 Aug 2021 00:16:19 +0300
Subject: [PATCH 86/86] Remove unused header
---
 src/Core/ExternalTable.cpp | 1 -
 1 file changed, 1 deletion(-)
diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp
index 9b53cd79a84..22ea5e4f60b 100644
--- a/src/Core/ExternalTable.cpp
+++ b/src/Core/ExternalTable.cpp
@@ -12,7 +12,6 @@
 #include
 #include
-#include
 #include
 #include
 #include
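
--

Two standalone sketches follow, illustrating the patterns introduced by this series. First, the two-phase database startup (loadStoredObjects() taking a skip_startup_tables flag, followed by an explicit startupTables() pass once every database is loaded, as DatabaseCatalog::loadDatabases does above). This is a minimal C++ sketch under simplifying assumptions: the Database class and names below are illustrative stand-ins, not ClickHouse's real IDatabase/DatabaseCatalog API.

#include <iostream>
#include <map>
#include <memory>
#include <string>

/// Simplified stand-in for IDatabase: loading metadata is split from
/// starting up tables, mirroring the skip_startup_tables flag above.
class Database
{
public:
    explicit Database(std::string name_) : name(std::move(name_)) {}

    void loadStoredObjects(bool skip_startup_tables)
    {
        std::cout << "load metadata for " << name << "\n";
        if (!skip_startup_tables)
            startupTables();
    }

    void startupTables() { std::cout << "startup tables of " << name << "\n"; }

    std::string name;
};

int main()
{
    std::map<std::string, std::shared_ptr<Database>> databases;
    for (const auto & name : {"db1", "db2", "_temporary_and_external_tables"})
        databases.emplace(name, std::make_shared<Database>(name));

    /// Phase 1: load metadata of every database but defer table startup,
    /// so tables that depend on other databases do not start too early.
    for (auto & [name, database] : databases)
        database->loadStoredObjects(/* skip_startup_tables = */ true);

    /// Phase 2: start up tables only after all databases are loaded,
    /// skipping the temporary database, as in the catalog loop above.
    for (auto & [name, database] : databases)
    {
        if (name == "_temporary_and_external_tables")
            continue;
        database->startupTables();
    }
}

The point of the split is ordering: with a single-phase load, a table started during database A's load could reference a table in database B whose metadata is not yet loaded.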
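Second, the compatibility shim from PATCH 85: a legacy push-style BlockOutputStream is kept usable behind the newer SinkToStorage base via a thin adapter. Again a hedged sketch, with simplified stand-in types under the same names rather than the real ClickHouse classes (blocks are reduced to strings for brevity).

#include <iostream>
#include <memory>
#include <string>

/// Old-style interface: explicit write calls with prefix/suffix hooks.
struct BlockOutputStream
{
    virtual ~BlockOutputStream() = default;
    virtual void write(const std::string & block) = 0;
    virtual void writePrefix() {}
    virtual void writeSuffix() {}
};

/// New-style base: callers push blocks, subclasses implement consume().
struct SinkToStorage
{
    virtual ~SinkToStorage() = default;
    void push(const std::string & block) { consume(block); }

protected:
    virtual void consume(const std::string & block) = 0;
};

/// The adapter: derives from the new base and forwards to the old stream,
/// mirroring how the constructor in the diff above calls writePrefix().
class SinkToOutputStream : public SinkToStorage
{
public:
    explicit SinkToOutputStream(std::shared_ptr<BlockOutputStream> stream_)
        : stream(std::move(stream_))
    {
        stream->writePrefix();
    }

protected:
    void consume(const std::string & block) override { stream->write(block); }

private:
    std::shared_ptr<BlockOutputStream> stream;
};

/// Example old-style stream that just prints blocks.
struct ConsoleOutputStream : BlockOutputStream
{
    void write(const std::string & block) override { std::cout << "block: " << block << "\n"; }
};

int main()
{
    SinkToOutputStream sink(std::make_shared<ConsoleOutputStream>());
    sink.push("a");
    sink.push("b");
}

The adapter lets the processors pipeline treat every destination as a SinkToStorage while old IBlockOutputStream implementations are migrated incrementally.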