diff --git a/base/base/defines.h b/base/base/defines.h index 5685a6d9833..a0c3c0d1de5 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -145,6 +145,7 @@ #define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure #define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability #define TSA_SCOPED_LOCKABLE __attribute__((scoped_lockable)) /// object of a class has scoped lockable capability +#define TSA_RETURN_CAPABILITY(...) __attribute__((lock_returned(__VA_ARGS__))) /// to return capabilities in functions /// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function) /// They use a lambda function to apply function attribute to a single statement. This enable us to suppress warnings locally instead of diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index bf67db8ff2e..e73f61dde83 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -12,9 +12,12 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include @@ -43,29 +46,24 @@ namespace CurrentMetrics namespace { -/// Outputs sizes of uncompressed and compressed blocks for compressed file. +/// Outputs method, sizes of uncompressed and compressed blocks for compressed file. void checkAndWriteHeader(DB::ReadBuffer & in, DB::WriteBuffer & out) { while (!in.eof()) { - in.ignore(16); /// checksum - - char header[COMPRESSED_BLOCK_HEADER_SIZE]; - in.readStrict(header, COMPRESSED_BLOCK_HEADER_SIZE); - - UInt32 size_compressed = unalignedLoad(&header[1]); + UInt32 size_compressed; + UInt32 size_decompressed; + auto codec = DB::getCompressionCodecForFile(in, size_compressed, size_decompressed, true /* skip_to_next_block */); if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) throw DB::Exception(DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED, "Too large size_compressed. Most likely corrupted data."); - UInt32 size_decompressed = unalignedLoad(&header[5]); - + DB::writeText(queryToString(codec->getFullCodecDesc()), out); + DB::writeChar('\t', out); DB::writeText(size_decompressed, out); DB::writeChar('\t', out); DB::writeText(size_compressed, out); DB::writeChar('\n', out); - - in.ignore(size_compressed - COMPRESSED_BLOCK_HEADER_SIZE); } } diff --git a/src/Common/LockGuard.h b/src/Common/LockGuard.h index 8a98c5f553a..03c8a3e7617 100644 --- a/src/Common/LockGuard.h +++ b/src/Common/LockGuard.h @@ -1,23 +1,47 @@ #pragma once -#include #include +#include +#include namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +}; + /** LockGuard provides RAII-style locking mechanism for a mutex. - ** It's intended to be used like std::unique_ptr but with TSA annotations + ** It's intended to be used like std::unique_lock but with TSA annotations */ template class TSA_SCOPED_LOCKABLE LockGuard { public: - explicit LockGuard(Mutex & mutex_) TSA_ACQUIRE(mutex_) : mutex(mutex_) { mutex.lock(); } - ~LockGuard() TSA_RELEASE() { mutex.unlock(); } + explicit LockGuard(Mutex & mutex_) TSA_ACQUIRE(mutex_) : mutex(mutex_) { lock(); } + ~LockGuard() TSA_RELEASE() { if (locked) unlock(); } + + void lock() TSA_ACQUIRE() + { + /// Don't allow recursive_mutex for now. + if (locked) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't lock twice the same mutex"); + mutex.lock(); + locked = true; + } + + void unlock() TSA_RELEASE() + { + if (!locked) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't unlock the mutex without locking it first"); + mutex.unlock(); + locked = false; + } private: Mutex & mutex; + bool locked = false; }; template typename TLockGuard, typename Mutex> diff --git a/src/Compression/getCompressionCodecForFile.cpp b/src/Compression/getCompressionCodecForFile.cpp index 027ee0ac57a..b04e4b6371a 100644 --- a/src/Compression/getCompressionCodecForFile.cpp +++ b/src/Compression/getCompressionCodecForFile.cpp @@ -10,33 +10,50 @@ namespace DB { - using Checksum = CityHash_v1_0_2::uint128; -CompressionCodecPtr getCompressionCodecForFile(const IDataPartStorage & data_part_storage, const String & relative_path) +CompressionCodecPtr +getCompressionCodecForFile(ReadBuffer & read_buffer, UInt32 & size_compressed, UInt32 & size_decompressed, bool skip_to_next_block) { - auto read_buffer = data_part_storage.readFile(relative_path, {}, std::nullopt, std::nullopt); - read_buffer->ignore(sizeof(Checksum)); + read_buffer.ignore(sizeof(Checksum)); UInt8 header_size = ICompressionCodec::getHeaderSize(); + size_t starting_bytes = read_buffer.count(); PODArray compressed_buffer; compressed_buffer.resize(header_size); - read_buffer->readStrict(compressed_buffer.data(), header_size); + read_buffer.readStrict(compressed_buffer.data(), header_size); uint8_t method = ICompressionCodec::readMethod(compressed_buffer.data()); + size_compressed = unalignedLoad(&compressed_buffer[1]); + size_decompressed = unalignedLoad(&compressed_buffer[5]); if (method == static_cast(CompressionMethodByte::Multiple)) { compressed_buffer.resize(1); - read_buffer->readStrict(compressed_buffer.data(), 1); + read_buffer.readStrict(compressed_buffer.data(), 1); compressed_buffer.resize(1 + compressed_buffer[0]); - read_buffer->readStrict(compressed_buffer.data() + 1, compressed_buffer[0]); + read_buffer.readStrict(compressed_buffer.data() + 1, compressed_buffer[0]); auto codecs_bytes = CompressionCodecMultiple::getCodecsBytesFromData(compressed_buffer.data()); Codecs codecs; for (auto byte : codecs_bytes) codecs.push_back(CompressionCodecFactory::instance().get(byte)); + if (skip_to_next_block) + read_buffer.ignore(size_compressed - (read_buffer.count() - starting_bytes)); + return std::make_shared(codecs); } + + if (skip_to_next_block) + read_buffer.ignore(size_compressed - (read_buffer.count() - starting_bytes)); + return CompressionCodecFactory::instance().get(method); } +CompressionCodecPtr getCompressionCodecForFile(const IDataPartStorage & data_part_storage, const String & relative_path) +{ + auto read_buffer = data_part_storage.readFile(relative_path, {}, std::nullopt, std::nullopt); + UInt32 size_compressed; + UInt32 size_decompressed; + return getCompressionCodecForFile(*read_buffer, size_compressed, size_decompressed, false); +} + } diff --git a/src/Compression/getCompressionCodecForFile.h b/src/Compression/getCompressionCodecForFile.h index b6f22750e4d..535befa37e1 100644 --- a/src/Compression/getCompressionCodecForFile.h +++ b/src/Compression/getCompressionCodecForFile.h @@ -13,4 +13,8 @@ namespace DB /// from metadata. CompressionCodecPtr getCompressionCodecForFile(const IDataPartStorage & data_part_storage, const String & relative_path); +/// Same as above which is used by clickhouse-compressor to print compression statistics of each data block. +CompressionCodecPtr +getCompressionCodecForFile(ReadBuffer & read_buffer, UInt32 & size_compressed, UInt32 & size_decompressed, bool skip_to_next_block); + } diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index a963389a10a..6ac9efccffe 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -4560,7 +4560,7 @@ Possible values: - 0 - Disable - 1 - Enable )", 0) \ - DECLARE(Bool, query_plan_merge_filters, false, R"( + DECLARE(Bool, query_plan_merge_filters, true, R"( Allow to merge filters in the query plan )", 0) \ DECLARE(Bool, query_plan_filter_push_down, true, R"( @@ -4861,9 +4861,9 @@ Allows to record the filesystem caching log for each query DECLARE(Bool, read_from_filesystem_cache_if_exists_otherwise_bypass_cache, false, R"( Allow to use the filesystem cache in passive mode - benefit from the existing cache entries, but don't put more entries into the cache. If you set this setting for heavy ad-hoc queries and leave it disabled for short real-time queries, this will allows to avoid cache threshing by too heavy queries and to improve the overall system efficiency. )", 0) \ - DECLARE(Bool, skip_download_if_exceeds_query_cache, true, R"( + DECLARE(Bool, filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit, true, R"( Skip download from remote filesystem if exceeds query cache size -)", 0) \ +)", 0) ALIAS(skip_download_if_exceeds_query_cache) \ DECLARE(UInt64, filesystem_cache_max_download_size, (128UL * 1024 * 1024 * 1024), R"( Max remote filesystem cache size that can be downloaded by a single query )", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 4fb1f6ca311..86a65e2ebd4 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -76,8 +76,10 @@ static std::initializer_listremoveKeyIfExists(getCacheKey(path_key_for_cache), FileCache::getCommonUser().user_id); } -void CachedObjectStorage::removeObject(const StoredObject & object) -{ - removeCacheIfExists(object.remote_path); - object_storage->removeObject(object); -} - -void CachedObjectStorage::removeObjects(const StoredObjects & objects) -{ - for (const auto & object : objects) - removeCacheIfExists(object.remote_path); - - object_storage->removeObjects(objects); -} - void CachedObjectStorage::removeObjectIfExists(const StoredObject & object) { removeCacheIfExists(object.remote_path); diff --git a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h index b77baf21e40..77aa635b89b 100644 --- a/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h +++ b/src/Disks/ObjectStorages/Cached/CachedObjectStorage.h @@ -45,10 +45,6 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) override; - void removeObject(const StoredObject & object) override; - - void removeObjects(const StoredObjects & objects) override; - void removeObjectIfExists(const StoredObject & object) override; void removeObjectsIfExist(const StoredObjects & objects) override; diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp index 64323fb6f3c..19de2bb78af 100644 --- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp +++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp @@ -480,8 +480,7 @@ struct WriteFileObjectStorageOperation final : public IDiskObjectStorageOperatio void undo() override { - if (object_storage.exists(object)) - object_storage.removeObject(object); + object_storage.removeObjectIfExists(object); } void finalize() override @@ -543,8 +542,7 @@ struct CopyFileObjectStorageOperation final : public IDiskObjectStorageOperation void undo() override { - for (const auto & object : created_objects) - destination_object_storage.removeObject(object); + destination_object_storage.removeObjectsIfExist(created_objects); } void finalize() override diff --git a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h index b53161beb76..7d6c914c398 100644 --- a/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h +++ b/src/Disks/ObjectStorages/HDFS/HDFSObjectStorage.h @@ -77,11 +77,6 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) override; - /// Remove file. Throws exception if file doesn't exists or it's a directory. - void removeObject(const StoredObject & object) override; - - void removeObjects(const StoredObjects & objects) override; - void removeObjectIfExists(const StoredObject & object) override; void removeObjectsIfExist(const StoredObjects & objects) override; @@ -117,6 +112,11 @@ private: void initializeHDFSFS() const; std::string extractObjectKeyFromURL(const StoredObject & object) const; + /// Remove file. Throws exception if file doesn't exists or it's a directory. + void removeObject(const StoredObject & object); + + void removeObjects(const StoredObjects & objects); + const Poco::Util::AbstractConfiguration & config; mutable HDFSBuilderWrapper hdfs_builder; diff --git a/src/Disks/ObjectStorages/IObjectStorage.h b/src/Disks/ObjectStorages/IObjectStorage.h index 8dde96b8b16..adb36762539 100644 --- a/src/Disks/ObjectStorages/IObjectStorage.h +++ b/src/Disks/ObjectStorages/IObjectStorage.h @@ -161,11 +161,11 @@ public: virtual bool isRemote() const = 0; /// Remove object. Throws exception if object doesn't exists. - virtual void removeObject(const StoredObject & object) = 0; + // virtual void removeObject(const StoredObject & object) = 0; /// Remove multiple objects. Some object storages can do batch remove in a more /// optimal way. - virtual void removeObjects(const StoredObjects & objects) = 0; + // virtual void removeObjects(const StoredObjects & objects) = 0; /// Remove object on path if exists virtual void removeObjectIfExists(const StoredObject & object) = 0; diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp index 5f1b6aedc72..f24501dc60e 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.cpp @@ -81,7 +81,7 @@ std::unique_ptr LocalObjectStorage::writeObject( /// NO return std::make_unique(object.remote_path, buf_size); } -void LocalObjectStorage::removeObject(const StoredObject & object) +void LocalObjectStorage::removeObject(const StoredObject & object) const { /// For local object storage files are actually removed when "metadata" is removed. if (!exists(object)) @@ -91,7 +91,7 @@ void LocalObjectStorage::removeObject(const StoredObject & object) ErrnoException::throwFromPath(ErrorCodes::CANNOT_UNLINK, object.remote_path, "Cannot unlink file {}", object.remote_path); } -void LocalObjectStorage::removeObjects(const StoredObjects & objects) +void LocalObjectStorage::removeObjects(const StoredObjects & objects) const { for (const auto & object : objects) removeObject(object); diff --git a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h index f1a0391a984..5b3c3951364 100644 --- a/src/Disks/ObjectStorages/Local/LocalObjectStorage.h +++ b/src/Disks/ObjectStorages/Local/LocalObjectStorage.h @@ -42,10 +42,6 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) override; - void removeObject(const StoredObject & object) override; - - void removeObjects(const StoredObjects & objects) override; - void removeObjectIfExists(const StoredObject & object) override; void removeObjectsIfExist(const StoredObjects & objects) override; @@ -82,6 +78,10 @@ public: ReadSettings patchSettings(const ReadSettings & read_settings) const override; private: + void removeObject(const StoredObject & object) const; + + void removeObjects(const StoredObjects & objects) const; + String key_prefix; LoggerPtr log; std::string description; diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp index d56c5d9143c..27aa9304de7 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorage.cpp @@ -203,7 +203,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::unlinkFile(const std::str { auto object_key = metadata_storage.object_storage->generateObjectKeyForPath(path, std::nullopt /* key_prefix */); auto object = StoredObject(object_key.serialize()); - metadata_storage.object_storage->removeObject(object); + metadata_storage.object_storage->removeObjectIfExists(object); } void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std::string & path) @@ -211,7 +211,7 @@ void MetadataStorageFromPlainObjectStorageTransaction::removeDirectory(const std if (metadata_storage.object_storage->isWriteOnce()) { for (auto it = metadata_storage.iterateDirectory(path); it->isValid(); it->next()) - metadata_storage.object_storage->removeObject(StoredObject(it->path())); + metadata_storage.object_storage->removeObjectIfExists(StoredObject(it->path())); } else { diff --git a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp index ea57d691908..62015631aa5 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromPlainObjectStorageOperations.cpp @@ -107,7 +107,7 @@ void MetadataStorageFromPlainObjectStorageCreateDirectoryOperation::undo(std::un auto metric = object_storage->getMetadataStorageMetrics().directory_map_size; CurrentMetrics::sub(metric, 1); - object_storage->removeObject(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); + object_storage->removeObjectIfExists(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); } else if (write_created) object_storage->removeObjectIfExists(StoredObject(metadata_object_key.serialize(), path / PREFIX_PATH_FILE_NAME)); @@ -247,7 +247,7 @@ void MetadataStorageFromPlainObjectStorageRemoveDirectoryOperation::execute(std: auto metadata_object_key = createMetadataObjectKey(key_prefix, metadata_key_prefix); auto metadata_object = StoredObject(/*remote_path*/ metadata_object_key.serialize(), /*local_path*/ path / PREFIX_PATH_FILE_NAME); - object_storage->removeObject(metadata_object); + object_storage->removeObjectIfExists(metadata_object); { std::lock_guard lock(path_map.mutex); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 47ef97401f2..9fca3cad688 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -326,21 +326,11 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e ProfileEvents::DiskS3DeleteObjects); } -void S3ObjectStorage::removeObject(const StoredObject & object) -{ - removeObjectImpl(object, false); -} - void S3ObjectStorage::removeObjectIfExists(const StoredObject & object) { removeObjectImpl(object, true); } -void S3ObjectStorage::removeObjects(const StoredObjects & objects) -{ - removeObjectsImpl(objects, false); -} - void S3ObjectStorage::removeObjectsIfExist(const StoredObjects & objects) { removeObjectsImpl(objects, true); diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h index d6e84cf57ef..4b9c968ede9 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.h +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.h @@ -101,13 +101,6 @@ public: ObjectStorageIteratorPtr iterate(const std::string & path_prefix, size_t max_keys) const override; - /// Uses `DeleteObjectRequest`. - void removeObject(const StoredObject & object) override; - - /// Uses `DeleteObjectsRequest` if it is allowed by `s3_capabilities`, otherwise `DeleteObjectRequest`. - /// `DeleteObjectsRequest` is not supported on GCS, see https://issuetracker.google.com/issues/162653700 . - void removeObjects(const StoredObjects & objects) override; - /// Uses `DeleteObjectRequest`. void removeObjectIfExists(const StoredObject & object) override; diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp index 871d3b506f6..35abc0ed0df 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.cpp @@ -254,16 +254,6 @@ std::unique_ptr WebObjectStorage::writeObject( /// NOLI throwNotAllowed(); } -void WebObjectStorage::removeObject(const StoredObject &) -{ - throwNotAllowed(); -} - -void WebObjectStorage::removeObjects(const StoredObjects &) -{ - throwNotAllowed(); -} - void WebObjectStorage::removeObjectIfExists(const StoredObject &) { throwNotAllowed(); diff --git a/src/Disks/ObjectStorages/Web/WebObjectStorage.h b/src/Disks/ObjectStorages/Web/WebObjectStorage.h index 573221b7e21..1e612bd359c 100644 --- a/src/Disks/ObjectStorages/Web/WebObjectStorage.h +++ b/src/Disks/ObjectStorages/Web/WebObjectStorage.h @@ -47,10 +47,6 @@ public: size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, const WriteSettings & write_settings = {}) override; - void removeObject(const StoredObject & object) override; - - void removeObjects(const StoredObjects & objects) override; - void removeObjectIfExists(const StoredObject & object) override; void removeObjectsIfExist(const StoredObjects & objects) override; diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index bcb9e0641b8..c9d3ccb4445 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1035,6 +1035,9 @@ private: size_t tuple_size, size_t input_rows_count) const { + if (0 == tuple_size) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Comparison of zero-sized tuples is not implemented"); + ColumnsWithTypeAndName less_columns(tuple_size); ColumnsWithTypeAndName equal_columns(tuple_size - 1); ColumnsWithTypeAndName tmp_columns(2); diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index e03b27b3c39..5e1e7067e86 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -668,6 +668,9 @@ private: temporary_columns[0] = arguments[0]; size_t tuple_size = type1.getElements().size(); + if (tuple_size == 0) + return ColumnTuple::create(input_rows_count); + Columns tuple_columns(tuple_size); for (size_t i = 0; i < tuple_size; ++i) diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index c1747314c76..b66b9867e39 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -69,7 +69,7 @@ struct ReadSettings std::shared_ptr page_cache; size_t filesystem_cache_max_download_size = (128UL * 1024 * 1024 * 1024); - bool skip_download_if_exceeds_query_cache = true; + bool filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit = true; size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE; diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 7c6a21941eb..aefe3ff338c 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -37,7 +37,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// Case when bucket name represented in domain name of S3 URL. /// E.g. (https://bucket-name.s3.region.amazonaws.com/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access - static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3express[\-a-z0-9]+|s3|cos|obs|oss|eos)([.\-][a-z0-9\-.:]+))"); + static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3express[\-a-z0-9]+|s3|cos|obs|oss-data-acc|oss|eos)([.\-][a-z0-9\-.:]+))"); /// Case when AWS Private Link Interface is being used /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key) @@ -115,7 +115,15 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) && re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri)) { is_virtual_hosted_style = true; - endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri; + if (name == "oss-data-acc") + { + bucket = bucket.substr(0, bucket.find('.')); + endpoint = uri.getScheme() + "://" + uri.getHost().substr(bucket.length() + 1); + } + else + { + endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri; + } validateBucket(bucket, uri); if (!uri.getPath().empty()) diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 8696fab0616..6167313b634 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -212,6 +212,22 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ("", uri.version_id); ASSERT_EQ(true, uri.is_virtual_hosted_style); } + { + S3::URI uri("https://bucket-test1.oss-cn-beijing-internal.aliyuncs.com/ab-test"); + ASSERT_EQ("https://oss-cn-beijing-internal.aliyuncs.com", uri.endpoint); + ASSERT_EQ("bucket-test1", uri.bucket); + ASSERT_EQ("ab-test", uri.key); + ASSERT_EQ("", uri.version_id); + ASSERT_EQ(true, uri.is_virtual_hosted_style); + } + { + S3::URI uri("https://bucket-test.cn-beijing-internal.oss-data-acc.aliyuncs.com/ab-test"); + ASSERT_EQ("https://cn-beijing-internal.oss-data-acc.aliyuncs.com", uri.endpoint); + ASSERT_EQ("bucket-test", uri.bucket); + ASSERT_EQ("ab-test", uri.key); + ASSERT_EQ("", uri.version_id); + ASSERT_EQ(true, uri.is_virtual_hosted_style); + } } TEST(S3UriTest, versionIdChecks) diff --git a/src/Interpreters/Cache/QueryLimit.cpp b/src/Interpreters/Cache/QueryLimit.cpp index b18d23a5b7f..a7c964022a5 100644 --- a/src/Interpreters/Cache/QueryLimit.cpp +++ b/src/Interpreters/Cache/QueryLimit.cpp @@ -53,7 +53,7 @@ FileCacheQueryLimit::QueryContextPtr FileCacheQueryLimit::getOrSetQueryContext( { it->second = std::make_shared( settings.filesystem_cache_max_download_size, - !settings.skip_download_if_exceeds_query_cache); + !settings.filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit); } return it->second; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d42002bf98d..d2aad0a52d8 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -237,7 +237,7 @@ namespace Setting extern const SettingsUInt64 remote_fs_read_backoff_max_tries; extern const SettingsUInt64 remote_read_min_bytes_for_seek; extern const SettingsBool throw_on_error_from_cache_on_write_operations; - extern const SettingsBool skip_download_if_exceeds_query_cache; + extern const SettingsBool filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit; extern const SettingsBool s3_allow_parallel_part_upload; extern const SettingsBool use_page_cache_for_disks_without_file_cache; extern const SettingsUInt64 use_structure_from_insertion_table_in_table_functions; @@ -5755,7 +5755,7 @@ ReadSettings Context::getReadSettings() const res.filesystem_cache_prefer_bigger_buffer_size = settings_ref[Setting::filesystem_cache_prefer_bigger_buffer_size]; res.filesystem_cache_max_download_size = settings_ref[Setting::filesystem_cache_max_download_size]; - res.skip_download_if_exceeds_query_cache = settings_ref[Setting::skip_download_if_exceeds_query_cache]; + res.filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit = settings_ref[Setting::filesystem_cache_skip_download_if_exceeds_per_query_cache_write_limit]; res.page_cache = getPageCache(); res.use_page_cache_for_disks_without_file_cache = settings_ref[Setting::use_page_cache_for_disks_without_file_cache]; diff --git a/src/Interpreters/QueryMetricLog.cpp b/src/Interpreters/QueryMetricLog.cpp index 5ab3fe590e0..52d773b7d1b 100644 --- a/src/Interpreters/QueryMetricLog.cpp +++ b/src/Interpreters/QueryMetricLog.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -16,7 +17,6 @@ #include #include -#include namespace DB @@ -24,6 +24,15 @@ namespace DB static auto logger = getLogger("QueryMetricLog"); +String timePointToString(QueryMetricLog::TimePoint time) +{ + /// fmtlib supports subsecond formatting in 10.0.0. We're in 9.1.0, so we need to add the milliseconds ourselves. + auto seconds = std::chrono::time_point_cast(time); + auto microseconds = std::chrono::duration_cast(time - seconds).count(); + + return fmt::format("{:%Y.%m.%d %H:%M:%S}.{:06}", seconds, microseconds); +} + ColumnsDescription QueryMetricLogElement::getColumnsDescription() { ColumnsDescription result; @@ -87,36 +96,73 @@ void QueryMetricLog::shutdown() Base::shutdown(); } -void QueryMetricLog::startQuery(const String & query_id, TimePoint start_time, UInt64 interval_milliseconds) +void QueryMetricLog::collectMetric(const ProcessList & process_list, String query_id) { - QueryMetricLogStatus status; - status.interval_milliseconds = interval_milliseconds; - status.next_collect_time = start_time + std::chrono::milliseconds(interval_milliseconds); + auto current_time = std::chrono::system_clock::now(); + const auto query_info = process_list.getQueryInfo(query_id, false, true, false); + if (!query_info) + { + /// TODO: remove trace before 24.11 release after checking everything is fine on the CI + LOG_TRACE(logger, "Query {} is not running anymore, so we couldn't get its QueryStatusInfo", query_id); + return; + } + + LockGuard global_lock(queries_mutex); + auto it = queries.find(query_id); + + /// The query might have finished while the scheduled task is running. + if (it == queries.end()) + { + global_lock.unlock(); + /// TODO: remove trace before 24.11 release after checking everything is fine on the CI + LOG_TRACE(logger, "Query {} not found in the list. Finished while this collecting task was running", query_id); + return; + } + + auto & query_status = it->second; + if (!query_status.mutex) + { + global_lock.unlock(); + /// TODO: remove trace before 24.11 release after checking everything is fine on the CI + LOG_TRACE(logger, "Query {} finished while this collecting task was running", query_id); + return; + } + + LockGuard query_lock(query_status.getMutex()); + global_lock.unlock(); + + auto elem = query_status.createLogMetricElement(query_id, *query_info, current_time); + if (elem) + add(std::move(elem.value())); +} + +/// We use TSA_NO_THREAD_SAFETY_ANALYSIS to prevent TSA complaining that we're modifying the query_status fields +/// without locking the mutex. Since we're building it from scratch, there's no harm in not holding it. +/// If we locked it to make TSA happy, TSAN build would falsely complain about +/// lock-order-inversion (potential deadlock) +/// which is not a real issue since QueryMetricLogStatus's mutex cannot be locked by anything else +/// until we add it to the queries map. +void QueryMetricLog::startQuery(const String & query_id, TimePoint start_time, UInt64 interval_milliseconds) TSA_NO_THREAD_SAFETY_ANALYSIS +{ + QueryMetricLogStatus query_status; + QueryMetricLogStatusInfo & info = query_status.info; + info.interval_milliseconds = interval_milliseconds; + info.next_collect_time = start_time; auto context = getContext(); const auto & process_list = context->getProcessList(); - status.task = context->getSchedulePool().createTask("QueryMetricLog", [this, &process_list, query_id] { - auto current_time = std::chrono::system_clock::now(); - const auto query_info = process_list.getQueryInfo(query_id, false, true, false); - if (!query_info) - { - LOG_TRACE(logger, "Query {} is not running anymore, so we couldn't get its QueryStatusInfo", query_id); - return; - } - - auto elem = createLogMetricElement(query_id, *query_info, current_time); - if (elem) - add(std::move(elem.value())); + info.task = context->getSchedulePool().createTask("QueryMetricLog", [this, &process_list, query_id] { + collectMetric(process_list, query_id); }); - std::lock_guard lock(queries_mutex); - status.task->scheduleAfter(interval_milliseconds); - queries.emplace(query_id, std::move(status)); + LockGuard global_lock(queries_mutex); + query_status.scheduleNext(query_id); + queries.emplace(query_id, std::move(query_status)); } void QueryMetricLog::finishQuery(const String & query_id, TimePoint finish_time, QueryStatusInfoPtr query_info) { - std::unique_lock lock(queries_mutex); + LockGuard global_lock(queries_mutex); auto it = queries.find(query_id); /// finishQuery may be called from logExceptionBeforeStart when the query has not even started @@ -124,9 +170,19 @@ void QueryMetricLog::finishQuery(const String & query_id, TimePoint finish_time, if (it == queries.end()) return; + auto & query_status = it->second; + decltype(query_status.mutex) query_mutex; + LockGuard query_lock(query_status.getMutex()); + + /// Move the query mutex here so that we hold it until the end, after removing the query from queries. + query_mutex = std::move(query_status.mutex); + query_status.mutex = {}; + + global_lock.unlock(); + if (query_info) { - auto elem = createLogMetricElement(query_id, *query_info, finish_time, false); + auto elem = query_status.createLogMetricElement(query_id, *query_info, finish_time, false); if (elem) add(std::move(elem.value())); } @@ -139,51 +195,58 @@ void QueryMetricLog::finishQuery(const String & query_id, TimePoint finish_time, /// that order. { /// Take ownership of the task so that we can destroy it in this scope after unlocking `queries_mutex`. - auto task = std::move(it->second.task); + auto task = std::move(query_status.info.task); /// Build an empty task for the old task to make sure it does not lock any mutex on its destruction. - it->second.task = {}; + query_status.info.task = {}; + query_lock.unlock(); + global_lock.lock(); queries.erase(query_id); /// Ensure `queries_mutex` is unlocked before calling task's destructor at the end of this /// scope which will lock `exec_mutex`. - lock.unlock(); + global_lock.unlock(); } } -std::optional QueryMetricLog::createLogMetricElement(const String & query_id, const QueryStatusInfo & query_info, TimePoint query_info_time, bool schedule_next) +void QueryMetricLogStatus::scheduleNext(String query_id) { - /// fmtlib supports subsecond formatting in 10.0.0. We're in 9.1.0, so we need to add the milliseconds ourselves. - auto seconds = std::chrono::time_point_cast(query_info_time); - auto microseconds = std::chrono::duration_cast(query_info_time - seconds).count(); - LOG_DEBUG(logger, "Collecting query_metric_log for query {} with QueryStatusInfo from {:%Y.%m.%d %H:%M:%S}.{:06}. Schedule next: {}", query_id, seconds, microseconds, schedule_next); - - std::unique_lock lock(queries_mutex); - auto query_status_it = queries.find(query_id); - - /// The query might have finished while the scheduled task is running. - if (query_status_it == queries.end()) + info.next_collect_time += std::chrono::milliseconds(info.interval_milliseconds); + const auto now = std::chrono::system_clock::now(); + if (info.next_collect_time > now) { - lock.unlock(); - LOG_TRACE(logger, "Query {} finished already while this collecting task was running", query_id); - return {}; + const auto wait_time = std::chrono::duration_cast(info.next_collect_time - now).count(); + info.task->scheduleAfter(wait_time); } - - auto & query_status = query_status_it->second; - if (query_info_time <= query_status.last_collect_time) + else { - lock.unlock(); + LOG_TRACE(logger, "The next collecting task for query {} should have already run at {}. Scheduling it right now", + query_id, timePointToString(info.next_collect_time)); + info.task->schedule(); + } +} + +std::optional QueryMetricLogStatus::createLogMetricElement(const String & query_id, const QueryStatusInfo & query_info, TimePoint query_info_time, bool schedule_next) +{ + /// TODO: remove trace before 24.11 release after checking everything is fine on the CI + LOG_TRACE(logger, "Collecting query_metric_log for query {} and interval {} ms with QueryStatusInfo from {}. Next collection time: {}", + query_id, info.interval_milliseconds, timePointToString(query_info_time), + schedule_next ? timePointToString(info.next_collect_time + std::chrono::milliseconds(info.interval_milliseconds)) : "finished"); + + if (query_info_time <= info.last_collect_time) + { + /// TODO: remove trace before 24.11 release after checking everything is fine on the CI LOG_TRACE(logger, "Query {} has a more recent metrics collected. Skipping this one", query_id); return {}; } - query_status.last_collect_time = query_info_time; + info.last_collect_time = query_info_time; QueryMetricLogElement elem; elem.event_time = timeInSeconds(query_info_time); elem.event_time_microseconds = timeInMicroseconds(query_info_time); - elem.query_id = query_status_it->first; + elem.query_id = query_id; elem.memory_usage = query_info.memory_usage > 0 ? query_info.memory_usage : 0; elem.peak_memory_usage = query_info.peak_memory_usage > 0 ? query_info.peak_memory_usage : 0; @@ -192,7 +255,7 @@ std::optional QueryMetricLog::createLogMetricElement(cons for (ProfileEvents::Event i = ProfileEvents::Event(0), end = ProfileEvents::end(); i < end; ++i) { const auto & new_value = (*(query_info.profile_counters))[i]; - auto & old_value = query_status.last_profile_events[i]; + auto & old_value = info.last_profile_events[i]; /// Profile event counters are supposed to be monotonic. However, at least the `NetworkReceiveBytes` can be inaccurate. /// So, since in the future the counter should always have a bigger value than in the past, we skip this event. @@ -208,16 +271,13 @@ std::optional QueryMetricLog::createLogMetricElement(cons } else { - LOG_TRACE(logger, "Query {} has no profile counters", query_id); + /// TODO: remove trace before 24.11 release after checking everything is fine on the CI + LOG_DEBUG(logger, "Query {} has no profile counters", query_id); elem.profile_events = std::vector(ProfileEvents::end()); } if (schedule_next) - { - query_status.next_collect_time += std::chrono::milliseconds(query_status.interval_milliseconds); - const auto wait_time = std::chrono::duration_cast(query_status.next_collect_time - std::chrono::system_clock::now()).count(); - query_status.task->scheduleAfter(wait_time); - } + scheduleNext(query_id); return elem; } diff --git a/src/Interpreters/QueryMetricLog.h b/src/Interpreters/QueryMetricLog.h index 802cee7bf26..9371dfbb6b5 100644 --- a/src/Interpreters/QueryMetricLog.h +++ b/src/Interpreters/QueryMetricLog.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -11,11 +12,17 @@ #include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +}; + /** QueryMetricLogElement is a log of query metric values measured at regular time interval. */ @@ -34,7 +41,7 @@ struct QueryMetricLogElement void appendToBlock(MutableColumns & columns) const; }; -struct QueryMetricLogStatus +struct QueryMetricLogStatusInfo { UInt64 interval_milliseconds; std::chrono::system_clock::time_point last_collect_time; @@ -43,24 +50,47 @@ struct QueryMetricLogStatus BackgroundSchedulePool::TaskHolder task; }; +struct QueryMetricLogStatus +{ + using TimePoint = std::chrono::system_clock::time_point; + using Mutex = std::mutex; + + QueryMetricLogStatusInfo info TSA_GUARDED_BY(getMutex()); + + /// We need to be able to move it for the hash map, so we need to add an indirection here. + std::unique_ptr mutex = std::make_unique(); + + /// Return a reference to the mutex, used for Thread Sanitizer annotations. + Mutex & getMutex() const TSA_RETURN_CAPABILITY(mutex) + { + if (!mutex) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mutex cannot be NULL"); + return *mutex; + } + + void scheduleNext(String query_id) TSA_REQUIRES(getMutex()); + std::optional createLogMetricElement(const String & query_id, const QueryStatusInfo & query_info, TimePoint query_info_time, bool schedule_next = true) TSA_REQUIRES(getMutex()); +}; + class QueryMetricLog : public SystemLog { using SystemLog::SystemLog; - using TimePoint = std::chrono::system_clock::time_point; using Base = SystemLog; public: + using TimePoint = std::chrono::system_clock::time_point; + void shutdown() final; - // Both startQuery and finishQuery are called from the thread that executes the query + /// Both startQuery and finishQuery are called from the thread that executes the query. void startQuery(const String & query_id, TimePoint start_time, UInt64 interval_milliseconds); void finishQuery(const String & query_id, TimePoint finish_time, QueryStatusInfoPtr query_info = nullptr); private: - std::optional createLogMetricElement(const String & query_id, const QueryStatusInfo & query_info, TimePoint query_info_time, bool schedule_next = true); + void collectMetric(const ProcessList & process_list, String query_id); - std::recursive_mutex queries_mutex; - std::unordered_map queries; + std::mutex queries_mutex; + std::unordered_map queries TSA_GUARDED_BY(queries_mutex); }; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index fa28fa04ab1..0bc1d4956a1 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -505,6 +505,7 @@ void logQueryFinish( auto time_now = std::chrono::system_clock::now(); QueryStatusInfo info = process_list_elem->getInfo(true, settings[Setting::log_profile_events]); + logQueryMetricLogFinish(context, internal, elem.client_info.current_query_id, time_now, std::make_shared(info)); elem.type = QueryLogElementType::QUERY_FINISH; addStatusInfoToQueryLogElement(elem, info, query_ast, context); @@ -623,6 +624,7 @@ void logQueryException( { elem.query_duration_ms = start_watch.elapsedMilliseconds(); } + logQueryMetricLogFinish(context, internal, elem.client_info.current_query_id, time_now, info); elem.query_cache_usage = QueryCache::Usage::None; @@ -652,8 +654,6 @@ void logQueryException( query_span->addAttribute("clickhouse.exception_code", elem.exception_code); query_span->finish(); } - - logQueryMetricLogFinish(context, internal, elem.client_info.current_query_id, time_now, info); } void logExceptionBeforeStart( @@ -707,6 +707,8 @@ void logExceptionBeforeStart( elem.client_info = context->getClientInfo(); + logQueryMetricLogFinish(context, false, elem.client_info.current_query_id, std::chrono::system_clock::now(), nullptr); + elem.log_comment = settings[Setting::log_comment]; if (elem.log_comment.size() > settings[Setting::max_query_size]) elem.log_comment.resize(settings[Setting::max_query_size]); @@ -751,8 +753,6 @@ void logExceptionBeforeStart( ProfileEvents::increment(ProfileEvents::FailedInsertQuery); } } - - logQueryMetricLogFinish(context, false, elem.client_info.current_query_id, std::chrono::system_clock::now(), nullptr); } void validateAnalyzerSettings(ASTPtr ast, bool context_value) diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp b/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp index fb3ed7f80fc..1832cc2ad42 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.cpp @@ -6,12 +6,23 @@ namespace DB { +namespace Setting +{ + extern const SettingsBool query_plan_merge_filters; +} + BuildQueryPipelineSettings BuildQueryPipelineSettings::fromContext(ContextPtr from) { + const auto & query_settings = from->getSettingsRef(); BuildQueryPipelineSettings settings; - settings.actions_settings = ExpressionActionsSettings::fromSettings(from->getSettingsRef(), CompileExpressions::yes); + settings.actions_settings = ExpressionActionsSettings::fromSettings(query_settings, CompileExpressions::yes); settings.process_list_element = from->getProcessListElement(); settings.progress_callback = from->getProgressCallback(); + + /// Setting query_plan_merge_filters is enabled by default. + /// But it can brake short-circuit without splitting filter step into smaller steps. + /// So, enable and disable this optimizations together. + settings.enable_multiple_filters_transforms_for_and_chain = query_settings[Setting::query_plan_merge_filters]; return settings; } diff --git a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h index d99f9a7d1f1..6219e37db58 100644 --- a/src/Processors/QueryPlan/BuildQueryPipelineSettings.h +++ b/src/Processors/QueryPlan/BuildQueryPipelineSettings.h @@ -17,6 +17,8 @@ using TemporaryFileLookupPtr = std::shared_ptr; struct BuildQueryPipelineSettings { + bool enable_multiple_filters_transforms_for_and_chain = true; + ExpressionActionsSettings actions_settings; QueryStatusPtr process_list_element; ProgressCallback progress_callback = nullptr; diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp index 862e03d74f2..af9e3f0c515 100644 --- a/src/Processors/QueryPlan/FilterStep.cpp +++ b/src/Processors/QueryPlan/FilterStep.cpp @@ -5,6 +5,11 @@ #include #include #include +#include +#include +#include +#include +#include namespace DB { @@ -24,6 +29,92 @@ static ITransformingStep::Traits getTraits() }; } +static bool isTrivialSubtree(const ActionsDAG::Node * node) +{ + while (node->type == ActionsDAG::ActionType::ALIAS) + node = node->children.at(0); + + return node->type != ActionsDAG::ActionType::FUNCTION && node->type != ActionsDAG::ActionType::ARRAY_JOIN; +} + +struct ActionsAndName +{ + ActionsDAG dag; + std::string name; +}; + +static ActionsAndName splitSingleAndFilter(ActionsDAG & dag, const ActionsDAG::Node * filter_node) +{ + auto split_result = dag.split({filter_node}, true); + dag = std::move(split_result.second); + + const auto * split_filter_node = split_result.split_nodes_mapping[filter_node]; + auto filter_type = removeLowCardinality(split_filter_node->result_type); + if (!filter_type->onlyNull() && !isUInt8(removeNullable(filter_type))) + { + DataTypePtr cast_type = std::make_shared(); + if (filter_type->isNullable()) + cast_type = std::make_shared(std::move(cast_type)); + + split_filter_node = &split_result.first.addCast(*split_filter_node, cast_type, {}); + } + + split_result.first.getOutputs().emplace(split_result.first.getOutputs().begin(), split_filter_node); + auto name = split_filter_node->result_name; + return ActionsAndName{std::move(split_result.first), std::move(name)}; +} + +/// Try to split the left most AND atom to a separate DAG. +static std::optional trySplitSingleAndFilter(ActionsDAG & dag, const std::string & filter_name) +{ + const auto * filter = &dag.findInOutputs(filter_name); + while (filter->type == ActionsDAG::ActionType::ALIAS) + filter = filter->children.at(0); + + if (filter->type != ActionsDAG::ActionType::FUNCTION || filter->function_base->getName() != "and") + return {}; + + const ActionsDAG::Node * condition_to_split = nullptr; + std::stack nodes; + nodes.push(filter); + while (!nodes.empty()) + { + const auto * node = nodes.top(); + nodes.pop(); + + if (node->type == ActionsDAG::ActionType::FUNCTION && node->function_base->getName() == "and") + { + /// The order is important. We should take the left-most atom, so put conditions on stack in reverse order. + for (const auto * child : node->children | std::ranges::views::reverse) + nodes.push(child); + + continue; + } + + if (isTrivialSubtree(node)) + continue; + + /// Do not split subtree if it's the last non-trivial one. + /// So, split the first found condition only when there is a another one found. + if (condition_to_split) + return splitSingleAndFilter(dag, condition_to_split); + + condition_to_split = node; + } + + return {}; +} + +std::vector splitAndChainIntoMultipleFilters(ActionsDAG & dag, const std::string & filter_name) +{ + std::vector res; + + while (auto condition = trySplitSingleAndFilter(dag, filter_name)) + res.push_back(std::move(*condition)); + + return res; +} + FilterStep::FilterStep( const Header & input_header_, ActionsDAG actions_dag_, @@ -50,6 +141,23 @@ FilterStep::FilterStep( void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings) { + std::vector and_atoms; + + /// Splitting AND filter condition to steps under the setting, which is enabled with merge_filters optimization. + /// This is needed to support short-circuit properly. + if (settings.enable_multiple_filters_transforms_for_and_chain && !actions_dag.hasStatefulFunctions()) + and_atoms = splitAndChainIntoMultipleFilters(actions_dag, filter_column_name); + + for (auto & and_atom : and_atoms) + { + auto expression = std::make_shared(std::move(and_atom.dag), settings.getActionsSettings()); + pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) + { + bool on_totals = stream_type == QueryPipelineBuilder::StreamType::Totals; + return std::make_shared(header, expression, and_atom.name, true, on_totals); + }); + } + auto expression = std::make_shared(std::move(actions_dag), settings.getActionsSettings()); pipeline.addSimpleTransform([&](const Block & header, QueryPipelineBuilder::StreamType stream_type) @@ -76,18 +184,45 @@ void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ void FilterStep::describeActions(FormatSettings & settings) const { String prefix(settings.offset, settings.indent_char); + + auto cloned_dag = actions_dag.clone(); + + std::vector and_atoms; + if (!actions_dag.hasStatefulFunctions()) + and_atoms = splitAndChainIntoMultipleFilters(cloned_dag, filter_column_name); + + for (auto & and_atom : and_atoms) + { + auto expression = std::make_shared(std::move(and_atom.dag)); + settings.out << prefix << "AND column: " << and_atom.name << '\n'; + expression->describeActions(settings.out, prefix); + } + settings.out << prefix << "Filter column: " << filter_column_name; if (remove_filter_column) settings.out << " (removed)"; settings.out << '\n'; - auto expression = std::make_shared(actions_dag.clone()); + auto expression = std::make_shared(std::move(cloned_dag)); expression->describeActions(settings.out, prefix); } void FilterStep::describeActions(JSONBuilder::JSONMap & map) const { + auto cloned_dag = actions_dag.clone(); + + std::vector and_atoms; + if (!actions_dag.hasStatefulFunctions()) + and_atoms = splitAndChainIntoMultipleFilters(cloned_dag, filter_column_name); + + for (auto & and_atom : and_atoms) + { + auto expression = std::make_shared(std::move(and_atom.dag)); + map.add("AND column", and_atom.name); + map.add("Expression", expression->toTree()); + } + map.add("Filter Column", filter_column_name); map.add("Removes Filter", remove_filter_column); diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index 6232fc7f54f..55a9d18f063 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -32,7 +32,7 @@ struct QueryPlanOptimizationSettings bool merge_expressions = true; /// If merge-filters optimization is enabled. - bool merge_filters = false; + bool merge_filters = true; /// If filter push down optimization is enabled. bool filter_push_down = true; diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 26936a19a20..626e43898e4 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -176,6 +176,7 @@ namespace Setting extern const SettingsBool use_skip_indexes; extern const SettingsBool use_skip_indexes_if_final; extern const SettingsBool use_uncompressed_cache; + extern const SettingsBool query_plan_merge_filters; extern const SettingsUInt64 merge_tree_min_read_task_size; extern const SettingsBool read_in_order_use_virtual_row; } @@ -208,6 +209,7 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings( .use_asynchronous_read_from_pool = settings[Setting::allow_asynchronous_read_from_io_pool_for_merge_tree] && (settings[Setting::max_streams_to_max_threads_ratio] > 1 || settings[Setting::max_streams_for_merge_tree_reading] > 1), .enable_multiple_prewhere_read_steps = settings[Setting::enable_multiple_prewhere_read_steps], + .force_short_circuit_execution = settings[Setting::query_plan_merge_filters] }; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 51c445945e6..ea01a0ed0f9 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2263,18 +2263,18 @@ void IMergeTreeDataPart::checkConsistencyWithProjections(bool require_part_metad proj_part->checkConsistency(require_part_metadata); } -void IMergeTreeDataPart::calculateColumnsAndSecondaryIndicesSizesOnDisk() +void IMergeTreeDataPart::calculateColumnsAndSecondaryIndicesSizesOnDisk(std::optional columns_sample) { - calculateColumnsSizesOnDisk(); + calculateColumnsSizesOnDisk(columns_sample); calculateSecondaryIndicesSizesOnDisk(); } -void IMergeTreeDataPart::calculateColumnsSizesOnDisk() +void IMergeTreeDataPart::calculateColumnsSizesOnDisk(std::optional columns_sample) { if (getColumns().empty() || checksums.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot calculate columns sizes when columns or checksums are not initialized"); - calculateEachColumnSizes(columns_sizes, total_columns_size); + calculateEachColumnSizes(columns_sizes, total_columns_size, columns_sample); } void IMergeTreeDataPart::calculateSecondaryIndicesSizesOnDisk() @@ -2521,22 +2521,24 @@ ColumnPtr IMergeTreeDataPart::getColumnSample(const NameAndTypePair & column) co StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr(); StorageSnapshotPtr storage_snapshot_ptr = std::make_shared(storage, metadata_ptr); + MergeTreeReaderSettings settings; + settings.can_read_part_without_marks = true; MergeTreeReaderPtr reader = getReader( cols, storage_snapshot_ptr, - MarkRanges{MarkRange(0, 1)}, + MarkRanges{MarkRange(0, total_mark)}, /*virtual_fields=*/ {}, /*uncompressed_cache=*/{}, storage.getContext()->getMarkCache().get(), std::make_shared(), - MergeTreeReaderSettings{}, + settings, ValueSizeMap{}, ReadBufferFromFileBase::ProfileCallback{}); Columns result; result.resize(1); - reader->readRows(0, 1, false, 0, result); + reader->readRows(0, total_mark, false, 0, result); return result[0]; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 55f1265318c..24625edf154 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -428,7 +428,7 @@ public: bool shallParticipateInMerges(const StoragePolicyPtr & storage_policy) const; /// Calculate column and secondary indices sizes on disk. - void calculateColumnsAndSecondaryIndicesSizesOnDisk(); + void calculateColumnsAndSecondaryIndicesSizesOnDisk(std::optional columns_sample = std::nullopt); std::optional getRelativePathForPrefix(const String & prefix, bool detached = false, bool broken = false) const; @@ -633,7 +633,7 @@ protected: /// Fill each_columns_size and total_size with sizes from columns files on /// disk using columns and checksums. - virtual void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const = 0; + virtual void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size, std::optional columns_sample) const = 0; std::optional getRelativePathForDetachedPart(const String & prefix, bool broken) const; @@ -715,7 +715,7 @@ private: void loadPartitionAndMinMaxIndex(); - void calculateColumnsSizesOnDisk(); + void calculateColumnsSizesOnDisk(std::optional columns_sample = std::nullopt); void calculateSecondaryIndicesSizesOnDisk(); diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index d1c76505d7c..8923f6a59ca 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -54,6 +54,8 @@ public: const MergeTreeIndexGranularity & getIndexGranularity() const { return index_granularity; } + virtual Block getColumnsSample() const = 0; + protected: SerializationPtr getSerialization(const String & column_name) const; diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 7ba358d2d35..4a7e02a7a51 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -330,7 +330,7 @@ MergeTreeReadTaskColumns getReadTaskColumns( auto prewhere_actions = MergeTreeSelectProcessor::getPrewhereActions( prewhere_info, actions_settings, - reader_settings.enable_multiple_prewhere_read_steps); + reader_settings.enable_multiple_prewhere_read_steps, reader_settings.force_short_circuit_execution); for (const auto & step : prewhere_actions.steps) add_step(*step); diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 14c2da82de1..8856f467b90 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -80,7 +80,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartCompactWriter( } -void MergeTreeDataPartCompact::calculateEachColumnSizes(ColumnSizeByName & /*each_columns_size*/, ColumnSize & total_size) const +void MergeTreeDataPartCompact::calculateEachColumnSizes(ColumnSizeByName & /*each_columns_size*/, ColumnSize & total_size, std::optional /*columns_sample*/) const { auto bin_checksum = checksums.files.find(DATA_FILE_NAME_WITH_EXTENSION); if (bin_checksum != checksums.files.end()) diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 8e279571578..c394de0d7c1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -70,7 +70,7 @@ private: void loadIndexGranularity() override; /// Compact parts don't support per column size, only total size - void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override; + void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size, std::optional columns_sample) const override; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index c515d645253..39f96ba06ad 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -82,7 +82,7 @@ MergeTreeDataPartWriterPtr createMergeTreeDataPartWideWriter( /// Takes into account the fact that several columns can e.g. share their .size substreams. /// When calculating totals these should be counted only once. ColumnSize MergeTreeDataPartWide::getColumnSizeImpl( - const NameAndTypePair & column, std::unordered_set * processed_substreams) const + const NameAndTypePair & column, std::unordered_set * processed_substreams, std::optional columns_sample) const { ColumnSize size; if (checksums.empty()) @@ -108,7 +108,7 @@ ColumnSize MergeTreeDataPartWide::getColumnSizeImpl( auto mrk_checksum = checksums.files.find(*stream_name + getMarksFileExtension()); if (mrk_checksum != checksums.files.end()) size.marks += mrk_checksum->second.file_size; - }); + }, column.type, columns_sample && columns_sample->has(column.name) ? columns_sample->getByName(column.name).column : getColumnSample(column)); return size; } @@ -374,12 +374,12 @@ std::optional MergeTreeDataPartWide::getFileNameForColumn(const NameAndT return filename; } -void MergeTreeDataPartWide::calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const +void MergeTreeDataPartWide::calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size, std::optional columns_sample) const { std::unordered_set processed_substreams; for (const auto & column : columns) { - ColumnSize size = getColumnSizeImpl(column, &processed_substreams); + ColumnSize size = getColumnSizeImpl(column, &processed_substreams, columns_sample); each_columns_size[column.name] = size; total_size.add(size); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 022a5fb746c..a6d4897ed87 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -64,9 +64,9 @@ private: /// Loads marks index granularity into memory void loadIndexGranularity() override; - ColumnSize getColumnSizeImpl(const NameAndTypePair & column, std::unordered_set * processed_substreams) const; + ColumnSize getColumnSizeImpl(const NameAndTypePair & column, std::unordered_set * processed_substreams, std::optional columns_sample) const; - void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size) const override; + void calculateEachColumnSizes(ColumnSizeByName & each_columns_size, ColumnSize & total_size, std::optional columns_sample) const override; }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 49d654c15e1..b22d58ba51e 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -123,6 +123,8 @@ public: written_offset_columns = written_offset_columns_; } + Block getColumnsSample() const override { return block_sample; } + protected: /// Count index_granularity for block and store in `index_granularity` size_t computeIndexGranularity(const Block & block) const; diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 4d1d2533729..7506c726bc4 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -45,6 +45,8 @@ struct MergeTreeReaderSettings bool use_asynchronous_read_from_pool = false; /// If PREWHERE has multiple conditions combined with AND, execute them in separate read/filtering steps. bool enable_multiple_prewhere_read_steps = false; + /// In case of multiple prewhere steps, execute filtering earlier to support short-circuit properly. + bool force_short_circuit_execution = false; /// If true, try to lower size of read buffer according to granule size and compressed block size. bool adjust_read_buffer_size = true; /// If true, it's allowed to read the whole part without reading marks. diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 77231d8d392..885bd1ded8c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -172,7 +172,7 @@ size_t MergeTreeReaderWide::readRows( throw; } - if (column->empty()) + if (column->empty() && max_rows_to_read > 0) res_columns[pos] = nullptr; } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 867040531e4..426e95fd95d 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -91,7 +91,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( , algorithm(std::move(algorithm_)) , prewhere_info(prewhere_info_) , actions_settings(actions_settings_) - , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps)) + , prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps, reader_settings_.force_short_circuit_execution)) , reader_settings(reader_settings_) , result_header(transformHeader(pool->getHeader(), prewhere_info)) { @@ -124,9 +124,9 @@ String MergeTreeSelectProcessor::getName() const return fmt::format("MergeTreeSelect(pool: {}, algorithm: {})", pool->getName(), algorithm->getName()); } -bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere); +bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere, bool force_short_circuit_execution); -PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, bool enable_multiple_prewhere_read_steps) +PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, bool enable_multiple_prewhere_read_steps, bool force_short_circuit_execution) { PrewhereExprInfo prewhere_actions; if (prewhere_info) @@ -147,7 +147,7 @@ PrewhereExprInfo MergeTreeSelectProcessor::getPrewhereActions(PrewhereInfoPtr pr } if (!enable_multiple_prewhere_read_steps || - !tryBuildPrewhereSteps(prewhere_info, actions_settings, prewhere_actions)) + !tryBuildPrewhereSteps(prewhere_info, actions_settings, prewhere_actions, force_short_circuit_execution)) { PrewhereExprStep prewhere_step { diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index 33069a78e33..32a761cefb7 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -73,7 +73,8 @@ public: static PrewhereExprInfo getPrewhereActions( PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, - bool enable_multiple_prewhere_read_steps); + bool enable_multiple_prewhere_read_steps, + bool force_short_circuit_execution); void addPartLevelToChunk(bool add_part_level_) { add_part_level = add_part_level_; } diff --git a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp index 9c82817e8cb..1cc4006a285 100644 --- a/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp +++ b/src/Storages/MergeTree/MergeTreeSplitPrewhereIntoReadSteps.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -57,9 +58,9 @@ struct DAGNodeRef const ActionsDAG::Node * node; }; -/// Result name -> DAGNodeRef -using OriginalToNewNodeMap = std::unordered_map; -using NodeNameToLastUsedStepMap = std::unordered_map; +/// ResultNode -> DAGNodeRef +using OriginalToNewNodeMap = std::unordered_map; +using NodeNameToLastUsedStepMap = std::unordered_map; /// Clones the part of original DAG responsible for computing the original_dag_node and adds it to the new DAG. const ActionsDAG::Node & addClonedDAGToDAG( @@ -69,25 +70,28 @@ const ActionsDAG::Node & addClonedDAGToDAG( OriginalToNewNodeMap & node_remap, NodeNameToLastUsedStepMap & node_to_step_map) { - const String & node_name = original_dag_node->result_name; /// Look for the node in the map of already known nodes - if (node_remap.contains(node_name)) + if (node_remap.contains(original_dag_node)) { /// If the node is already in the new DAG, return it - const auto & node_ref = node_remap.at(node_name); + const auto & node_ref = node_remap.at(original_dag_node); if (node_ref.dag == new_dag.get()) return *node_ref.node; /// If the node is known from the previous steps, add it as an input, except for constants if (original_dag_node->type != ActionsDAG::ActionType::COLUMN) { - node_ref.dag->addOrReplaceInOutputs(*node_ref.node); + /// If the node was found in node_remap, it was not added to outputs yet. + /// The only exception is the filter node, which is always the first one. + if (node_ref.dag->getOutputs().at(0) != node_ref.node) + node_ref.dag->getOutputs().push_back(node_ref.node); + const auto & new_node = new_dag->addInput(node_ref.node->result_name, node_ref.node->result_type); - node_remap[node_name] = {new_dag.get(), &new_node}; /// TODO: here we update the node reference. Is it always correct? + node_remap[original_dag_node] = {new_dag.get(), &new_node}; /// Remember the index of the last step which reuses this node. /// We cannot remove this node from the outputs before that step. - node_to_step_map[node_name] = step; + node_to_step_map[original_dag_node] = step; return new_node; } } @@ -96,7 +100,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( if (original_dag_node->type == ActionsDAG::ActionType::INPUT) { const auto & new_node = new_dag->addInput(original_dag_node->result_name, original_dag_node->result_type); - node_remap[node_name] = {new_dag.get(), &new_node}; + node_remap[original_dag_node] = {new_dag.get(), &new_node}; return new_node; } @@ -105,7 +109,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { const auto & new_node = new_dag->addColumn( ColumnWithTypeAndName(original_dag_node->column, original_dag_node->result_type, original_dag_node->result_name)); - node_remap[node_name] = {new_dag.get(), &new_node}; + node_remap[original_dag_node] = {new_dag.get(), &new_node}; return new_node; } @@ -113,7 +117,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( { const auto & alias_child = addClonedDAGToDAG(step, original_dag_node->children[0], new_dag, node_remap, node_to_step_map); const auto & new_node = new_dag->addAlias(alias_child, original_dag_node->result_name); - node_remap[node_name] = {new_dag.get(), &new_node}; + node_remap[original_dag_node] = {new_dag.get(), &new_node}; return new_node; } @@ -128,7 +132,7 @@ const ActionsDAG::Node & addClonedDAGToDAG( } const auto & new_node = new_dag->addFunction(original_dag_node->function_base, new_children, original_dag_node->result_name); - node_remap[node_name] = {new_dag.get(), &new_node}; + node_remap[original_dag_node] = {new_dag.get(), &new_node}; return new_node; } @@ -138,11 +142,9 @@ const ActionsDAG::Node & addClonedDAGToDAG( const ActionsDAG::Node & addFunction( const ActionsDAGPtr & new_dag, const FunctionOverloadResolverPtr & function, - ActionsDAG::NodeRawConstPtrs children, - OriginalToNewNodeMap & node_remap) + ActionsDAG::NodeRawConstPtrs children) { const auto & new_node = new_dag->addFunction(function, children, ""); - node_remap[new_node.result_name] = {new_dag.get(), &new_node}; return new_node; } @@ -152,14 +154,12 @@ const ActionsDAG::Node & addFunction( const ActionsDAG::Node & addCast( const ActionsDAGPtr & dag, const ActionsDAG::Node & node_to_cast, - const DataTypePtr & to_type, - OriginalToNewNodeMap & node_remap) + const DataTypePtr & to_type) { if (!node_to_cast.result_type->equals(*to_type)) return node_to_cast; const auto & new_node = dag->addCast(node_to_cast, to_type, {}); - node_remap[new_node.result_name] = {dag.get(), &new_node}; return new_node; } @@ -169,8 +169,7 @@ const ActionsDAG::Node & addCast( /// 2. makes sure that the result contains only 0 or 1 values even if the source column contains non-boolean values. const ActionsDAG::Node & addAndTrue( const ActionsDAGPtr & dag, - const ActionsDAG::Node & filter_node_to_normalize, - OriginalToNewNodeMap & node_remap) + const ActionsDAG::Node & filter_node_to_normalize) { Field const_true_value(true); @@ -181,7 +180,7 @@ const ActionsDAG::Node & addAndTrue( const auto * const_true_node = &dag->addColumn(std::move(const_true_column)); ActionsDAG::NodeRawConstPtrs children = {&filter_node_to_normalize, const_true_node}; FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - return addFunction(dag, func_builder_and, children, node_remap); + return addFunction(dag, func_builder_and, children); } } @@ -206,7 +205,11 @@ const ActionsDAG::Node & addAndTrue( /// 6. Find all outputs of the original DAG /// 7. Find all outputs that were computed in the already built DAGs, mark these nodes as outputs in the steps where they were computed /// 8. Add computation of the remaining outputs to the last step with the procedure similar to 4 -bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionActionsSettings & actions_settings, PrewhereExprInfo & prewhere) +bool tryBuildPrewhereSteps( + PrewhereInfoPtr prewhere_info, + const ExpressionActionsSettings & actions_settings, + PrewhereExprInfo & prewhere, + bool force_short_circuit_execution) { if (!prewhere_info) return true; @@ -243,7 +246,10 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction struct Step { ActionsDAGPtr actions; - String column_name; + /// Original condition, in case if we have only one condition, and it was not casted + const ActionsDAG::Node * original_node; + /// Result condition node + const ActionsDAG::Node * result_node; }; std::vector steps; @@ -254,7 +260,8 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction { const auto & condition_group = condition_groups[step_index]; ActionsDAGPtr step_dag = std::make_unique(); - String result_name; + const ActionsDAG::Node * original_node = nullptr; + const ActionsDAG::Node * result_node; std::vector new_condition_nodes; for (const auto * node : condition_group) @@ -267,48 +274,37 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction { /// Add AND function to combine the conditions FunctionOverloadResolverPtr func_builder_and = std::make_unique(std::make_shared()); - const auto & and_function_node = addFunction(step_dag, func_builder_and, new_condition_nodes, node_remap); - step_dag->addOrReplaceInOutputs(and_function_node); - result_name = and_function_node.result_name; + const auto & and_function_node = addFunction(step_dag, func_builder_and, new_condition_nodes); + result_node = &and_function_node; } else { - const auto & result_node = *new_condition_nodes.front(); + result_node = new_condition_nodes.front(); /// Check if explicit cast is needed for the condition to serve as a filter. - const auto result_type_name = result_node.result_type->getName(); - if (result_type_name == "UInt8" || - result_type_name == "Nullable(UInt8)" || - result_type_name == "LowCardinality(UInt8)" || - result_type_name == "LowCardinality(Nullable(UInt8))") - { - /// No need to cast - step_dag->addOrReplaceInOutputs(result_node); - result_name = result_node.result_name; - } - else + if (!isUInt8(removeNullable(removeLowCardinality(result_node->result_type)))) { /// Build "condition AND True" expression to "cast" the condition to UInt8 or Nullable(UInt8) depending on its type. - const auto & cast_node = addAndTrue(step_dag, result_node, node_remap); - step_dag->addOrReplaceInOutputs(cast_node); - result_name = cast_node.result_name; + result_node = &addAndTrue(step_dag, *result_node); } } - steps.push_back({std::move(step_dag), result_name}); + step_dag->getOutputs().insert(step_dag->getOutputs().begin(), result_node); + steps.push_back({std::move(step_dag), original_node, result_node}); } /// 6. Find all outputs of the original DAG auto original_outputs = prewhere_info->prewhere_actions.getOutputs(); + steps.back().actions->getOutputs().clear(); /// 7. Find all outputs that were computed in the already built DAGs, mark these nodes as outputs in the steps where they were computed /// 8. Add computation of the remaining outputs to the last step with the procedure similar to 4 - NameSet all_output_names; + std::unordered_set all_outputs; for (const auto * output : original_outputs) { - all_output_names.insert(output->result_name); - if (node_remap.contains(output->result_name)) + all_outputs.insert(output); + if (node_remap.contains(output)) { - const auto & new_node_info = node_remap[output->result_name]; - new_node_info.dag->addOrReplaceInOutputs(*new_node_info.node); + const auto & new_node_info = node_remap[output]; + new_node_info.dag->getOutputs().push_back(new_node_info.node); } else if (output->result_name == prewhere_info->prewhere_column_name) { @@ -319,20 +315,21 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction /// 1. AND the last condition with constant True. This is needed to make sure that in the last step filter has UInt8 type /// but contains values other than 0 and 1 (e.g. if it is (number%5) it contains 2,3,4) /// 2. CAST the result to the exact type of the PREWHERE column from the original DAG - const auto & last_step_result_node_info = node_remap[steps.back().column_name]; auto & last_step_dag = steps.back().actions; + auto & last_step_result_node = steps.back().result_node; /// Build AND(last_step_result_node, true) - const auto & and_node = addAndTrue(last_step_dag, *last_step_result_node_info.node, node_remap); + const auto & and_node = addAndTrue(last_step_dag, *last_step_result_node); /// Build CAST(and_node, type of PREWHERE column) - const auto & cast_node = addCast(last_step_dag, and_node, output->result_type, node_remap); + const auto & cast_node = addCast(last_step_dag, and_node, output->result_type); /// Add alias for the result with the name of the PREWHERE column const auto & prewhere_result_node = last_step_dag->addAlias(cast_node, output->result_name); - last_step_dag->addOrReplaceInOutputs(prewhere_result_node); + last_step_dag->getOutputs().push_back(&prewhere_result_node); + steps.back().result_node = &prewhere_result_node; } else { const auto & node_in_new_dag = addClonedDAGToDAG(steps.size() - 1, output, steps.back().actions, node_remap, node_to_step); - steps.back().actions->addOrReplaceInOutputs(node_in_new_dag); + steps.back().actions->getOutputs().push_back(&node_in_new_dag); } } @@ -345,17 +342,18 @@ bool tryBuildPrewhereSteps(PrewhereInfoPtr prewhere_info, const ExpressionAction { .type = PrewhereExprStep::Filter, .actions = std::make_shared(std::move(*step.actions), actions_settings), - .filter_column_name = step.column_name, + .filter_column_name = step.result_node->result_name, /// Don't remove if it's in the list of original outputs .remove_filter_column = - !all_output_names.contains(step.column_name) && node_to_step[step.column_name] <= step_index, - .need_filter = false, + step.original_node && !all_outputs.contains(step.original_node) && node_to_step[step.original_node] <= step_index, + .need_filter = force_short_circuit_execution, .perform_alter_conversions = true, }; prewhere.steps.push_back(std::make_shared(std::move(new_step))); } + prewhere.steps.back()->remove_filter_column = prewhere_info->remove_prewhere_column; prewhere.steps.back()->need_filter = prewhere_info->need_filter; } diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 39096718b5c..14a521ce429 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -209,7 +209,7 @@ MergedBlockOutputStream::Finalizer MergedBlockOutputStream::finalizePartAsync( new_part->index_granularity = writer->getIndexGranularity(); /// Just in case new_part->index_granularity.shrinkToFitInMemory(); - new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(); + new_part->calculateColumnsAndSecondaryIndicesSizesOnDisk(writer->getColumnsSample()); /// In mutation, existing_rows_count is already calculated in PartMergerWriter /// In merge situation, lightweight deleted rows was physically deleted, existing_rows_count equals rows_count diff --git a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp index ba1a97bc2fb..e702f07208a 100644 --- a/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp +++ b/src/Storages/ObjectStorageQueue/ObjectStorageQueueSource.cpp @@ -659,7 +659,7 @@ void ObjectStorageQueueSource::applyActionAfterProcessing(const String & path) { if (files_metadata->getTableMetadata().after_processing == ObjectStorageQueueAction::DELETE) { - object_storage->removeObject(StoredObject(path)); + object_storage->removeObjectIfExists(StoredObject(path)); } } diff --git a/tests/ci/ci.py b/tests/ci/ci.py index d6eeb6a3613..eb0fe7c85c3 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -798,10 +798,6 @@ def _upload_build_profile_data( logging.info("Unknown CI logs host, skip uploading build profile data") return - if not pr_info.number == 0: - logging.info("Skipping uploading build profile data for PRs") - return - instance_type = get_instance_type() instance_id = get_instance_id() auth = { diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index edf93b4b39f..7fc7556e85b 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -82,12 +82,12 @@ Filter column: notEquals(__table1.y, 0_UInt8) 9 10 > one condition of filter should be pushed down after aggregating, other two conditions are ANDed Filter column -FUNCTION and(minus(s, 8) :: 5, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) +FUNCTION and(minus(s, 8) :: 3, minus(s, 4) :: 5) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4)) Aggregating Filter column: notEquals(y, 0) > (analyzer) one condition of filter should be pushed down after aggregating, other two conditions are ANDed Filter column -FUNCTION and(minus(__table1.s, 8_UInt8) :: 1, minus(__table1.s, 4_UInt8) :: 2) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8)) +FUNCTION and(minus(__table1.s, 8_UInt8) :: 3, minus(__table1.s, 4_UInt8) :: 5) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8)) Aggregating Filter column: notEquals(__table1.y, 0_UInt8) 0 1 @@ -163,7 +163,6 @@ Filter column: notEquals(__table1.y, 2_UInt8) > filter is pushed down before CreatingSets CreatingSets Filter -Filter 1 3 > one condition of filter is pushed down before LEFT JOIN diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index 42cdac8c01f..04ab9bbd11c 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -89,14 +89,14 @@ $CLICKHOUSE_CLIENT --enable_analyzer=0 --convert_query_to_cnf=0 -q " select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 8 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 8) :: 5, minus(s, 4) :: 2) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(y, 0)\|FUNCTION and(minus(s, 8) :: 3, minus(s, 4) :: 5) -> and(notEquals(y, 0), minus(s, 8), minus(s, 4))" echo "> (analyzer) one condition of filter should be pushed down after aggregating, other two conditions are ANDed" $CLICKHOUSE_CLIENT --enable_analyzer=1 --convert_query_to_cnf=0 -q " explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 8 and s - 4 settings enable_optimize_predicate_expression=0" | - grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|FUNCTION and(minus(__table1.s, 8_UInt8) :: 1, minus(__table1.s, 4_UInt8) :: 2) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8))" + grep -o "Aggregating\|Filter column\|Filter column: notEquals(__table1.y, 0_UInt8)\|FUNCTION and(minus(__table1.s, 8_UInt8) :: 3, minus(__table1.s, 4_UInt8) :: 5) -> and(notEquals(__table1.y, 0_UInt8), minus(__table1.s, 8_UInt8), minus(__table1.s, 4_UInt8))" $CLICKHOUSE_CLIENT -q " select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference index 7824fd8cba9..00db41e8ac5 100644 --- a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -332,13 +332,12 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d Expression (Projection) Sorting (Sorting for ORDER BY) Expression (Before ORDER BY) - Filter ((WHERE + (Projection + Before ORDER BY))) - Filter (HAVING) - Aggregating - Expression ((Before GROUP BY + Projection)) - Sorting (Sorting for ORDER BY) - Expression ((Before ORDER BY + (Projection + Before ORDER BY))) - ReadFromSystemNumbers + Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING)) + Aggregating + Expression ((Before GROUP BY + Projection)) + Sorting (Sorting for ORDER BY) + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + ReadFromSystemNumbers -- execute 1 2 diff --git a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference index 9bb0c022752..a382e14ce03 100644 --- a/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference +++ b/tests/queries/0_stateless/02554_fix_grouping_sets_predicate_push_down.reference @@ -28,21 +28,17 @@ WHERE type_1 = \'all\' (Expression) ExpressionTransform × 2 (Filter) - FilterTransform × 2 - (Filter) - FilterTransform × 2 - (Filter) - FilterTransform × 2 - (Aggregating) - ExpressionTransform × 2 - AggregatingTransform × 2 - Copy 1 → 2 - (Expression) - ExpressionTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + FilterTransform × 6 + (Aggregating) + ExpressionTransform × 2 + AggregatingTransform × 2 + Copy 1 → 2 + (Expression) + ExpressionTransform + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Filter) @@ -68,14 +64,10 @@ ExpressionTransform × 2 ExpressionTransform × 2 AggregatingTransform × 2 Copy 1 → 2 - (Filter) - FilterTransform - (Filter) - FilterTransform - (Expression) - ExpressionTransform - (ReadFromMergeTree) - MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 + (Expression) + ExpressionTransform + (ReadFromMergeTree) + MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1 (Expression) ExpressionTransform × 2 (Aggregating) diff --git a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference index 80f4e309505..d0a3e7b02ae 100644 --- a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference +++ b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference @@ -163,17 +163,21 @@ Positions: 4 2 0 1 Filter (( + (JOIN actions + Change column names to column identifiers))) Header: __table1.id UInt64 __table1.value String - Filter column: and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) (removed) + AND column: equals(__table1.id, 5_UInt8) Actions: INPUT : 0 -> id UInt64 : 0 - INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 1 + FUNCTION equals(id : 0, 5_UInt8 :: 1) -> equals(__table1.id, 5_UInt8) UInt8 : 2 + Positions: 2 0 2 + Filter column: and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) (removed) + Actions: INPUT : 2 -> value String : 0 + INPUT : 1 -> id UInt64 : 1 COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 - COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 3 - ALIAS id : 0 -> __table1.id UInt64 : 4 - ALIAS value :: 1 -> __table1.value String : 5 - FUNCTION equals(id : 0, 6_UInt8 :: 2) -> equals(__table1.id, 6_UInt8) UInt8 : 1 - FUNCTION equals(id :: 0, 5_UInt8 :: 3) -> equals(__table1.id, 5_UInt8) UInt8 : 2 - FUNCTION and(equals(__table1.id, 5_UInt8) :: 2, equals(__table1.id, 6_UInt8) :: 1) -> and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) UInt8 : 3 - Positions: 3 4 5 + INPUT : 0 -> equals(__table1.id, 5_UInt8) UInt8 : 3 + ALIAS value :: 0 -> __table1.value String : 4 + ALIAS id : 1 -> __table1.id UInt64 : 0 + FUNCTION equals(id :: 1, 6_UInt8 :: 2) -> equals(__table1.id, 6_UInt8) UInt8 : 5 + FUNCTION and(equals(__table1.id, 5_UInt8) :: 3, equals(__table1.id, 6_UInt8) :: 5) -> and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) UInt8 : 2 + Positions: 2 0 4 ReadFromMergeTree (default.test_table_1) Header: id UInt64 value String @@ -183,17 +187,21 @@ Positions: 4 2 0 1 Filter (( + (JOIN actions + Change column names to column identifiers))) Header: __table2.id UInt64 __table2.value String - Filter column: and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) (removed) + AND column: equals(__table2.id, 6_UInt8) Actions: INPUT : 0 -> id UInt64 : 0 - INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 1 + FUNCTION equals(id : 0, 6_UInt8 :: 1) -> equals(__table2.id, 6_UInt8) UInt8 : 2 + Positions: 2 0 2 + Filter column: and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) (removed) + Actions: INPUT : 2 -> value String : 0 + INPUT : 1 -> id UInt64 : 1 COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 - COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 3 - ALIAS id : 0 -> __table2.id UInt64 : 4 - ALIAS value :: 1 -> __table2.value String : 5 - FUNCTION equals(id : 0, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 - FUNCTION equals(id :: 0, 6_UInt8 :: 3) -> equals(__table2.id, 6_UInt8) UInt8 : 2 - FUNCTION and(equals(__table2.id, 6_UInt8) :: 2, equals(__table2.id, 5_UInt8) :: 1) -> and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) UInt8 : 3 - Positions: 3 4 5 + INPUT : 0 -> equals(__table2.id, 6_UInt8) UInt8 : 3 + ALIAS value :: 0 -> __table2.value String : 4 + ALIAS id : 1 -> __table2.id UInt64 : 0 + FUNCTION equals(id :: 1, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 5 + FUNCTION and(equals(__table2.id, 6_UInt8) :: 3, equals(__table2.id, 5_UInt8) :: 5) -> and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) UInt8 : 2 + Positions: 2 0 4 ReadFromMergeTree (default.test_table_2) Header: id UInt64 value String @@ -656,17 +664,21 @@ Positions: 4 2 0 1 __table1.value String __table2.value String __table2.id UInt64 - Filter column: and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) (removed) + AND column: equals(__table1.id, 5_UInt8) Actions: INPUT : 0 -> __table1.id UInt64 : 0 - INPUT :: 1 -> __table1.value String : 1 - INPUT :: 2 -> __table2.value String : 2 - INPUT : 3 -> __table2.id UInt64 : 3 - COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 - COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 5 - FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 6 - FUNCTION equals(__table2.id : 3, 6_UInt8 :: 5) -> equals(__table2.id, 6_UInt8) UInt8 : 4 - FUNCTION and(equals(__table1.id, 5_UInt8) :: 6, equals(__table2.id, 6_UInt8) :: 4) -> and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) UInt8 : 5 - Positions: 5 0 1 2 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 1 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 1) -> equals(__table1.id, 5_UInt8) UInt8 : 2 + Positions: 2 0 2 + Filter column: and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) (removed) + Actions: INPUT :: 1 -> __table1.id UInt64 : 0 + INPUT :: 2 -> __table1.value String : 1 + INPUT :: 3 -> __table2.value String : 2 + INPUT : 4 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 4 + INPUT : 0 -> equals(__table1.id, 5_UInt8) UInt8 : 5 + FUNCTION equals(__table2.id : 3, 6_UInt8 :: 4) -> equals(__table2.id, 6_UInt8) UInt8 : 6 + FUNCTION and(equals(__table1.id, 5_UInt8) :: 5, equals(__table2.id, 6_UInt8) :: 6) -> and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) UInt8 : 4 + Positions: 4 0 1 2 3 Join (JOIN FillRightFirst) Header: __table1.id UInt64 __table1.value String diff --git a/tests/queries/0_stateless/03199_merge_filters_bug.sql b/tests/queries/0_stateless/03199_merge_filters_bug.sql index ed2ec2ea217..696856c9121 100644 --- a/tests/queries/0_stateless/03199_merge_filters_bug.sql +++ b/tests/queries/0_stateless/03199_merge_filters_bug.sql @@ -1,3 +1,5 @@ +set allow_reorder_prewhere_conditions=0; + drop table if exists t1; drop table if exists t2; @@ -49,7 +51,23 @@ tmp1 AS fs1 FROM t2 LEFT JOIN tmp1 USING (fs1) - WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0; + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0, query_plan_merge_filters=0; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 1, query_plan_merge_filters=1; optimize table t1 final; @@ -67,4 +85,20 @@ tmp1 AS fs1 FROM t2 LEFT JOIN tmp1 USING (fs1) - WHERE (fs1 IN ('test')); + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 0, query_plan_merge_filters=0; + +WITH +tmp1 AS +( + SELECT + CAST(s1, 'FixedString(10)') AS fs1, + s2 AS sector, + s3 + FROM t1 + WHERE (s3 != 'test') +) + SELECT + fs1 + FROM t2 + LEFT JOIN tmp1 USING (fs1) + WHERE (fs1 IN ('test')) SETTINGS enable_multiple_prewhere_read_steps = 1, query_plan_merge_filters=1; diff --git a/tests/queries/0_stateless/03203_system_query_metric_log.reference b/tests/queries/0_stateless/03203_system_query_metric_log.reference index 940b0c4e178..fa8e27a7e90 100644 --- a/tests/queries/0_stateless/03203_system_query_metric_log.reference +++ b/tests/queries/0_stateless/03203_system_query_metric_log.reference @@ -23,8 +23,8 @@ --Interval 123: check that the SleepFunctionCalls, SleepFunctionMilliseconds and ProfileEvent_SleepFunctionElapsedMicroseconds are correct 1 --Check that a query_metric_log_interval=0 disables the collection -0 +1 -Check that a query which execution time is less than query_metric_log_interval is never collected -0 +1 --Check that there is a final event when queries finish -3 +1 diff --git a/tests/queries/0_stateless/03203_system_query_metric_log.sh b/tests/queries/0_stateless/03203_system_query_metric_log.sh index bf94be79d7c..abcd14c8e5d 100755 --- a/tests/queries/0_stateless/03203_system_query_metric_log.sh +++ b/tests/queries/0_stateless/03203_system_query_metric_log.sh @@ -84,17 +84,17 @@ check_log 123 # query_metric_log_interval=0 disables the collection altogether $CLICKHOUSE_CLIENT -m -q """ SELECT '--Check that a query_metric_log_interval=0 disables the collection'; - SELECT count() FROM system.query_metric_log WHERE event_date >= yesterday() AND query_id = '${query_prefix}_0' + SELECT count() == 0 FROM system.query_metric_log WHERE event_date >= yesterday() AND query_id = '${query_prefix}_0' """ # a quick query that takes less than query_metric_log_interval is never collected $CLICKHOUSE_CLIENT -m -q """ SELECT '-Check that a query which execution time is less than query_metric_log_interval is never collected'; - SELECT count() FROM system.query_metric_log WHERE event_date >= yesterday() AND query_id = '${query_prefix}_fast' + SELECT count() == 0 FROM system.query_metric_log WHERE event_date >= yesterday() AND query_id = '${query_prefix}_fast' """ # a query that takes more than query_metric_log_interval is collected including the final row $CLICKHOUSE_CLIENT -m -q """ SELECT '--Check that there is a final event when queries finish'; - SELECT count() FROM system.query_metric_log WHERE event_date >= yesterday() AND query_id = '${query_prefix}_1000' + SELECT count() > 2 FROM system.query_metric_log WHERE event_date >= yesterday() AND query_id = '${query_prefix}_1000' """ diff --git a/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.reference b/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.reference index 063b76b152c..e4be64155d1 100644 --- a/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.reference +++ b/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.reference @@ -1,14 +1,44 @@ +parseDateTime64InJodaSyntax +2077-10-09 10:30:10.123 +1970-01-01 08:00:00 +1970-01-01 08:00:01 +2024-01-02 00:00:00 +1970-01-01 10:30:50 +2025-01-01 15:30:10.123456 +2024-01-01 00:00:01.123456 2024-10-09 10:30:10.123 2024-10-09 10:30:10.123456 2024-10-10 02:30:10.123456 +2024-11-05 17:02:03.123456 2024-10-10 01:30:10.123456 +2024-10-09 08:00:10.123456 +2024-09-10 10:30:10.123 +2024-09-10 10:30:10.000999999 +2024-10-10 03:15:10.123456 +2024-03-01 03:23:34 +2024-02-29 15:22:33 +2023-03-01 15:22:33 +2024-03-04 16:22:33 +2023-03-04 16:22:33 +parseDateTime64InJodaSyntaxOrZero 2024-10-09 10:30:10.123 2024-10-09 10:30:10.123456 1970-01-01 08:00:00.000000000 2024-10-10 02:30:10.123456 2024-10-10 01:30:10.123456 +2024-09-10 10:30:10.123 +1970-01-01 08:00:00.000 +1970-01-01 08:00:00 +1970-01-01 08:00:00 +1970-01-01 08:00:00.000 +parseDateTime64InJodaSyntaxOrNull 2024-10-09 10:30:10.123 2024-10-09 10:30:10.123456 \N 2024-10-10 02:30:10.123456 2024-10-10 01:30:10.123456 +2024-09-10 10:30:10.123 +\N +\N +\N +1970-01-01 00:00:00 diff --git a/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.sql b/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.sql index 9ea854bc324..7e024288f1c 100644 --- a/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.sql +++ b/tests/queries/0_stateless/03252_parse_datetime64_in_joda_syntax.sql @@ -1,19 +1,54 @@ set session_timezone = 'Asia/Shanghai'; +select 'parseDateTime64InJodaSyntax'; +select parseDateTime64InJodaSyntax('', ''); -- { serverError VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE } +select parseDateTime64InJodaSyntax('2077-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS'); +select parseDateTime64InJodaSyntax('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime64InJodaSyntax('+0000', 'Z'); +select parseDateTime64InJodaSyntax('08:01', 'HH:ss'); +select parseDateTime64InJodaSyntax('2024-01-02', 'yyyy-MM-dd'); +select parseDateTime64InJodaSyntax('10:30:50', 'HH:mm:ss'); +select parseDateTime64InJodaSyntax('2024-12-31 23:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); +select parseDateTime64InJodaSyntax('2024-01-01 00:00:01.123456+0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS'); select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'); select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456789', 'yyyy-MM-dd HH:mm:ss.SSSSSSSSS'); -- { serverError CANNOT_PARSE_DATETIME } select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); +select parseDateTime64InJodaSyntax('2024-11-05-0800 01:02:03.123456', 'yyyy-MM-ddZ HH:mm:ss.SSSSSS'); select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); - +select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456Australia/Adelaide', 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); +select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS'); +select parseDateTime64InJodaSyntax('999999 10-09-202410:30:10', 'SSSSSSSSS dd-MM-yyyyHH:mm:ss'); +select parseDateTime64InJodaSyntax('2024-10-09 10:30:10.123456-0845', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); +select parseDateTime64InJodaSyntax('2023-02-29 11:22:33Not/Timezone', 'yyyy-MM-dd HH:mm:ssz'); -- { serverError BAD_ARGUMENTS } +--leap years and non-leap years +select parseDateTime64InJodaSyntax('2024-02-29 11:23:34America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz'); +select parseDateTime64InJodaSyntax('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz'); -- { serverError CANNOT_PARSE_DATETIME } +select parseDateTime64InJodaSyntax('2024-02-28 23:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz'); +select parseDateTime64InJodaSyntax('2023-02-28 23:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz'); +select parseDateTime64InJodaSyntax('2024-03-01 00:22:33-8000', 'yyyy-MM-dd HH:mm:ssZ'); +select parseDateTime64InJodaSyntax('2023-03-01 00:22:33-8000', 'yyyy-MM-dd HH:mm:ssZ'); +select 'parseDateTime64InJodaSyntaxOrZero'; select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS'); select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'); select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456789', 'yyyy-MM-dd HH:mm:ss.SSSSSSSSS'); select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); - +select parseDateTime64InJodaSyntaxOrZero('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS'); +select parseDateTime64InJodaSyntaxOrZero('wrong value', 'yyyy-dd-MM HH:mm:ss.SSS'); +select parseDateTime64InJodaSyntaxOrZero('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz'); +select parseDateTime64InJodaSyntaxOrZero('', ''); +select parseDateTime64InJodaSyntaxOrZero('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS'); +select 'parseDateTime64InJodaSyntaxOrNull'; select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS'); select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456', 'yyyy-MM-dd HH:mm:ss.SSSSSS'); select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456789', 'yyyy-MM-dd HH:mm:ss.SSSSSSSSS'); select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456-0800', 'yyyy-MM-dd HH:mm:ss.SSSSSSZ'); select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123456America/Los_Angeles', 'yyyy-MM-dd HH:mm:ss.SSSSSSz'); +select parseDateTime64InJodaSyntaxOrNull('2024-10-09 10:30:10.123', 'yyyy-dd-MM HH:mm:ss.SSS'); +select parseDateTime64InJodaSyntaxOrNull('2023-02-29 11:22:33America/Los_Angeles', 'yyyy-MM-dd HH:mm:ssz'); +select parseDateTime64InJodaSyntaxOrNull('', ''); +select parseDateTime64InJodaSyntaxOrNull('2177-10-09 10:30:10.123', 'yyyy-MM-dd HH:mm:ss.SSS'); + +set session_timezone = 'UTC'; +select parseDateTime64InJodaSyntax('', ''); diff --git a/tests/queries/0_stateless/03260_compressor_stat.reference b/tests/queries/0_stateless/03260_compressor_stat.reference new file mode 100644 index 00000000000..ba84b26cc48 --- /dev/null +++ b/tests/queries/0_stateless/03260_compressor_stat.reference @@ -0,0 +1 @@ +CODEC(Delta(1), LZ4) 14 48 diff --git a/tests/queries/0_stateless/03260_compressor_stat.sh b/tests/queries/0_stateless/03260_compressor_stat.sh new file mode 100755 index 00000000000..8a03541763c --- /dev/null +++ b/tests/queries/0_stateless/03260_compressor_stat.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo "Hello, World!" | $CLICKHOUSE_COMPRESSOR --codec 'Delta' --codec 'LZ4' | $CLICKHOUSE_COMPRESSOR --stat diff --git a/tests/queries/0_stateless/03262_column_sizes_with_dynamic_structure.reference b/tests/queries/0_stateless/03262_column_sizes_with_dynamic_structure.reference new file mode 100644 index 00000000000..5cab16ed96d --- /dev/null +++ b/tests/queries/0_stateless/03262_column_sizes_with_dynamic_structure.reference @@ -0,0 +1 @@ +test 10.00 million 352.87 MiB 39.43 MiB 39.45 MiB diff --git a/tests/queries/0_stateless/03262_column_sizes_with_dynamic_structure.sql b/tests/queries/0_stateless/03262_column_sizes_with_dynamic_structure.sql new file mode 100644 index 00000000000..099bbd5dd22 --- /dev/null +++ b/tests/queries/0_stateless/03262_column_sizes_with_dynamic_structure.sql @@ -0,0 +1,23 @@ +-- Tags: no-random-settings + +set allow_experimental_dynamic_type = 1; +set allow_experimental_json_type = 1; + + +drop table if exists test; +create table test (d Dynamic, json JSON) engine=MergeTree order by tuple() settings min_rows_for_wide_part=0, min_bytes_for_wide_part=1; +insert into test select number, '{"a" : 42, "b" : "Hello, World"}' from numbers(10000000); + +SELECT + `table`, + formatReadableQuantity(sum(rows)) AS rows, + formatReadableSize(sum(data_uncompressed_bytes)) AS data_size_uncompressed, + formatReadableSize(sum(data_compressed_bytes)) AS data_size_compressed, + formatReadableSize(sum(bytes_on_disk)) AS total_size_on_disk +FROM system.parts +WHERE active AND (database = currentDatabase()) AND (`table` = 'test') +GROUP BY `table` +ORDER BY `table` ASC; + +drop table test; + diff --git a/tests/queries/0_stateless/03262_filter_push_down_view.reference b/tests/queries/0_stateless/03262_filter_push_down_view.reference new file mode 100644 index 00000000000..275ff18f73b --- /dev/null +++ b/tests/queries/0_stateless/03262_filter_push_down_view.reference @@ -0,0 +1,2 @@ +Condition: and((materialize(auid) in [1, 1]), (_CAST(toDate(ts)) in (-Inf, 1703980800])) +Granules: 1/3 diff --git a/tests/queries/0_stateless/03262_filter_push_down_view.sql b/tests/queries/0_stateless/03262_filter_push_down_view.sql new file mode 100644 index 00000000000..8492d8c8ebd --- /dev/null +++ b/tests/queries/0_stateless/03262_filter_push_down_view.sql @@ -0,0 +1,36 @@ +DROP TABLE IF EXISTS alpha; +DROP TABLE IF EXISTS alpha__day; + +SET session_timezone = 'Etc/UTC'; + +CREATE TABLE alpha +( + `ts` DateTime64(6), + `auid` Int64, +) +ENGINE = MergeTree +ORDER BY (auid, ts) +SETTINGS index_granularity = 1; + +CREATE VIEW alpha__day +( + `ts_date` Date, + `auid` Int64, +) +AS SELECT + ts_date, + auid, +FROM +( + SELECT + toDate(ts) AS ts_date, + auid + FROM alpha +) +WHERE ts_date <= toDateTime('2024-01-01 00:00:00') - INTERVAL 1 DAY; + +INSERT INTO alpha VALUES (toDateTime64('2024-01-01 00:00:00.000', 3) - INTERVAL 3 DAY, 1); +INSERT INTO alpha VALUES (toDateTime64('2024-01-01 00:00:00.000', 3) - INTERVAL 3 DAY, 2); +INSERT INTO alpha VALUES (toDateTime64('2024-01-01 00:00:00.000', 3) - INTERVAL 3 DAY, 3); + +select trimLeft(explain) from (EXPLAIN indexes = 1 SELECT auid FROM alpha__day WHERE auid = 1) where explain like '%Condition:%' or explain like '%Granules:%' settings allow_experimental_analyzer = 1; diff --git a/tests/queries/0_stateless/03268_empty_tuple_update.reference b/tests/queries/0_stateless/03268_empty_tuple_update.reference new file mode 100644 index 00000000000..30bc45d7a18 --- /dev/null +++ b/tests/queries/0_stateless/03268_empty_tuple_update.reference @@ -0,0 +1 @@ +() 2 diff --git a/tests/queries/0_stateless/03268_empty_tuple_update.sql b/tests/queries/0_stateless/03268_empty_tuple_update.sql new file mode 100644 index 00000000000..343117719fc --- /dev/null +++ b/tests/queries/0_stateless/03268_empty_tuple_update.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS t0; + +CREATE TABLE t0 (c0 Tuple(), c1 int) ENGINE = Memory(); + +INSERT INTO t0 VALUES ((), 1); + +ALTER TABLE t0 UPDATE c0 = (), c1 = 2 WHERE EXISTS (SELECT 1); + +SELECT * FROM t0; + +DROP TABLE t0;