Merge remote-tracking branch 'origin/master' into igor/remove_redundant_order_by

Commit 1bed5064e7
@@ -448,12 +448,7 @@ else()
link_libraries(global-group)
endif ()

if (NOT (OS_LINUX OR OS_DARWIN))
# Using system libs can cause a lot of warnings in includes (on macro expansion).
option(WERROR "Enable -Werror compiler option" OFF)
else ()
option(WERROR "Enable -Werror compiler option" ON)
endif ()
option(WERROR "Enable -Werror compiler option" ON)

if (WERROR)
# Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks.
@@ -153,7 +153,7 @@ restart:

/* Restore old terminal settings and signals. */
if (memcmp(&term, &oterm, sizeof(term)) != 0) {
const int sigttou = signo[SIGTTOU];
const int sigttou = (int)signo[SIGTTOU];

/* Ignore SIGTTOU generated when we are not the fg pgrp. */
while (tcsetattr(input, TCSAFLUSH|TCSASOFT, &oterm) == -1 &&
@@ -6,6 +6,26 @@ sidebar_label: Float32, Float64

# Float32, Float64

:::warning
If you need accurate calculations, in particular if you work with financial or business data requiring high precision, you should consider using Decimal instead. Floats might lead to inaccurate results, as illustrated below:

```sql
CREATE TABLE IF NOT EXISTS float_vs_decimal
(
   my_float Float64,
   my_decimal Decimal64(3)
) Engine=MergeTree ORDER BY tuple();

INSERT INTO float_vs_decimal SELECT round(canonicalRand(), 3) AS res, res FROM system.numbers LIMIT 1000000; # Generate 1 000 000 random numbers with 3 decimal places and store them as a float and as a decimal

SELECT sum(my_float), sum(my_decimal) FROM float_vs_decimal;
> 500279.56300000014   500279.563

SELECT sumKahan(my_float), sumKahan(my_decimal) FROM float_vs_decimal;
> 500279.563   500279.563
```
:::

[Floating point numbers](https://en.wikipedia.org/wiki/IEEE_754).

Types are equivalent to types of C:

@@ -13,8 +33,6 @@ Types are equivalent to types of C:

- `Float32` — `float`.
- `Float64` — `double`.

We recommend that you store data in integer form whenever possible. For example, convert fixed-precision numbers to integer values, such as monetary amounts or page load times in milliseconds.
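For example, a minimal sketch of that recommendation (the `page_timings` table and its columns are illustrative, not part of this documentation page):

```sql
-- Store page load time as integer milliseconds rather than a fractional-second Float.
CREATE TABLE IF NOT EXISTS page_timings
(
    url String,
    load_time_ms UInt32
)
ENGINE = MergeTree ORDER BY url;

INSERT INTO page_timings VALUES ('/index', 1734);

-- Convert to seconds only when presenting the value.
SELECT url, load_time_ms / 1000 AS load_time_s FROM page_timings;
```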
Aliases:

- `Float32` — `FLOAT`.
@@ -41,7 +41,7 @@ ORDER BY (postcode1, postcode2, addr1, addr2);

We will use the `url` function to stream the data into ClickHouse. Some of the incoming data needs to be preprocessed first, which includes the following steps (a SQL sketch of these steps follows the list):

- Splitting `postcode` into two separate columns, `postcode1` and `postcode2`, since this is better for storage and queries
- Converting the `time` field to a date, as it only contains a 00:00 time
- Converting the `time` field to a date, because it only contains a 00:00 time
- Ignoring the [UUid](/docs/zh/sql-reference/data-types/uuid.md) field, because we do not need it for analysis
- Using the [transform](/docs/zh/sql-reference/functions/other-functions.md#transform) function to convert the `Enum` fields `type` and `duration` into more readable `Enum` fields
- Converting the `is_new` field from a single-character string (`Y`/`N`) to a [UInt8](/docs/zh/sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with the value 0 or 1
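Below is a minimal SQL sketch of the preprocessing steps above. It is illustrative only: the guide itself streams the source file with the `url` function, and the source table name and the exact `transform` mappings shown here are assumptions, not taken from this page.

```sql
SELECT
    splitByChar(' ', postcode)[1] AS postcode1,  -- split the postcode into two columns
    splitByChar(' ', postcode)[2] AS postcode2,
    toDate(time) AS date,                        -- the time component is always 00:00
    transform(type,
              ['T', 'S', 'D', 'F', 'O'],
              ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
    is_new = 'Y' AS is_new                       -- 'Y'/'N' string becomes UInt8 0 or 1
FROM raw_uk_price_paid;                          -- placeholder for the url(...) source
```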
@@ -7,6 +7,11 @@

namespace DB
{

namespace ErrorCodes
{
extern const int UNSUPPORTED_METHOD;
}

class IFunctionOverloadResolver;
using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;

@@ -189,6 +194,11 @@ public:

DataTypePtr getResultType() const override
{
if (!result_type)
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
"Function node with name '{}' is not resolved",
function_name);

return result_type;
}
@ -3983,7 +3983,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
const auto * constant_node = parameter_node->as<ConstantNode>();
|
||||
if (!constant_node)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Parameter for function {} expected to have constant value. Actual {}. In scope {}",
|
||||
"Parameter for function '{}' expected to have constant value. Actual {}. In scope {}",
|
||||
function_name,
|
||||
parameter_node->formatASTForErrorMessage(),
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
@ -4079,7 +4079,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
{
|
||||
auto & function_in_arguments_nodes = function_node.getArguments().getNodes();
|
||||
if (function_in_arguments_nodes.size() != 2)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} expects 2 arguments", function_name);
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function '{}' expects 2 arguments", function_name);
|
||||
|
||||
auto & in_second_argument = function_in_arguments_nodes[1];
|
||||
auto * table_node = in_second_argument->as<TableNode>();
|
||||
@ -4169,8 +4169,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
|
||||
if (!argument_column.type)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Function {} argument is not resolved. In scope {}",
|
||||
function_node.getFunctionName(),
|
||||
"Function '{}' argument is not resolved. In scope {}",
|
||||
function_name,
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
|
||||
const auto * constant_node = function_argument->as<ConstantNode>();
|
||||
@ -4220,7 +4220,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
auto * lambda_expression = lambda_expression_untyped->as<LambdaNode>();
|
||||
if (!lambda_expression)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Function identifier {} must be resolved as lambda. Actual {}. In scope {}",
|
||||
"Function identifier '{}' must be resolved as lambda. Actual {}. In scope {}",
|
||||
function_node.getFunctionName(),
|
||||
lambda_expression_untyped->formatASTForErrorMessage(),
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
@ -4253,7 +4253,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get());
|
||||
if (!tuple_data_type)
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"Function untuple argument must be have compound type. Actual type {}. In scope {}",
|
||||
"Function 'untuple' argument must have compound type. Actual type {}. In scope {}",
|
||||
result_type->getName(),
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
|
||||
@ -4311,7 +4311,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
{
|
||||
if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name))
|
||||
{
|
||||
std::string error_message = fmt::format("Aggregate function with name {} does not exists. In scope {}",
|
||||
std::string error_message = fmt::format("Aggregate function with name '{}' does not exists. In scope {}",
|
||||
function_name,
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
|
||||
@ -4319,6 +4319,11 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, error_message);
|
||||
}
|
||||
|
||||
if (!function_lambda_arguments_indexes.empty())
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"Window function '{}' does not support lambda arguments",
|
||||
function_name);
|
||||
|
||||
AggregateFunctionProperties properties;
|
||||
auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties);
|
||||
|
||||
@ -4368,12 +4373,17 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
auto hints = name_prompter.getHints(function_name, possible_function_names);
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_FUNCTION,
|
||||
"Function with name {} does not exists. In scope {}{}",
|
||||
"Function with name '{}' does not exists. In scope {}{}",
|
||||
function_name,
|
||||
scope.scope_node->formatASTForErrorMessage(),
|
||||
getHintsErrorMessageSuffix(hints));
|
||||
}
|
||||
|
||||
if (!function_lambda_arguments_indexes.empty())
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
"Aggregate function '{}' does not support lambda arguments",
|
||||
function_name);
|
||||
|
||||
AggregateFunctionProperties properties;
|
||||
auto aggregate_function = AggregateFunctionFactory::instance().get(function_name, argument_types, parameters, properties);
|
||||
function_node.resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType());
|
||||
@ -4404,7 +4414,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
const auto * function_data_type = typeid_cast<const DataTypeFunction *>(argument_types[function_lambda_argument_index].get());
|
||||
if (!function_data_type)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Function {} expected function data type for lambda argument with index {}. Actual {}. In scope {}",
|
||||
"Function '{}' expected function data type for lambda argument with index {}. Actual {}. In scope {}",
|
||||
function_name,
|
||||
function_lambda_argument_index,
|
||||
argument_types[function_lambda_argument_index]->getName(),
|
||||
@ -4414,7 +4424,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
size_t function_data_type_arguments_size = function_data_type_argument_types.size();
|
||||
if (function_data_type_arguments_size != lambda_arguments_size)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Function {} function data type for lambda argument with index {} arguments size mismatch. Actual {}. Expected {}. In scope {}",
|
||||
"Function '{}' function data type for lambda argument with index {} arguments size mismatch. Actual {}. Expected {}. In scope {}",
|
||||
function_name,
|
||||
function_data_type_arguments_size,
|
||||
lambda_arguments_size,
|
||||
|
@ -23,6 +23,7 @@ namespace DB
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int S3_ERROR;
|
||||
extern const int INVALID_CONFIG_PARAMETER;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
@ -222,8 +223,21 @@ void BackupWriterS3::copyObjectMultipartImpl(
|
||||
|
||||
for (size_t part_number = 1; position < size; ++part_number)
|
||||
{
|
||||
/// Check that part number is not too big.
|
||||
if (part_number > request_settings.max_part_number)
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::INVALID_CONFIG_PARAMETER,
|
||||
"Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
|
||||
"upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_operation_copy_size = {}",
|
||||
request_settings.max_part_number, size, request_settings.min_upload_part_size, request_settings.max_upload_part_size,
|
||||
request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold,
|
||||
request_settings.max_single_operation_copy_size);
|
||||
}
|
||||
|
||||
size_t next_position = std::min(position + upload_part_size, size);
|
||||
|
||||
/// Make a copy request to copy a part.
|
||||
Aws::S3::Model::UploadPartCopyRequest part_request;
|
||||
part_request.SetCopySource(src_bucket + "/" + src_key);
|
||||
part_request.SetBucket(dst_bucket);
|
||||
@ -250,6 +264,7 @@ void BackupWriterS3::copyObjectMultipartImpl(
|
||||
|
||||
position = next_position;
|
||||
|
||||
/// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`).
|
||||
if (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0)
|
||||
{
|
||||
upload_part_size *= request_settings.upload_part_size_multiply_factor;
|
||||
|
@ -22,17 +22,29 @@ struct StringKey24
|
||||
inline StringRef ALWAYS_INLINE toStringRef(const StringKey8 & n)
|
||||
{
|
||||
assert(n != 0);
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
return {reinterpret_cast<const char *>(&n), 8ul - (std::countr_zero(n) >> 3)};
|
||||
#else
|
||||
return {reinterpret_cast<const char *>(&n), 8ul - (std::countl_zero(n) >> 3)};
|
||||
#endif
|
||||
}
|
||||
inline StringRef ALWAYS_INLINE toStringRef(const StringKey16 & n)
|
||||
{
|
||||
assert(n.items[1] != 0);
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
return {reinterpret_cast<const char *>(&n), 16ul - (std::countr_zero(n.items[1]) >> 3)};
|
||||
#else
|
||||
return {reinterpret_cast<const char *>(&n), 16ul - (std::countl_zero(n.items[1]) >> 3)};
|
||||
#endif
|
||||
}
|
||||
inline StringRef ALWAYS_INLINE toStringRef(const StringKey24 & n)
|
||||
{
|
||||
assert(n.c != 0);
|
||||
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
return {reinterpret_cast<const char *>(&n), 24ul - (std::countr_zero(n.c) >> 3)};
|
||||
#else
|
||||
return {reinterpret_cast<const char *>(&n), 24ul - (std::countl_zero(n.c) >> 3)};
|
||||
#endif
|
||||
}
|
||||
|
||||
struct StringHashTableHash
|
||||
@ -238,7 +250,6 @@ public:
|
||||
// 2. Use switch case extension to generate fast dispatching table
|
||||
// 3. Funcs are named callables that can be force_inlined
|
||||
//
|
||||
// NOTE: It relies on Little Endianness
|
||||
//
|
||||
// NOTE: It requires padded to 8 bytes keys (IOW you cannot pass
|
||||
// std::string here, but you can pass i.e. ColumnString::getDataAt()),
|
||||
@ -280,13 +291,19 @@ public:
|
||||
if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
|
||||
{
|
||||
memcpy(&n[0], p, 8);
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
n[0] &= -1ULL >> s;
|
||||
else
|
||||
n[0] &= -1ULL << s;
|
||||
}
|
||||
else
|
||||
{
|
||||
const char * lp = x.data + x.size - 8;
|
||||
memcpy(&n[0], lp, 8);
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
n[0] >>= s;
|
||||
else
|
||||
n[0] <<= s;
|
||||
}
|
||||
keyHolderDiscardKey(key_holder);
|
||||
return func(self.m1, k8, hash(k8));
|
||||
@ -296,7 +313,10 @@ public:
|
||||
memcpy(&n[0], p, 8);
|
||||
const char * lp = x.data + x.size - 8;
|
||||
memcpy(&n[1], lp, 8);
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
n[1] >>= s;
|
||||
else
|
||||
n[1] <<= s;
|
||||
keyHolderDiscardKey(key_holder);
|
||||
return func(self.m2, k16, hash(k16));
|
||||
}
|
||||
@ -305,7 +325,10 @@ public:
|
||||
memcpy(&n[0], p, 16);
|
||||
const char * lp = x.data + x.size - 8;
|
||||
memcpy(&n[2], lp, 8);
|
||||
if constexpr (std::endian::native == std::endian::little)
|
||||
n[2] >>= s;
|
||||
else
|
||||
n[2] <<= s;
|
||||
keyHolderDiscardKey(key_holder);
|
||||
return func(self.m3, k24, hash(k24));
|
||||
}
|
||||
|
@@ -44,7 +44,7 @@ private:

int fd;
#endif
#if defined(OS_FREEBSD)
int pagesize;
size_t pagesize;
pid_t self;
#endif
};
@@ -136,7 +136,7 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const std::string & snapshot_pa

return;

S3Settings::RequestSettings request_settings_1;
request_settings_1.upload_part_size_multiply_parts_count_threshold = 10000;
request_settings_1.setEmptyFieldsByDefault();

const auto create_writer = [&](const auto & key)
{
@ -37,8 +37,10 @@ std::unique_ptr<S3ObjectStorageSettings> getSettings(const Poco::Util::AbstractC
|
||||
S3Settings::RequestSettings request_settings;
|
||||
request_settings.max_single_read_retries = config.getUInt64(config_prefix + ".s3_max_single_read_retries", settings.s3_max_single_read_retries);
|
||||
request_settings.min_upload_part_size = config.getUInt64(config_prefix + ".s3_min_upload_part_size", settings.s3_min_upload_part_size);
|
||||
request_settings.max_upload_part_size = config.getUInt64(config_prefix + ".s3_max_upload_part_size", S3Settings::RequestSettings::DEFAULT_MAX_UPLOAD_PART_SIZE);
|
||||
request_settings.upload_part_size_multiply_factor = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_factor", settings.s3_upload_part_size_multiply_factor);
|
||||
request_settings.upload_part_size_multiply_parts_count_threshold = config.getUInt64(config_prefix + ".s3_upload_part_size_multiply_parts_count_threshold", settings.s3_upload_part_size_multiply_parts_count_threshold);
|
||||
request_settings.max_part_number = config.getUInt64(config_prefix + ".s3_max_part_number", S3Settings::RequestSettings::DEFAULT_MAX_PART_NUMBER);
|
||||
request_settings.max_single_part_upload_size = config.getUInt64(config_prefix + ".s3_max_single_part_upload_size", settings.s3_max_single_part_upload_size);
|
||||
request_settings.check_objects_after_upload = config.getUInt64(config_prefix + ".s3_check_objects_after_upload", settings.s3_check_objects_after_upload);
|
||||
request_settings.max_unexpected_write_error_retries = config.getUInt64(config_prefix + ".s3_max_unexpected_write_error_retries", settings.s3_max_unexpected_write_error_retries);
|
||||
|
@@ -124,12 +124,12 @@ int io_submit(int ctx, long nr, struct iocb * iocbpp[])

}
}

return nr;
return static_cast<int>(nr);
}

int io_getevents(int ctx, long, long max_nr, struct kevent * events, struct timespec * timeout)
{
return kevent(ctx, nullptr, 0, events, max_nr, timeout);
return kevent(ctx, nullptr, 0, events, static_cast<int>(max_nr), timeout);
}
@@ -250,7 +250,7 @@ size_t ReadBufferFromS3::getFileSize()

if (file_size)
return *file_size;

auto object_size = S3::getObjectSize(client_ptr, bucket, key, version_id, true, read_settings.for_object_storage);
auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id, true, read_settings.for_object_storage);

file_size = object_size;
return *file_size;
@ -852,7 +852,7 @@ namespace S3
|
||||
}
|
||||
|
||||
|
||||
S3::ObjectInfo getObjectInfo(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
|
||||
S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
|
||||
{
|
||||
ProfileEvents::increment(ProfileEvents::S3HeadObject);
|
||||
if (for_disk_s3)
|
||||
@ -865,7 +865,7 @@ namespace S3
|
||||
if (!version_id.empty())
|
||||
req.SetVersionId(version_id);
|
||||
|
||||
Aws::S3::Model::HeadObjectOutcome outcome = client_ptr->HeadObject(req);
|
||||
Aws::S3::Model::HeadObjectOutcome outcome = client.HeadObject(req);
|
||||
|
||||
if (outcome.IsSuccess())
|
||||
{
|
||||
@ -879,9 +879,9 @@ namespace S3
|
||||
return {};
|
||||
}
|
||||
|
||||
size_t getObjectSize(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
|
||||
size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3)
|
||||
{
|
||||
return getObjectInfo(client_ptr, bucket, key, version_id, throw_on_error, for_disk_s3).size;
|
||||
return getObjectInfo(client, bucket, key, version_id, throw_on_error, for_disk_s3).size;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -130,9 +130,9 @@ struct ObjectInfo
|
||||
time_t last_modification_time = 0;
|
||||
};
|
||||
|
||||
S3::ObjectInfo getObjectInfo(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
|
||||
S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
|
||||
|
||||
size_t getObjectSize(std::shared_ptr<const Aws::S3::S3Client> client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
|
||||
size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -50,6 +50,7 @@ const int S3_WARN_MAX_PARTS = 10000;
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int S3_ERROR;
|
||||
extern const int INVALID_CONFIG_PARAMETER;
|
||||
}
|
||||
|
||||
struct WriteBufferFromS3::UploadPartTask
|
||||
@ -122,12 +123,6 @@ void WriteBufferFromS3::nextImpl()
|
||||
|
||||
void WriteBufferFromS3::allocateBuffer()
|
||||
{
|
||||
if (total_parts_uploaded != 0 && total_parts_uploaded % request_settings.upload_part_size_multiply_parts_count_threshold == 0)
|
||||
{
|
||||
upload_part_size *= request_settings.upload_part_size_multiply_factor;
|
||||
upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size);
|
||||
}
|
||||
|
||||
temporary_buffer = Aws::MakeShared<Aws::StringStream>("temporary buffer");
|
||||
temporary_buffer->exceptions(std::ios::badbit);
|
||||
last_part_size = 0;
|
||||
@ -257,13 +252,10 @@ void WriteBufferFromS3::writePart()
|
||||
{
|
||||
UploadPartTask * task = nullptr;
|
||||
|
||||
int part_number;
|
||||
{
|
||||
std::lock_guard lock(bg_tasks_mutex);
|
||||
|
||||
task = &upload_object_tasks.emplace_back();
|
||||
++num_added_bg_tasks;
|
||||
part_number = num_added_bg_tasks;
|
||||
}
|
||||
|
||||
/// Notify waiting thread when task finished
|
||||
@ -281,7 +273,7 @@ void WriteBufferFromS3::writePart()
|
||||
|
||||
try
|
||||
{
|
||||
fillUploadRequest(task->req, part_number);
|
||||
fillUploadRequest(task->req);
|
||||
|
||||
schedule([this, task, task_finish_notify]()
|
||||
{
|
||||
@ -308,23 +300,44 @@ void WriteBufferFromS3::writePart()
|
||||
UploadPartTask task;
|
||||
auto & tags = TSA_SUPPRESS_WARNING_FOR_WRITE(part_tags); /// Suppress warning because schedule == false.
|
||||
|
||||
fillUploadRequest(task.req, static_cast<int>(tags.size() + 1));
|
||||
fillUploadRequest(task.req);
|
||||
processUploadRequest(task);
|
||||
tags.push_back(task.tag);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & req, int part_number)
|
||||
void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & req)
|
||||
{
|
||||
/// Increase part number.
|
||||
++part_number;
|
||||
if (!multipart_upload_id.empty() && (part_number > request_settings.max_part_number))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::INVALID_CONFIG_PARAMETER,
|
||||
"Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, "
|
||||
"upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_part_upload_size = {}",
|
||||
request_settings.max_part_number, count(), request_settings.min_upload_part_size, request_settings.max_upload_part_size,
|
||||
request_settings.upload_part_size_multiply_factor, request_settings.upload_part_size_multiply_parts_count_threshold,
|
||||
request_settings.max_single_part_upload_size);
|
||||
}
|
||||
|
||||
/// Setup request.
|
||||
req.SetBucket(bucket);
|
||||
req.SetKey(key);
|
||||
req.SetPartNumber(part_number);
|
||||
req.SetPartNumber(static_cast<int>(part_number));
|
||||
req.SetUploadId(multipart_upload_id);
|
||||
req.SetContentLength(temporary_buffer->tellp());
|
||||
req.SetBody(temporary_buffer);
|
||||
|
||||
/// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840
|
||||
req.SetContentType("binary/octet-stream");
|
||||
|
||||
/// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`).
|
||||
if (!multipart_upload_id.empty() && (part_number % request_settings.upload_part_size_multiply_parts_count_threshold == 0))
|
||||
{
|
||||
upload_part_size *= request_settings.upload_part_size_multiply_factor;
|
||||
upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size);
|
||||
}
|
||||
}
|
||||
|
||||
void WriteBufferFromS3::processUploadRequest(UploadPartTask & task)
|
||||
@ -343,8 +356,6 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task)
|
||||
}
|
||||
else
|
||||
throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType());
|
||||
|
||||
total_parts_uploaded++;
|
||||
}
|
||||
|
||||
void WriteBufferFromS3::completeMultipartUpload()
|
||||
|
@ -75,7 +75,7 @@ private:
|
||||
void finalizeImpl() override;
|
||||
|
||||
struct UploadPartTask;
|
||||
void fillUploadRequest(Aws::S3::Model::UploadPartRequest & req, int part_number);
|
||||
void fillUploadRequest(Aws::S3::Model::UploadPartRequest & req);
|
||||
void processUploadRequest(UploadPartTask & task);
|
||||
|
||||
struct PutObjectTask;
|
||||
@ -95,7 +95,7 @@ private:
|
||||
size_t upload_part_size = 0;
|
||||
std::shared_ptr<Aws::StringStream> temporary_buffer; /// Buffer to accumulate data.
|
||||
size_t last_part_size = 0;
|
||||
std::atomic<size_t> total_parts_uploaded = 0;
|
||||
size_t part_number = 0;
|
||||
|
||||
/// Upload in S3 is made in parts.
|
||||
/// We initiate upload, then upload each part and get ETag as a response, and then finalizeImpl() upload with listing all our parts.
|
||||
|
@@ -203,7 +203,7 @@ void ClientInfo::setInitialQuery()

void ClientInfo::fillOSUserHostNameAndVersionInfo()
{
os_user.resize(256, '\0');
if (0 == getlogin_r(os_user.data(), os_user.size() - 1))
if (0 == getlogin_r(os_user.data(), static_cast<int>(os_user.size() - 1)))
os_user.resize(strlen(os_user.c_str()));
else
os_user.clear(); /// Don't mind if we cannot determine user login.
@@ -3,7 +3,7 @@
#include <Core/Names.h>
#include <Interpreters/Context_fwd.h>
#include <Columns/IColumn.h>
#include <QueryPipeline/PipelineResourcesHolder.h>
#include <QueryPipeline/QueryPlanResourceHolder.h>

#include <list>
#include <memory>

@@ -2,7 +2,7 @@
#include <Interpreters/Context_fwd.h>
#include <Processors/IProcessor.h>
#include <QueryPipeline/PipelineResourcesHolder.h>
#include <QueryPipeline/QueryPlanResourceHolder.h>

namespace DB
{

@@ -1,7 +1,7 @@
#pragma once

#include <Processors/IProcessor.h>
#include <QueryPipeline/PipelineResourcesHolder.h>
#include <QueryPipeline/QueryPlanResourceHolder.h>
#include <QueryPipeline/Chain.h>
#include <QueryPipeline/SizeLimits.h>

@@ -1,5 +1,5 @@
#pragma once
#include <QueryPipeline/PipelineResourcesHolder.h>
#include <QueryPipeline/QueryPlanResourceHolder.h>
#include <QueryPipeline/SizeLimits.h>
#include <QueryPipeline/StreamLocalLimits.h>
#include <functional>

@@ -1,4 +1,4 @@
#include <QueryPipeline/PipelineResourcesHolder.h>
#include <QueryPipeline/QueryPlanResourceHolder.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/QueryIdHolder.h>
@@ -21,16 +21,12 @@ limitations under the License. */

namespace DB
{

using Time = std::chrono::time_point<std::chrono::system_clock>;
using Seconds = std::chrono::seconds;
using MilliSeconds = std::chrono::milliseconds;

struct BlocksMetadata
{
String hash;
UInt64 version;
Time time;
std::chrono::time_point<std::chrono::system_clock> time;
};

struct MergeableBlocks

@@ -54,6 +50,10 @@ friend class LiveViewSource;
friend class LiveViewEventsSource;
friend class LiveViewSink;

using Time = std::chrono::time_point<std::chrono::system_clock>;
using Seconds = std::chrono::seconds;
using MilliSeconds = std::chrono::milliseconds;

public:
StorageLiveView(
const StorageID & table_id_,
@@ -4525,6 +4525,7 @@ void MergeTreeData::restorePartFromBackup(std::shared_ptr<RestoredPartsHolder> r

auto read_buffer = backup_entry->getReadBuffer();
auto write_buffer = disk->writeFile(temp_part_dir / filename);
copyData(*read_buffer, *write_buffer);
write_buffer->finalize();
reservation->update(reservation->getSize() - backup_entry->getSize());
}
src/Storages/ReadFromStorageProgress.cpp (new file, 53 lines)
@@ -0,0 +1,53 @@
#include <Storages/ReadFromStorageProgress.h>
#include <Processors/ISource.h>
#include <QueryPipeline/StreamLocalLimits.h>

namespace DB
{

void updateRowsProgressApprox(
    ISource & source,
    const Chunk & chunk,
    UInt64 total_result_size,
    UInt64 & total_rows_approx_accumulated,
    size_t & total_rows_count_times,
    UInt64 & total_rows_approx_max)
{
    if (!total_result_size)
        return;

    const size_t num_rows = chunk.getNumRows();

    if (!num_rows)
        return;

    const auto progress = source.getReadProgress();
    if (progress && !progress->limits.empty())
    {
        for (const auto & limit : progress->limits)
        {
            if (limit.leaf_limits.max_rows || limit.leaf_limits.max_bytes
                || limit.local_limits.size_limits.max_rows || limit.local_limits.size_limits.max_bytes)
                return;
        }
    }

    const auto bytes_per_row = std::ceil(static_cast<double>(chunk.bytes()) / num_rows);
    size_t total_rows_approx = static_cast<size_t>(std::ceil(static_cast<double>(total_result_size) / bytes_per_row));
    total_rows_approx_accumulated += total_rows_approx;
    ++total_rows_count_times;
    total_rows_approx = total_rows_approx_accumulated / total_rows_count_times;

    /// We need to add diff, because total_rows_approx is incremental value.
    /// It would be more correct to send total_rows_approx as is (not a diff),
    /// but incrementation of total_rows_to_read does not allow that.
    /// A new counter can be introduced for that to be sent to client, but it does not worth it.
    if (total_rows_approx > total_rows_approx_max)
    {
        size_t diff = total_rows_approx - total_rows_approx_max;
        source.addTotalRowsApprox(diff);
        total_rows_approx_max = total_rows_approx;
    }
}

}
src/Storages/ReadFromStorageProgress.h (new file, 18 lines)
@@ -0,0 +1,18 @@
#pragma once
#include <Core/Types.h>

namespace DB
{

class ISource;
class Chunk;

void updateRowsProgressApprox(
    ISource & source,
    const Chunk & chunk,
    UInt64 total_result_size,
    UInt64 & total_rows_approx_accumulated,
    size_t & total_rows_count_times,
    UInt64 & total_rows_approx_max);

}
@@ -5,6 +5,7 @@
#include <Storages/PartitionedSink.h>
#include <Storages/Distributed/DirectoryMonitor.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/ReadFromStorageProgress.h>

#include <Interpreters/Context.h>
#include <Interpreters/evaluateConstantExpression.h>

@@ -592,22 +593,8 @@ public:

if (num_rows)
{
auto bytes_per_row = std::ceil(static_cast<double>(chunk.bytes()) / num_rows);
size_t total_rows_approx = static_cast<size_t>(std::ceil(static_cast<double>(files_info->total_bytes_to_read) / bytes_per_row));
total_rows_approx_accumulated += total_rows_approx;
++total_rows_count_times;
total_rows_approx = total_rows_approx_accumulated / total_rows_count_times;

/// We need to add diff, because total_rows_approx is incremental value.
/// It would be more correct to send total_rows_approx as is (not a diff),
/// but incrementation of total_rows_to_read does not allow that.
/// A new field can be introduces for that to be sent to client, but it does not worth it.
if (total_rows_approx > total_rows_approx_prev)
{
size_t diff = total_rows_approx - total_rows_approx_prev;
addTotalRowsApprox(diff);
total_rows_approx_prev = total_rows_approx;
}
updateRowsProgressApprox(
*this, chunk, files_info->total_bytes_to_read, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
}
return chunk;
}

@@ -648,7 +635,7 @@ private:

UInt64 total_rows_approx_accumulated = 0;
size_t total_rows_count_times = 0;
UInt64 total_rows_approx_prev = 0;
UInt64 total_rows_approx_max = 0;
};
@@ -1032,6 +1032,7 @@ void StorageLog::restoreDataImpl(const BackupPtr & backup, const String & data_p

auto in = backup_entry->getReadBuffer();
auto out = disk->writeFile(data_file.path, max_compress_block_size, WriteMode::Append);
copyData(*in, *out);
out->finalize();
}

if (use_marks_file)
@ -28,6 +28,7 @@
|
||||
#include <Storages/getVirtualsForStorage.h>
|
||||
#include <Storages/checkAndGetLiteralArgument.h>
|
||||
#include <Storages/StorageURL.h>
|
||||
#include <Storages/ReadFromStorageProgress.h>
|
||||
|
||||
#include <IO/ReadBufferFromS3.h>
|
||||
#include <IO/WriteBufferFromS3.h>
|
||||
@ -153,6 +154,11 @@ public:
|
||||
return nextAssumeLocked();
|
||||
}
|
||||
|
||||
size_t getTotalSize() const
|
||||
{
|
||||
return total_size;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
String nextAssumeLocked()
|
||||
@ -198,19 +204,27 @@ private:
|
||||
if (block.has("_file"))
|
||||
file_column = block.getByName("_file").column->assumeMutable();
|
||||
|
||||
for (const auto & row : result_batch)
|
||||
std::unordered_map<String, S3::ObjectInfo> all_object_infos;
|
||||
for (const auto & key_info : result_batch)
|
||||
{
|
||||
const String & key = row.GetKey();
|
||||
const String & key = key_info.GetKey();
|
||||
if (recursive || re2::RE2::FullMatch(key, *matcher))
|
||||
{
|
||||
String path = fs::path(globbed_uri.bucket) / key;
|
||||
if (object_infos)
|
||||
(*object_infos)[path] = {.size = size_t(row.GetSize()), .last_modification_time = row.GetLastModified().Millis() / 1000};
|
||||
String file = path.substr(path.find_last_of('/') + 1);
|
||||
const size_t key_size = key_info.GetSize();
|
||||
|
||||
all_object_infos.emplace(path, S3::ObjectInfo{.size = key_size, .last_modification_time = key_info.GetLastModified().Millis() / 1000});
|
||||
|
||||
if (path_column)
|
||||
{
|
||||
path_column->insert(path);
|
||||
}
|
||||
if (file_column)
|
||||
{
|
||||
String file = path.substr(path.find_last_of('/') + 1);
|
||||
file_column->insert(file);
|
||||
}
|
||||
|
||||
key_column->insert(key);
|
||||
}
|
||||
}
|
||||
@ -220,18 +234,37 @@ private:
|
||||
size_t rows = block.rows();
|
||||
buffer.reserve(rows);
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
buffer.emplace_back(keys.getDataAt(i).toString());
|
||||
{
|
||||
auto key = keys.getDataAt(i).toString();
|
||||
std::string path = fs::path(globbed_uri.bucket) / key;
|
||||
|
||||
const auto & object_info = all_object_infos.at(path);
|
||||
total_size += object_info.size;
|
||||
if (object_infos)
|
||||
object_infos->emplace(path, object_info);
|
||||
|
||||
buffer.emplace_back(key);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
buffer.reserve(result_batch.size());
|
||||
for (const auto & row : result_batch)
|
||||
for (const auto & key_info : result_batch)
|
||||
{
|
||||
String key = row.GetKey();
|
||||
String key = key_info.GetKey();
|
||||
if (recursive || re2::RE2::FullMatch(key, *matcher))
|
||||
{
|
||||
const size_t key_size = key_info.GetSize();
|
||||
total_size += key_size;
|
||||
if (object_infos)
|
||||
{
|
||||
const std::string path = fs::path(globbed_uri.bucket) / key;
|
||||
(*object_infos)[path] = {.size = key_size, .last_modification_time = key_info.GetLastModified().Millis() / 1000};
|
||||
}
|
||||
buffer.emplace_back(std::move(key));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Set iterator only after the whole batch is processed
|
||||
buffer_iter = buffer.begin();
|
||||
@ -261,6 +294,7 @@ private:
|
||||
std::unordered_map<String, S3::ObjectInfo> * object_infos;
|
||||
Strings * read_keys;
|
||||
S3Settings::RequestSettings request_settings;
|
||||
size_t total_size = 0;
|
||||
};
|
||||
|
||||
StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(
|
||||
@ -281,12 +315,28 @@ String StorageS3Source::DisclosedGlobIterator::next()
|
||||
return pimpl->next();
|
||||
}
|
||||
|
||||
size_t StorageS3Source::DisclosedGlobIterator::getTotalSize() const
|
||||
{
|
||||
return pimpl->getTotalSize();
|
||||
}
|
||||
|
||||
class StorageS3Source::KeysIterator::Impl : WithContext
|
||||
{
|
||||
public:
|
||||
explicit Impl(
|
||||
const std::vector<String> & keys_, const String & bucket_, ASTPtr query_, const Block & virtual_header_, ContextPtr context_)
|
||||
: WithContext(context_), keys(keys_), bucket(bucket_), query(query_), virtual_header(virtual_header_)
|
||||
const Aws::S3::S3Client & client_,
|
||||
const std::string & version_id_,
|
||||
const std::vector<String> & keys_,
|
||||
const String & bucket_,
|
||||
ASTPtr query_,
|
||||
const Block & virtual_header_,
|
||||
ContextPtr context_,
|
||||
std::unordered_map<String, S3::ObjectInfo> * object_infos_)
|
||||
: WithContext(context_)
|
||||
, keys(keys_)
|
||||
, bucket(bucket_)
|
||||
, query(query_)
|
||||
, virtual_header(virtual_header_)
|
||||
{
|
||||
/// Create a virtual block with one row to construct filter
|
||||
if (query && virtual_header)
|
||||
@ -316,14 +366,28 @@ public:
|
||||
if (block.has("_file"))
|
||||
file_column = block.getByName("_file").column->assumeMutable();
|
||||
|
||||
std::unordered_map<String, S3::ObjectInfo> all_object_infos;
|
||||
for (const auto & key : keys)
|
||||
{
|
||||
String path = fs::path(bucket) / key;
|
||||
String file = path.substr(path.find_last_of('/') + 1);
|
||||
const String path = fs::path(bucket) / key;
|
||||
|
||||
/// To avoid extra requests update total_size only if object_infos != nullptr
|
||||
/// (which means we eventually need this info anyway, so it should be ok to do it now).
|
||||
if (object_infos_)
|
||||
{
|
||||
auto key_info = S3::getObjectInfo(client_, bucket, key, version_id_, true, false);
|
||||
all_object_infos.emplace(path, S3::ObjectInfo{.size = key_info.size, .last_modification_time = key_info.last_modification_time});
|
||||
}
|
||||
|
||||
if (path_column)
|
||||
{
|
||||
path_column->insert(path);
|
||||
}
|
||||
if (file_column)
|
||||
{
|
||||
const String file = path.substr(path.find_last_of('/') + 1);
|
||||
file_column->insert(file);
|
||||
}
|
||||
key_column->insert(key);
|
||||
}
|
||||
|
||||
@ -333,7 +397,19 @@ public:
|
||||
Strings filtered_keys;
|
||||
filtered_keys.reserve(rows);
|
||||
for (size_t i = 0; i < rows; ++i)
|
||||
filtered_keys.emplace_back(keys_col.getDataAt(i).toString());
|
||||
{
|
||||
auto key = keys_col.getDataAt(i).toString();
|
||||
|
||||
if (object_infos_)
|
||||
{
|
||||
std::string path = fs::path(bucket) / key;
|
||||
const auto & object_info = all_object_infos.at(path);
|
||||
total_size += object_info.size;
|
||||
object_infos_->emplace(path, object_info);
|
||||
}
|
||||
|
||||
filtered_keys.emplace_back(key);
|
||||
}
|
||||
|
||||
keys = std::move(filtered_keys);
|
||||
}
|
||||
@ -348,6 +424,11 @@ public:
|
||||
return keys[current_index];
|
||||
}
|
||||
|
||||
size_t getTotalSize() const
|
||||
{
|
||||
return total_size;
|
||||
}
|
||||
|
||||
private:
|
||||
Strings keys;
|
||||
std::atomic_size_t index = 0;
|
||||
@ -355,11 +436,21 @@ private:
|
||||
String bucket;
|
||||
ASTPtr query;
|
||||
Block virtual_header;
|
||||
|
||||
size_t total_size = 0;
|
||||
};
|
||||
|
||||
StorageS3Source::KeysIterator::KeysIterator(
|
||||
const std::vector<String> & keys_, const String & bucket_, ASTPtr query, const Block & virtual_header, ContextPtr context)
|
||||
: pimpl(std::make_shared<StorageS3Source::KeysIterator::Impl>(keys_, bucket_, query, virtual_header, context))
|
||||
const Aws::S3::S3Client & client_,
|
||||
const std::string & version_id_,
|
||||
const std::vector<String> & keys_,
|
||||
const String & bucket_,
|
||||
ASTPtr query,
|
||||
const Block & virtual_header,
|
||||
ContextPtr context,
|
||||
std::unordered_map<String, S3::ObjectInfo> * object_infos_)
|
||||
: pimpl(std::make_shared<StorageS3Source::KeysIterator::Impl>(
|
||||
client_, version_id_, keys_, bucket_, query, virtual_header, context, object_infos_))
|
||||
{
|
||||
}
|
||||
|
||||
@ -368,6 +459,11 @@ String StorageS3Source::KeysIterator::next()
|
||||
return pimpl->next();
|
||||
}
|
||||
|
||||
size_t StorageS3Source::KeysIterator::getTotalSize() const
|
||||
{
|
||||
return pimpl->getTotalSize();
|
||||
}
|
||||
|
||||
Block StorageS3Source::getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns)
|
||||
{
|
||||
for (const auto & virtual_column : requested_virtual_columns)
|
||||
@ -390,7 +486,7 @@ StorageS3Source::StorageS3Source(
|
||||
const std::shared_ptr<const Aws::S3::S3Client> & client_,
|
||||
const String & bucket_,
|
||||
const String & version_id_,
|
||||
std::shared_ptr<IteratorWrapper> file_iterator_,
|
||||
std::shared_ptr<IIterator> file_iterator_,
|
||||
const size_t download_thread_num_,
|
||||
const std::unordered_map<String, S3::ObjectInfo> & object_infos_)
|
||||
: ISource(getHeader(sample_block_, requested_virtual_columns_))
|
||||
@ -459,7 +555,7 @@ std::unique_ptr<ReadBuffer> StorageS3Source::createS3ReadBuffer(const String & k
|
||||
if (it != object_infos.end())
|
||||
object_size = it->second.size;
|
||||
else
|
||||
object_size = DB::S3::getObjectSize(client, bucket, key, version_id, false, false);
|
||||
object_size = DB::S3::getObjectSize(*client, bucket, key, version_id, false, false);
|
||||
|
||||
auto download_buffer_size = getContext()->getSettings().max_download_buffer_size;
|
||||
const bool use_parallel_download = download_buffer_size > 0 && download_thread_num > 1;
|
||||
@ -503,6 +599,13 @@ Chunk StorageS3Source::generate()
|
||||
{
|
||||
UInt64 num_rows = chunk.getNumRows();
|
||||
|
||||
auto it = object_infos.find(file_path);
|
||||
if (num_rows && it != object_infos.end())
|
||||
{
|
||||
updateRowsProgressApprox(
|
||||
*this, chunk, file_iterator->getTotalSize(), total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max);
|
||||
}
|
||||
|
||||
for (const auto & virtual_column : requested_virtual_columns)
|
||||
{
|
||||
if (virtual_column.name == "_path")
|
||||
@ -797,7 +900,7 @@ StorageS3::StorageS3(
|
||||
virtual_block.insert({column.type->createColumn(), column.type, column.name});
|
||||
}
|
||||
|
||||
std::shared_ptr<StorageS3Source::IteratorWrapper> StorageS3::createFileIterator(
|
||||
std::shared_ptr<StorageS3Source::IIterator> StorageS3::createFileIterator(
|
||||
const S3Configuration & s3_configuration,
|
||||
const std::vector<String> & keys,
|
||||
bool is_key_with_globs,
|
||||
@ -810,25 +913,22 @@ std::shared_ptr<StorageS3Source::IteratorWrapper> StorageS3::createFileIterator(
|
||||
{
|
||||
if (distributed_processing)
|
||||
{
|
||||
return std::make_shared<StorageS3Source::IteratorWrapper>(
|
||||
[callback = local_context->getReadTaskCallback()]() -> String {
|
||||
return callback();
|
||||
});
|
||||
return std::make_shared<StorageS3Source::ReadTaskIterator>(local_context->getReadTaskCallback());
|
||||
}
|
||||
else if (is_key_with_globs)
|
||||
{
|
||||
/// Iterate through disclosed globs and make a source for each file
|
||||
auto glob_iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(
|
||||
*s3_configuration.client, s3_configuration.uri, query, virtual_block, local_context, object_infos, read_keys, s3_configuration.request_settings);
|
||||
return std::make_shared<StorageS3Source::IteratorWrapper>([glob_iterator]() { return glob_iterator->next(); });
|
||||
return std::make_shared<StorageS3Source::DisclosedGlobIterator>(
|
||||
*s3_configuration.client, s3_configuration.uri, query, virtual_block,
|
||||
local_context, object_infos, read_keys, s3_configuration.request_settings);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto keys_iterator
|
||||
= std::make_shared<StorageS3Source::KeysIterator>(keys, s3_configuration.uri.bucket, query, virtual_block, local_context);
|
||||
if (read_keys)
|
||||
*read_keys = keys;
|
||||
return std::make_shared<StorageS3Source::IteratorWrapper>([keys_iterator]() { return keys_iterator->next(); });
|
||||
|
||||
return std::make_shared<StorageS3Source::KeysIterator>(
|
||||
*s3_configuration.client, s3_configuration.uri.version_id, keys, s3_configuration.uri.bucket, query, virtual_block, local_context, object_infos);
|
||||
}
|
||||
}
|
||||
|
||||
@ -869,7 +969,7 @@ Pipe StorageS3::read(
|
||||
requested_virtual_columns.push_back(virtual_column);
|
||||
}
|
||||
|
||||
std::shared_ptr<StorageS3Source::IteratorWrapper> iterator_wrapper = createFileIterator(
|
||||
std::shared_ptr<StorageS3Source::IIterator> iterator_wrapper = createFileIterator(
|
||||
s3_configuration,
|
||||
keys,
|
||||
is_key_with_globs,
|
||||
@ -1369,7 +1469,7 @@ std::optional<ColumnsDescription> StorageS3::tryGetColumnsFromCache(
|
||||
/// Note that in case of exception in getObjectInfo returned info will be empty,
|
||||
/// but schema cache will handle this case and won't return columns from cache
|
||||
/// because we can't say that it's valid without last modification time.
|
||||
info = S3::getObjectInfo(s3_configuration.client, s3_configuration.uri.bucket, *it, s3_configuration.uri.version_id, false, false);
|
||||
info = S3::getObjectInfo(*s3_configuration.client, s3_configuration.uri.bucket, *it, s3_configuration.uri.version_id, false, false);
|
||||
if (object_infos)
|
||||
(*object_infos)[path] = info;
|
||||
}
|
||||
|
@ -33,7 +33,17 @@ class StorageS3SequentialSource;
|
||||
class StorageS3Source : public ISource, WithContext
|
||||
{
|
||||
public:
|
||||
class DisclosedGlobIterator
|
||||
class IIterator
|
||||
{
|
||||
public:
|
||||
virtual ~IIterator() = default;
|
||||
virtual String next() = 0;
|
||||
virtual size_t getTotalSize() const = 0;
|
||||
|
||||
String operator ()() { return next(); }
|
||||
};
|
||||
|
||||
class DisclosedGlobIterator : public IIterator
|
||||
{
|
||||
public:
|
||||
DisclosedGlobIterator(
|
||||
@ -46,7 +56,9 @@ public:
|
||||
Strings * read_keys_ = nullptr,
|
||||
const S3Settings::RequestSettings & request_settings_ = {});
|
||||
|
||||
String next();
|
||||
String next() override;
|
||||
|
||||
size_t getTotalSize() const override;
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
@ -54,12 +66,22 @@ public:
|
||||
std::shared_ptr<Impl> pimpl;
|
||||
};
|
||||
|
||||
class KeysIterator
|
||||
class KeysIterator : public IIterator
|
||||
{
|
||||
public:
|
||||
explicit KeysIterator(
|
||||
const std::vector<String> & keys_, const String & bucket_, ASTPtr query, const Block & virtual_header, ContextPtr context);
|
||||
String next();
|
||||
const Aws::S3::S3Client & client_,
|
||||
const std::string & version_id_,
|
||||
const std::vector<String> & keys_,
|
||||
const String & bucket_,
|
||||
ASTPtr query,
|
||||
const Block & virtual_header,
|
||||
ContextPtr context,
|
||||
std::unordered_map<String, S3::ObjectInfo> * object_infos = nullptr);
|
||||
|
||||
String next() override;
|
||||
|
||||
size_t getTotalSize() const override;
|
||||
|
||||
private:
|
||||
class Impl;
|
||||
@ -67,7 +89,18 @@ public:
|
||||
std::shared_ptr<Impl> pimpl;
|
||||
};
|
||||
|
||||
using IteratorWrapper = std::function<String()>;
|
||||
class ReadTaskIterator : public IIterator
|
||||
{
|
||||
public:
|
||||
explicit ReadTaskIterator(const ReadTaskCallback & callback_) : callback(callback_) {}
|
||||
|
||||
String next() override { return callback(); }
|
||||
|
||||
size_t getTotalSize() const override { return 0; }
|
||||
|
||||
private:
|
||||
ReadTaskCallback callback;
|
||||
};
|
||||
|
||||
static Block getHeader(Block sample_block, const std::vector<NameAndTypePair> & requested_virtual_columns);
|
||||
|
||||
@ -85,7 +118,7 @@ public:
|
||||
const std::shared_ptr<const Aws::S3::S3Client> & client_,
|
||||
const String & bucket,
|
||||
const String & version_id,
|
||||
std::shared_ptr<IteratorWrapper> file_iterator_,
|
||||
std::shared_ptr<IIterator> file_iterator_,
|
||||
size_t download_thread_num,
|
||||
const std::unordered_map<String, S3::ObjectInfo> & object_infos_);
|
||||
|
||||
@ -116,11 +149,15 @@ private:
|
||||
/// onCancel and generate can be called concurrently
|
||||
std::mutex reader_mutex;
|
||||
std::vector<NameAndTypePair> requested_virtual_columns;
|
||||
std::shared_ptr<IteratorWrapper> file_iterator;
|
||||
std::shared_ptr<IIterator> file_iterator;
|
||||
size_t download_thread_num = 1;
|
||||
|
||||
Poco::Logger * log = &Poco::Logger::get("StorageS3Source");
|
||||
|
||||
UInt64 total_rows_approx_max = 0;
|
||||
size_t total_rows_count_times = 0;
|
||||
UInt64 total_rows_approx_accumulated = 0;
|
||||
|
||||
std::unordered_map<String, S3::ObjectInfo> object_infos;
|
||||
|
||||
/// Recreate ReadBuffer and Pipeline for each file.
|
||||
@ -233,7 +270,7 @@ private:
|
||||
|
||||
static void updateS3Configuration(ContextPtr, S3Configuration &);
|
||||
|
||||
static std::shared_ptr<StorageS3Source::IteratorWrapper> createFileIterator(
|
||||
static std::shared_ptr<StorageS3Source::IIterator> createFileIterator(
|
||||
const S3Configuration & s3_configuration,
|
||||
const std::vector<String> & keys,
|
||||
bool is_key_with_globs,
|
||||
|
@@ -102,7 +102,7 @@ Pipe StorageS3Cluster::read(

auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(
*s3_configuration.client, s3_configuration.uri, query_info.query, virtual_block, context);
auto callback = std::make_shared<StorageS3Source::IteratorWrapper>([iterator]() mutable -> String { return iterator->next(); });
auto callback = std::make_shared<std::function<String()>>([iterator]() mutable -> String { return iterator->next(); });

/// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*)
auto interpreter = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze());
@ -6,23 +6,12 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/Throttler.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <base/unit.h>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
/// An object up to 5 GB can be copied in a single atomic operation.
|
||||
constexpr UInt64 DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 5_GiB;
|
||||
|
||||
/// The maximum size of an uploaded part.
|
||||
constexpr UInt64 DEFAULT_MAX_UPLOAD_PART_SIZE = 5_GiB;
|
||||
}
|
||||
|
||||
|
||||
void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings)
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
@ -61,11 +50,12 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U
|
||||
S3Settings::RequestSettings request_settings;
|
||||
request_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries);
|
||||
request_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size);
|
||||
request_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, DEFAULT_MAX_UPLOAD_PART_SIZE);
|
||||
request_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, S3Settings::RequestSettings::DEFAULT_MAX_UPLOAD_PART_SIZE);
|
||||
request_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, settings.s3_upload_part_size_multiply_factor);
|
||||
request_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold);
|
||||
request_settings.max_part_number = get_uint_for_key(key, "max_part_number", true, S3Settings::RequestSettings::DEFAULT_MAX_PART_NUMBER);
|
||||
request_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size);
|
||||
request_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE);
|
||||
request_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, S3Settings::RequestSettings::DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE);
|
||||
request_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections);
|
||||
request_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false);
|
||||
|
||||
@ -128,6 +118,8 @@ void S3Settings::RequestSettings::updateFromSettingsIfEmpty(const Settings & set
|
||||
upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor;
|
||||
if (!upload_part_size_multiply_parts_count_threshold)
|
||||
upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold;
|
||||
if (!max_part_number)
|
||||
max_part_number = DEFAULT_MAX_PART_NUMBER;
|
||||
if (!max_single_part_upload_size)
|
||||
max_single_part_upload_size = settings.s3_max_single_part_upload_size;
|
||||
if (!max_single_operation_copy_size)
|
||||
|
@ -31,6 +31,7 @@ struct S3Settings
|
||||
size_t max_upload_part_size = 0;
|
||||
size_t upload_part_size_multiply_factor = 0;
|
||||
size_t upload_part_size_multiply_parts_count_threshold = 0;
|
||||
size_t max_part_number = 0;
|
||||
size_t max_single_part_upload_size = 0;
|
||||
size_t max_single_operation_copy_size = 0;
|
||||
size_t max_connections = 0;
|
||||
@ -49,6 +50,7 @@ struct S3Settings
|
||||
&& max_upload_part_size == other.max_upload_part_size
|
||||
&& upload_part_size_multiply_factor == other.upload_part_size_multiply_factor
|
||||
&& upload_part_size_multiply_parts_count_threshold == other.upload_part_size_multiply_parts_count_threshold
|
||||
&& max_part_number == other.max_part_number
|
||||
&& max_single_part_upload_size == other.max_single_part_upload_size
|
||||
&& max_single_operation_copy_size == other.max_single_operation_copy_size
|
||||
&& max_connections == other.max_connections
|
||||
@ -58,6 +60,18 @@ struct S3Settings
|
||||
&& put_request_throttler == other.put_request_throttler;
|
||||
}
|
||||
|
||||
static const constexpr UInt64 DEFAULT_SINGLE_READ_RETRIES = 4;
|
||||
static const constexpr UInt64 DEFAULT_MIN_UPLOAD_PART_SIZE = 16 * 1024 * 1024;
|
||||
static const constexpr UInt64 DEFAULT_MAX_UPLOAD_PART_SIZE = 5ULL * 1024 * 1024 * 1024;
|
||||
static const constexpr UInt64 DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR = 2;
|
||||
static const constexpr UInt64 DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD = 500;
|
||||
static const constexpr UInt64 DEFAULT_MAX_PART_NUMBER = 10000;
|
||||
static const constexpr UInt64 DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE = 32 * 1024 * 1024;
|
||||
static const constexpr UInt64 DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 5ULL * 1024 * 1024 * 1024;
|
||||
static const constexpr UInt64 DEFAULT_MAX_CONNECTIONS = 1024;
|
||||
static const constexpr UInt64 DEFAULT_MAX_UNEXPECTED_WRITE_ERRORS_RETRIES = 4;
|
||||
|
||||
void setEmptyFieldsByDefault();
|
||||
void updateFromSettingsIfEmpty(const Settings & settings);
|
||||
};
|
||||
|
||||
@ -83,4 +97,28 @@ private:
|
||||
std::map<const String, const S3Settings> s3_settings;
|
||||
};
|
||||
|
||||
inline void S3Settings::RequestSettings::setEmptyFieldsByDefault()
|
||||
{
|
||||
if (!max_single_read_retries)
|
||||
max_single_read_retries = DEFAULT_SINGLE_READ_RETRIES;
|
||||
if (!min_upload_part_size)
|
||||
min_upload_part_size = DEFAULT_MIN_UPLOAD_PART_SIZE;
|
||||
if (!max_upload_part_size)
|
||||
max_upload_part_size = DEFAULT_MAX_UPLOAD_PART_SIZE;
|
||||
if (!upload_part_size_multiply_factor)
|
||||
upload_part_size_multiply_factor = DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_FACTOR;
|
||||
if (!upload_part_size_multiply_parts_count_threshold)
|
||||
upload_part_size_multiply_parts_count_threshold = DEFAULT_UPLOAD_PART_SIZE_MULTIPLY_PARTS_COUNT_THRESHOLD;
|
||||
if (!max_part_number)
|
||||
max_part_number = DEFAULT_MAX_PART_NUMBER;
|
||||
if (!max_single_part_upload_size)
|
||||
max_single_part_upload_size = DEFAULT_MAX_SINGLE_PART_UPLOAD_SIZE;
|
||||
if (!max_single_operation_copy_size)
|
||||
max_single_operation_copy_size = DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE;
|
||||
if (!max_connections)
|
||||
max_connections = DEFAULT_MAX_CONNECTIONS;
|
||||
if (!max_unexpected_write_error_retries)
|
||||
max_unexpected_write_error_retries = DEFAULT_MAX_UNEXPECTED_WRITE_ERRORS_RETRIES;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -626,6 +626,7 @@ void StorageStripeLog::restoreDataImpl(const BackupPtr & backup, const String &

auto in = backup_entry->getReadBuffer();
auto out = disk->writeFile(data_file_path, max_compress_block_size, WriteMode::Append);
copyData(*in, *out);
out->finalize();
}

/// Append the index.
@@ -0,0 +1,4 @@
SET allow_experimental_analyzer = 1;

SELECT count((t, x_0, x_1) -> ((key_2, x_0, x_1) IN (NULL, NULL, '0.3'))) FROM numbers(10); -- { serverError 1 }
SELECT count((t, x_0, x_1) -> ((key_2, x_0, x_1) IN (NULL, NULL, '0.3'))) OVER (PARTITION BY id) FROM numbers(10); -- { serverError 1 }