Merge remote-tracking branch 'origin/master' into asan-invalid-shared-context-access

Igor Nikonov 2024-08-11 18:53:36 +00:00
commit cd456b149b
32 changed files with 1376 additions and 310 deletions

View File

@ -69,8 +69,8 @@ ENV MAX_RUN_TIME=0
# Unrelated to vars in setup_minio.sh, but should be the same there
# to have the same binaries for local running scenario
ARG MINIO_SERVER_VERSION=2022-01-03T18-22-58Z
ARG MINIO_CLIENT_VERSION=2022-01-05T23-52-51Z
ARG MINIO_SERVER_VERSION=2024-08-03T04-33-23Z
ARG MINIO_CLIENT_VERSION=2024-07-31T15-58-33Z
ARG TARGETARCH
# Download Minio-related binaries

View File

@ -59,8 +59,8 @@ find_os() {
download_minio() {
local os
local arch
local minio_server_version=${MINIO_SERVER_VERSION:-2022-09-07T22-25-02Z}
local minio_client_version=${MINIO_CLIENT_VERSION:-2022-08-28T20-08-11Z}
local minio_server_version=${MINIO_SERVER_VERSION:-2024-08-03T04-33-23Z}
local minio_client_version=${MINIO_CLIENT_VERSION:-2024-07-31T15-58-33Z}
os=$(find_os)
arch=$(find_arch)
@ -82,10 +82,10 @@ setup_minio() {
local test_type=$1
./mc alias set clickminio http://localhost:11111 clickhouse clickhouse
./mc admin user add clickminio test testtest
./mc admin policy set clickminio readwrite user=test
./mc admin policy attach clickminio readwrite --user=test
./mc mb --ignore-existing clickminio/test
if [ "$test_type" = "stateless" ]; then
./mc policy set public clickminio/test
./mc anonymous set public clickminio/test
fi
}
@ -148,4 +148,4 @@ main() {
setup_aws_credentials
}
main "$@"
main "$@"

File diff suppressed because it is too large

View File

@ -68,13 +68,19 @@ const WithRetries::KeeperSettings & WithRetries::getKeeperSettings() const
WithRetries::FaultyKeeper WithRetries::getFaultyZooKeeper() const
{
/// We need to create new instance of ZooKeeperWithFaultInjection each time a copy a pointer to ZooKeeper client there
zkutil::ZooKeeperPtr current_zookeeper;
{
std::lock_guard lock(zookeeper_mutex);
current_zookeeper = zookeeper;
}
/// We need to create new instance of ZooKeeperWithFaultInjection each time and copy a pointer to ZooKeeper client there
/// The reason is that ZooKeeperWithFaultInjection may reset the underlying pointer and there could be a race condition
/// when the same object is used from multiple threads.
auto faulty_zookeeper = ZooKeeperWithFaultInjection::createInstance(
settings.keeper_fault_injection_probability,
settings.keeper_fault_injection_seed,
zookeeper,
current_zookeeper,
log->name(),
log);
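Aside (not part of the diff): a minimal, self-contained sketch of the copy-under-lock pattern this hunk introduces. Holder and Client are illustrative stand-ins, not ClickHouse types; the point is that the shared_ptr is copied while the mutex is held, so a concurrent replacement of the member cannot race with the consumer that receives the copy.

#include <memory>
#include <mutex>

struct Client { /* stands in for the ZooKeeper client */ };

class Holder
{
public:
    std::shared_ptr<Client> getClientCopy() const
    {
        std::shared_ptr<Client> current;
        {
            std::lock_guard lock(mutex);
            current = client; // copy the pointer under the lock
        }
        return current; // safe to hand out even if another thread replaces `client`
    }

    void replaceClient(std::shared_ptr<Client> new_client)
    {
        std::lock_guard lock(mutex);
        client = std::move(new_client);
    }

private:
    mutable std::mutex mutex;
    std::shared_ptr<Client> client = std::make_shared<Client>();
};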

View File

@ -237,7 +237,14 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(
res->write_fds.emplace(fd, fds.fds_rw[1]);
}
LOG_TRACE(getLogger(), "Started shell command '{}' with pid {}", filename, pid);
LOG_TRACE(
getLogger(),
"Started shell command '{}' with pid {} and file descriptors: out {}, err {}",
filename,
pid,
res->out.getFD(),
res->err.getFD());
return res;
}

View File

@ -605,7 +605,7 @@ class IColumn;
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_functions_to_subcolumns, true, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \

View File

@ -84,6 +84,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
{"allow_experimental_time_series_table", false, false, "Added new setting to allow the TimeSeries table engine"},
{"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
{"optimize_functions_to_subcolumns", false, true, "Enabled settings by default"},
}
},
{"24.7",

View File

@ -305,7 +305,8 @@ void S3ObjectStorage::listObjects(const std::string & path, RelativePathsWithMet
S3::ListObjectsV2Request request;
request.SetBucket(uri.bucket);
request.SetPrefix(path);
if (path != "/")
request.SetPrefix(path);
if (max_keys)
request.SetMaxKeys(static_cast<int>(max_keys));
else

View File

@ -8,13 +8,15 @@
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <QueryPipeline/Pipe.h>
#include <Processors/ISimpleTransform.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Interpreters/Context.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/Formats/IOutputFormat.h>
#include <Processors/ISimpleTransform.h>
#include <QueryPipeline/Pipe.h>
#include <boost/circular_buffer.hpp>
#include <ranges>
namespace DB
{
@ -68,11 +70,17 @@ static void makeFdBlocking(int fd)
static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_milliseconds)
{
auto logger = getLogger("TimeoutReadBufferFromFileDescriptor");
auto describe_fd = [](const auto & pollfd) { return fmt::format("(fd={}, flags={})", pollfd.fd, fcntl(pollfd.fd, F_GETFL)); };
int res;
while (true)
{
Stopwatch watch;
LOG_TEST(logger, "Polling descriptors: {}", fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", "));
res = poll(pfds, static_cast<nfds_t>(num), static_cast<int>(timeout_milliseconds));
if (res < 0)
@ -82,7 +90,10 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond
const auto elapsed = watch.elapsedMilliseconds();
if (timeout_milliseconds <= elapsed)
{
LOG_TEST(logger, "Timeout exceeded: elapsed={}, timeout={}", elapsed, timeout_milliseconds);
break;
}
timeout_milliseconds -= elapsed;
}
else
@ -91,6 +102,12 @@ static int pollWithTimeout(pollfd * pfds, size_t num, size_t timeout_millisecond
}
}
LOG_TEST(
logger,
"Poll for descriptors: {} returned {}",
fmt::join(std::span(pfds, pfds + num) | std::views::transform(describe_fd), ", "),
res);
return res;
}
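Aside (not part of the diff): a stand-alone sketch of the retry-and-decrement-timeout loop that this hunk instruments, with std::chrono in place of ClickHouse's Stopwatch and fprintf in place of LOG_TEST. POSIX-only and purely illustrative; error handling for non-EINTR failures is reduced to returning the error.

#include <cerrno>
#include <chrono>
#include <cstddef>
#include <cstdio>
#include <poll.h>

int pollWithRemainingTimeout(pollfd * pfds, size_t num, size_t timeout_milliseconds)
{
    int res;
    while (true)
    {
        const auto start = std::chrono::steady_clock::now();
        res = poll(pfds, static_cast<nfds_t>(num), static_cast<int>(timeout_milliseconds));

        if (res < 0)
        {
            if (errno != EINTR)
                return res; // real error: let the caller handle it

            // Interrupted by a signal: subtract the time already spent and poll again.
            const auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(
                                     std::chrono::steady_clock::now() - start).count();
            if (timeout_milliseconds <= static_cast<size_t>(elapsed))
            {
                std::fprintf(stderr, "Timeout exceeded: elapsed=%lld, timeout=%zu\n",
                             static_cast<long long>(elapsed), timeout_milliseconds);
                break;
            }
            timeout_milliseconds -= static_cast<size_t>(elapsed);
        }
        else
            break; // got events, or res == 0 meaning the timeout expired
    }
    return res;
}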
@ -200,12 +217,6 @@ public:
return true;
}
void reset() const
{
makeFdBlocking(stdout_fd);
makeFdBlocking(stderr_fd);
}
~TimeoutReadBufferFromFileDescriptor() override
{
tryMakeFdBlocking(stdout_fd);

View File

@ -749,8 +749,16 @@ void IMergeTreeDataPart::loadColumnsChecksumsIndexes(bool require_columns_checks
/// Probably there is something wrong with files of this part.
/// So it can be helpful to add to the error message some information about those files.
String files_in_part;
for (auto it = getDataPartStorage().iterate(); it->isValid(); it->next())
files_in_part += fmt::format("{}{} ({} bytes)", (files_in_part.empty() ? "" : ", "), it->name(), getDataPartStorage().getFileSize(it->name()));
{
std::string file_info;
if (!getDataPartStorage().isDirectory(it->name()))
file_info = fmt::format(" ({} bytes)", getDataPartStorage().getFileSize(it->name()));
files_in_part += fmt::format("{}{}{}", (files_in_part.empty() ? "" : ", "), it->name(), file_info);
}
if (!files_in_part.empty())
e->addMessage("Part contains files: {}", files_in_part);
if (isEmpty())
@ -2141,7 +2149,27 @@ void IMergeTreeDataPart::checkConsistencyBase() const
}
}
checksums.checkSizes(getDataPartStorage());
const auto & data_part_storage = getDataPartStorage();
for (const auto & [filename, checksum] : checksums.files)
{
try
{
checksum.checkSize(data_part_storage, filename);
}
catch (const Exception & ex)
{
/// For projection parts check will mark them broken in loadProjections
if (!parent_part && filename.ends_with(".proj"))
{
std::string projection_name = fs::path(filename).stem();
LOG_INFO(storage.log, "Projection {} doesn't exist on start for part {}, marking it as broken", projection_name, name);
if (hasProjection(projection_name))
markProjectionPartAsBroken(projection_name, ex.message(), ex.code());
}
else
throw;
}
}
}
else
{

View File

@ -1146,7 +1146,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
auto metadata_snapshot = getInMemoryMetadataPtr();
auto virtual_columns_block = getBlockWithVirtualsForFilter(metadata_snapshot, {parts[0]});
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr);
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag.getOutputs().at(0), nullptr, /*allow_partial_result=*/ false);
if (!filter_dag)
return {};
@ -6932,7 +6932,8 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
const auto * predicate = filter_dag->getOutputs().at(0);
// Generate valid expressions for filtering
VirtualColumnUtils::filterBlockWithPredicate(predicate, virtual_columns_block, query_context);
VirtualColumnUtils::filterBlockWithPredicate(
predicate, virtual_columns_block, query_context, /*allow_filtering_with_partial_predicate =*/true);
rows = virtual_columns_block.rows();
part_name_column = virtual_columns_block.getByName("_part").column;

View File

@ -100,12 +100,6 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r
}
}
void MergeTreeDataPartChecksums::checkSizes(const IDataPartStorage & storage) const
{
for (const auto & [name, checksum] : files)
checksum.checkSize(storage, name);
}
UInt64 MergeTreeDataPartChecksums::getTotalSizeOnDisk() const
{
UInt64 res = 0;

View File

@ -65,9 +65,6 @@ struct MergeTreeDataPartChecksums
static bool isBadChecksumsErrorCode(int code);
/// Checks that the directory contains all the needed files of the correct size. Does not check the checksum.
void checkSizes(const IDataPartStorage & storage) const;
/// Returns false if the checksum is too old.
bool read(ReadBuffer & in);
/// Assume that header with version (the first line) is read

View File

@ -1,51 +1,46 @@
#include <algorithm>
#include <Storages/VirtualColumnUtils.h>
#include <memory>
#include <stack>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnSet.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/FilterDescription.h>
#include <Core/NamesAndTypes.h>
#include <Core/TypeId.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/FunctionsLogical.h>
#include <Functions/IFunction.h>
#include <Functions/IFunctionAdaptors.h>
#include <Functions/indexHint.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/ActionsVisitor.h>
#include <Interpreters/Context.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/misc.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTSubquery.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/FilterDescription.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Parsers/makeASTForLogicalFunction.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/Sinks/EmptySink.h>
#include <Processors/Executors/CompletedPipelineExecutor.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <Storages/VirtualColumnUtils.h>
#include <IO/WriteHelpers.h>
#include <Common/typeid_cast.h>
#include "Functions/FunctionsLogical.h"
#include "Functions/IFunction.h"
#include "Functions/IFunctionAdaptors.h"
#include "Functions/indexHint.h"
#include <Parsers/makeASTForLogicalFunction.h>
#include <Columns/ColumnSet.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/ActionsVisitor.h>
namespace DB
@ -281,9 +276,7 @@ bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node)
}
static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
const ActionsDAG::Node * node,
const Block * allowed_inputs,
ActionsDAG::Nodes & additional_nodes)
const ActionsDAG::Node * node, const Block * allowed_inputs, ActionsDAG::Nodes & additional_nodes, bool allow_partial_result)
{
if (node->type == ActionsDAG::ActionType::FUNCTION)
{
@ -292,8 +285,15 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto & node_copy = additional_nodes.emplace_back(*node);
node_copy.children.clear();
for (const auto * child : node->children)
if (const auto * child_copy = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes))
if (const auto * child_copy
= splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result))
node_copy.children.push_back(child_copy);
/// Expression like (not_allowed AND allowed) is not allowed if allow_partial_result = true. This is important for
/// trivial count optimization, otherwise we can get incorrect results. For example, if the query is
/// SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1, we cannot apply
/// trivial count.
else if (!allow_partial_result)
return nullptr;
if (node_copy.children.empty())
return nullptr;
@ -301,7 +301,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
if (node_copy.children.size() == 1)
{
const ActionsDAG::Node * res = node_copy.children.front();
/// Expression like (not_allowed AND 256) can't be resuced to (and(256)) because AND requires
/// Expression like (not_allowed AND 256) can't be reduced to (and(256)) because AND requires
/// at least two arguments; also it can't be reduced to (256) because result type is different.
if (!res->result_type->equals(*node->result_type))
{
@ -319,7 +319,7 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
{
auto & node_copy = additional_nodes.emplace_back(*node);
for (auto & child : node_copy.children)
if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes); !child)
if (child = splitFilterNodeForAllowedInputs(child, allowed_inputs, additional_nodes, allow_partial_result); !child)
return nullptr;
return &node_copy;
@ -333,7 +333,8 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
auto index_hint_dag = index_hint->getActions().clone();
ActionsDAG::NodeRawConstPtrs atoms;
for (const auto & output : index_hint_dag.getOutputs())
if (const auto * child_copy = splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes))
if (const auto * child_copy
= splitFilterNodeForAllowedInputs(output, allowed_inputs, additional_nodes, allow_partial_result))
atoms.push_back(child_copy);
if (!atoms.empty())
@ -367,22 +368,24 @@ static const ActionsDAG::Node * splitFilterNodeForAllowedInputs(
return node;
}
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs)
std::optional<ActionsDAG>
splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result)
{
if (!predicate)
return {};
ActionsDAG::Nodes additional_nodes;
const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes);
const auto * res = splitFilterNodeForAllowedInputs(predicate, allowed_inputs, additional_nodes, allow_partial_result);
if (!res)
return {};
return ActionsDAG::cloneSubDAG({res}, true);
}
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context)
void filterBlockWithPredicate(
const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate)
{
auto dag = splitFilterDagForAllowedInputs(predicate, &block);
auto dag = splitFilterDagForAllowedInputs(predicate, &block, /*allow_partial_result=*/allow_filtering_with_partial_predicate);
if (dag)
filterBlockWithExpression(buildFilterExpression(std::move(*dag), context), block);
}

View File

@ -26,9 +26,13 @@ namespace VirtualColumnUtils
///
/// Otherwise calling filter*() outside applyFilters() will throw "Not-ready Set is passed"
/// if there are subqueries.
///
/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))).
/// Similar to filterBlockWithQuery, but uses ActionsDAG as a predicate.
/// Basically it is filterBlockWithDAG(splitFilterDagForAllowedInputs).
/// If allow_filtering_with_partial_predicate is true, then the filtering will be done even if some part of the predicate
/// cannot be evaluated using the columns from the block.
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context, bool allow_filtering_with_partial_predicate = true);
/// Similar to filterBlockWithExpression(buildFilterExpression(splitFilterDagForAllowedInputs(...))).
void filterBlockWithPredicate(const ActionsDAG::Node * predicate, Block & block, ContextPtr context);
/// Just filters block. Block should contain all the required columns.
ExpressionActionsPtr buildFilterExpression(ActionsDAG dag, ContextPtr context);
@ -41,7 +45,15 @@ void buildSetsForDAG(const ActionsDAG & dag, const ContextPtr & context);
bool isDeterministicInScopeOfQuery(const ActionsDAG::Node * node);
/// Extract a part of predicate that can be evaluated using only columns from input_names.
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs);
/// When allow_partial_result is false, then the result will be empty if any part of it cannot be evaluated deterministically
/// on the given inputs.
/// allow_partial_result must be false when we are going to use the result to filter parts in
/// MergeTreeData::totalRowsByPartitionPredicateImpl. For example, if the query is
/// `SELECT count() FROM table WHERE _partition_id = '0' AND rowNumberInBlock() = 1`
/// The predicate will be `_partition_id = '0' AND rowNumberInBlock() = 1`, and `rowNumberInBlock()` is
/// non-deterministic. If we still extract the part `_partition_id = '0'` for filtering parts, then trivial
/// count optimization will be mistakenly applied to the query.
std::optional<ActionsDAG> splitFilterDagForAllowedInputs(const ActionsDAG::Node * predicate, const Block * allowed_inputs, bool allow_partial_result = true);
/// Extract from the input stream a set of `name` column values
template <typename T>
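Aside (not part of the diff): a toy, self-contained analogue of the allow_partial_result semantics described above. The Expr type and all names here are invented for illustration; ClickHouse operates on ActionsDAG nodes instead. With allow_partial_result = true the evaluable conjunct is kept (fine for generic virtual-column filtering), with false the whole predicate is rejected, which is what keeps trivial count from being applied to the rowNumberInBlock() example.

#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct Expr
{
    std::string name; // e.g. "_partition_id = '0'", "rowNumberInBlock() = 1", or "and"
    bool evaluable = true; // can this leaf be computed on the allowed inputs?
    std::vector<std::shared_ptr<Expr>> children;
};

std::optional<std::shared_ptr<Expr>> splitForAllowedInputs(const std::shared_ptr<Expr> & node, bool allow_partial_result)
{
    if (node->name == "and")
    {
        auto copy = std::make_shared<Expr>(*node);
        copy->children.clear();
        for (const auto & child : node->children)
        {
            if (auto child_copy = splitForAllowedInputs(child, allow_partial_result))
                copy->children.push_back(*child_copy);
            else if (!allow_partial_result)
                return std::nullopt; // one non-evaluable conjunct poisons the whole AND
        }
        if (copy->children.empty())
            return std::nullopt;
        return copy;
    }
    if (node->evaluable)
        return node;
    return std::nullopt;
}

int main()
{
    auto partition = std::make_shared<Expr>(Expr{"_partition_id = '0'", true, {}});
    auto row_number = std::make_shared<Expr>(Expr{"rowNumberInBlock() = 1", false, {}});
    auto predicate = std::make_shared<Expr>(Expr{"and", true, {partition, row_number}});

    std::cout << splitForAllowedInputs(predicate, true).has_value() << '\n';  // 1: partial predicate kept
    std::cout << splitForAllowedInputs(predicate, false).has_value() << '\n'; // 0: rejected entirely
}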

View File

@ -985,6 +985,7 @@ def _run_test(job_name: str, run_command: str) -> int:
else:
print("Use run command from the workflow")
env["CHECK_NAME"] = job_name
env["MAX_RUN_TIME"] = str(timeout or 0)
print(f"Going to start run command [{run_command}]")
stopwatch = Stopwatch()
job_log = Path(TEMP_PATH) / "job_log.txt"

View File

@ -114,6 +114,9 @@ def get_run_command(
if flaky_check:
envs.append("-e NUM_TRIES=50")
envs.append("-e MAX_RUN_TIME=2800")
else:
max_run_time = os.getenv("MAX_RUN_TIME", "0")
envs.append(f"-e MAX_RUN_TIME={max_run_time}")
envs += [f"-e {e}" for e in additional_envs]

View File

@ -738,7 +738,7 @@ def create_test_html_report(
if test_results:
rows_part = []
num_fails = 0
has_test_time = False
has_test_time = any(tr.time is not None for tr in test_results)
has_log_urls = False
# Display entries with logs at the top (they correspond to failed tests)
@ -770,12 +770,12 @@ def create_test_html_report(
row.append(f'<td {fail_id}style="{style}">{test_result.status}</td>')
colspan += 1
row.append("<td>")
if test_result.time is not None:
has_test_time = True
row.append(str(test_result.time))
row.append("</td>")
colspan += 1
if has_test_time:
if test_result.time is not None:
row.append(f"<td>{test_result.time}</td>")
else:
row.append("<td></td>")
colspan += 1
if test_result.log_urls is not None:
has_log_urls = True

View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<clickhouse>
<merge_tree>
<max_suspicious_broken_parts_bytes>0</max_suspicious_broken_parts_bytes>
</merge_tree>
</clickhouse>

View File

@ -4,6 +4,7 @@ import logging
import string
import random
from helpers.cluster import ClickHouseCluster
from multiprocessing.dummy import Pool
cluster = ClickHouseCluster(__file__)
@ -18,6 +19,12 @@ def cluster():
stay_alive=True,
with_zookeeper=True,
)
cluster.add_instance(
"node_restart",
main_configs=["config.d/dont_start_broken.xml"],
stay_alive=True,
with_zookeeper=True,
)
logging.info("Starting cluster...")
cluster.start()
@ -632,6 +639,49 @@ def test_broken_on_start(cluster):
check(node, table_name, 0)
def test_disappeared_projection_on_start(cluster):
node = cluster.instances["node_restart"]
table_name = "test_disapperead_projection"
create_table(node, table_name, 1)
node.query(f"SYSTEM STOP MERGES {table_name}")
insert(node, table_name, 0, 5)
insert(node, table_name, 5, 5)
insert(node, table_name, 10, 5)
insert(node, table_name, 15, 5)
assert ["all_0_0_0", "all_1_1_0", "all_2_2_0", "all_3_3_0"] == get_parts(
node, table_name
)
def drop_projection():
node.query(
f"ALTER TABLE {table_name} DROP PROJECTION proj2",
settings={"mutations_sync": "0"},
)
p = Pool(2)
p.apply_async(drop_projection)
for i in range(30):
create_query = node.query(f"SHOW CREATE TABLE {table_name}")
if "proj2" not in create_query:
break
time.sleep(0.5)
assert "proj2" not in create_query
# Remove 'proj2' for part all_2_2_0
break_projection(node, table_name, "proj2", "all_2_2_0", "part")
node.restart_clickhouse()
# proj2 is not broken, it doesn't exist, but ok
check(node, table_name, 0, expect_broken_part="proj2", do_check_command=0)
def test_mutation_with_broken_projection(cluster):
node = cluster.instances["node"]

View File

@ -176,7 +176,7 @@ def test_query_is_permanent(transaction, permanent, exclusive_table):
select_handler = node.get_query_request(
f"""
SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0;
SELECT sleepEachRow(3) FROM {exclusive_table} SETTINGS function_sleep_max_microseconds_per_block = 0, max_threads=1;
""",
query_id=query_id,
)

View File

@ -0,0 +1,26 @@
<test>
<settings>
<max_insert_threads>4</max_insert_threads>
</settings>
<create_query>
CREATE TABLE t_subcolumns (a Array(UInt64), s Nullable(String), m Map(String, UInt64)) ENGINE = MergeTree ORDER BY tuple()
</create_query>
<fill_query>
INSERT INTO t_subcolumns SELECT range(number % 20), toString(number), mapFromArrays(range(number % 20), range(number % 20)) FROM numbers_mt(50000000)
</fill_query>
<fill_query>
OPTIMIZE TABLE t_subcolumns FINAL
</fill_query>
<query>SELECT count() FROM t_subcolumns WHERE NOT ignore(length(a))</query>
<query>SELECT count() FROM t_subcolumns WHERE notEmpty(a)</query>
<query>SELECT count() FROM t_subcolumns WHERE NOT ignore(length(m))</query>
<query>SELECT count() FROM t_subcolumns WHERE notEmpty(m)</query>
<query>SELECT count() FROM t_subcolumns WHERE isNotNull(s)</query>
<query>SELECT count(s) FROM t_subcolumns</query>
<drop_query>DROP TABLE t_subcolumns</drop_query>
</test>

View File

@ -27,7 +27,7 @@ function wait_until()
function get_buffer_delay()
{
local buffer_insert_id=$1 && shift
query "SYSTEM FLUSH LOGS"
$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
query "
WITH
(SELECT event_time_microseconds FROM system.query_log WHERE current_database = '$CLICKHOUSE_DATABASE' AND type = 'QueryStart' AND query_id = '$buffer_insert_id') AS begin_,

View File

@ -1,10 +1,16 @@
#!/usr/bin/env bash
# Tags: long
# Tags: long, no-parallel
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
function query()
{
# NOTE: database_atomic_wait_for_drop_and_detach_synchronously needed only for local env, CI has it ON
${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&database_atomic_wait_for_drop_and_detach_synchronously=1" -d "$*"
}
# NOTE: database = $CLICKHOUSE_DATABASE is unwanted
verify_sql="SELECT
(SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics)
@ -18,13 +24,13 @@ verify()
{
for i in {1..5000}
do
result=$( $CLICKHOUSE_CLIENT --query="$verify_sql" )
result=$( query "$verify_sql" )
[ "$result" = "1" ] && echo "$result" && break
sleep 0.1
if [[ $i -eq 5000 ]]
then
$CLICKHOUSE_CLIENT "
query "
SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics;
SELECT sum(active), sum(NOT active) FROM system.parts;
SELECT sum(active), sum(NOT active) FROM system.projection_parts;
@ -34,17 +40,17 @@ verify()
done
}
$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE IF EXISTS test_table"
$CLICKHOUSE_CLIENT --query="CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;"
query "DROP TABLE IF EXISTS test_table"
query "CREATE TABLE test_table (data Date) ENGINE = MergeTree PARTITION BY toYear(data) ORDER BY data;"
$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-01')"
query "INSERT INTO test_table VALUES ('1992-01-01')"
verify
$CLICKHOUSE_CLIENT --query="INSERT INTO test_table VALUES ('1992-01-02')"
query "INSERT INTO test_table VALUES ('1992-01-02')"
verify
$CLICKHOUSE_CLIENT --query="OPTIMIZE TABLE test_table FINAL"
query "OPTIMIZE TABLE test_table FINAL"
verify
$CLICKHOUSE_CLIENT --database_atomic_wait_for_drop_and_detach_synchronously=1 --query="DROP TABLE test_table"
query "DROP TABLE test_table"
verify

View File

@ -0,0 +1,4 @@
CREATE TABLE t (p UInt8, x UInt64) Engine = MergeTree PARTITION BY p ORDER BY x;
INSERT INTO t SELECT 0, number FROM numbers(10) SETTINGS max_block_size = 100;
SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 0;
SELECT count() FROM t WHERE p = 0 AND rowNumberInAllBlocks() = 1 SETTINGS allow_experimental_analyzer = 1;

View File

@ -0,0 +1,6 @@
Expression ((Project names + Projection))
Aggregating
Expression (Before GROUP BY)
ReadFromMerge
Filter (( + ( + )))
ReadFromMergeTree (default.test_03217_merge_replica_1)

View File

@ -0,0 +1,16 @@
CREATE TABLE test_03217_merge_replica_1(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r1')
ORDER BY x;
CREATE TABLE test_03217_merge_replica_2(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_merge_replica', 'r2')
ORDER BY x;
CREATE TABLE test_03217_all_replicas (x UInt32)
ENGINE = Merge(currentDatabase(), 'test_03217_merge_replica_*');
INSERT INTO test_03217_merge_replica_1 SELECT number AS x FROM numbers(10);
SYSTEM SYNC REPLICA test_03217_merge_replica_2;
-- If the filter on _table is not applied, then the plan will show both replicas
EXPLAIN SELECT _table, count() FROM test_03217_all_replicas WHERE _table = 'test_03217_merge_replica_1' AND x >= 0 GROUP BY _table SETTINGS allow_experimental_analyzer=1;

View File

@ -0,0 +1,6 @@
information_schema tables
both default test_03217_system_tables_replica_1 r1
both default test_03217_system_tables_replica_2 r2
default test_03217_system_tables_replica_1 r1
1
1

View File

@ -0,0 +1,30 @@
-- If filtering is not done correctly on databases, then this query will report reading 3 rows, which are: `system.tables`, `information_schema.tables` and `INFORMATION_SCHEMA.tables`
SELECT database, table FROM system.tables WHERE database = 'information_schema' AND table = 'tables';
CREATE TABLE test_03217_system_tables_replica_1(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r1')
ORDER BY x;
CREATE TABLE test_03217_system_tables_replica_2(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03217_system_tables_replica', 'r2')
ORDER BY x;
-- Make sure we can read both replicas
-- The replica name might be altered because of `_functional_tests_helper_database_replicated_replace_args_macros`,
-- thus we need to use `left`
SELECT 'both', database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase();
-- If filtering is not done correctly on the database and table columns, then this query will report reading 2 rows, which are the above tables
SELECT database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase() AND table = 'test_03217_system_tables_replica_1' AND replica_name LIKE 'r1%';
SYSTEM FLUSH LOGS;
-- argMax is necessary to make the test repeatable
-- StorageSystemTables
SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1
AND current_database = currentDatabase()
AND query LIKE '%SELECT database, table FROM system.tables WHERE database = \'information_schema\' AND table = \'tables\';'
AND type = 'QueryFinish';
-- StorageSystemReplicas
SELECT argMax(read_rows, event_time_microseconds) FROM system.query_log WHERE 1
AND current_database = currentDatabase()
AND query LIKE '%SELECT database, table, left(replica_name, 2) FROM system.replicas WHERE database = currentDatabase() AND table = \'test_03217_system_tables_replica_1\' AND replica_name LIKE \'r1\%\';'
AND type = 'QueryFinish';

View File

@ -0,0 +1 @@
[(NULL,'11\01111111\011111','1111')] -2147483648 \N

View File

@ -0,0 +1,23 @@
SET enable_analyzer = 1;
SELECT
materialize([(NULL, '11\01111111\011111', '1111')]) AS t,
(t[1048576]).2,
materialize(-2147483648),
(t[-2147483648]).1
GROUP BY
materialize([(NULL, '1')]),
'',
(materialize((t[1023]).2), (materialize(''), (t[2147483647]).1, materialize(9223372036854775807)), (materialize(''), materialize(NULL, 2147483647, t[65535], 256)), materialize(NULL))
; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH}
SELECT
materialize([(NULL, '11\01111111\011111', '1111')]) AS t,
(t[1048576]).2,
materialize(-2147483648),
(t[-2147483648]).1
GROUP BY
materialize([(NULL, '1')]),
'',
(materialize((t[1023]).2), (materialize(''), (t[2147483647]).1, materialize(9223372036854775807)), (materialize(''), materialize(NULL), materialize(2147483647), materialize(t[65535]), materialize(256)), materialize(NULL))
;