Merge branch 'ClickHouse:master' into DictShortCircuit

jsc0218 2023-12-21 14:30:28 -05:00 committed by GitHub
commit a3a080f916
58 changed files with 483 additions and 410 deletions

.gitmessage (new file, 10 lines added)
View File

@ -0,0 +1,10 @@
## To avoid merge commit in CI run (add a leading space to apply):
#no-merge-commit
## Running specified job (add a leading space to apply):
#job_<JOB NAME>
#job_stateless_tests_release
#job_package_debug
#job_integration_tests_asan
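
For example, a hypothetical commit message using this template might look like the one below; the leading space in front of a token is what makes it take effect, while tokens still starting with '#' stay commented out:

Fix flaky stateless test (hypothetical subject line)

 #no-merge-commit
 #job_stateless_tests_release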

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.11.2.11"
ARG VERSION="23.11.3.23"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="23.11.2.11"
ARG VERSION="23.11.3.23"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -30,7 +30,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="23.11.2.11"
ARG VERSION="23.11.3.23"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2023
---
# 2023 Changelog
### ClickHouse release v23.11.3.23-stable (a14ab450b0e) FIXME as compared to v23.11.2.11-stable (6e5411358c8)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix invalid memory access in BLAKE3 (Rust) [#57876](https://github.com/ClickHouse/ClickHouse/pull/57876) ([Raúl Marín](https://github.com/Algunenano)).
* Normalize function names in CREATE INDEX [#57906](https://github.com/ClickHouse/ClickHouse/pull/57906) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix handling of unavailable replicas before first request happened [#57933](https://github.com/ClickHouse/ClickHouse/pull/57933) ([Nikita Taranov](https://github.com/nickitat)).
* Revert "Fix bug window functions: revert [#39631](https://github.com/ClickHouse/ClickHouse/issues/39631)" [#58031](https://github.com/ClickHouse/ClickHouse/pull/58031) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NO CL CATEGORY
* Backported in [#57918](https://github.com/ClickHouse/ClickHouse/issues/57918): [#57909](https://github.com/ClickHouse/ClickHouse/pull/57909) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Remove heavy rust stable toolchain [#57905](https://github.com/ClickHouse/ClickHouse/pull/57905) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix docker image for integration tests (fixes CI) [#57952](https://github.com/ClickHouse/ClickHouse/pull/57952) ([Azat Khuzhin](https://github.com/azat)).
* Always use `pread` for reading cache segments [#57970](https://github.com/ClickHouse/ClickHouse/pull/57970) ([Nikita Taranov](https://github.com/nickitat)).

View File

@ -157,11 +157,16 @@ BackupImpl::~BackupImpl()
void BackupImpl::open()
{
std::lock_guard lock{mutex};
LOG_INFO(log, "{} backup: {}", ((open_mode == OpenMode::WRITE) ? "Writing" : "Reading"), backup_name_for_logging);
ProfileEvents::increment((open_mode == OpenMode::WRITE) ? ProfileEvents::BackupsOpenedForWrite : ProfileEvents::BackupsOpenedForRead);
if (open_mode == OpenMode::WRITE)
if (open_mode == OpenMode::READ)
{
ProfileEvents::increment(ProfileEvents::BackupsOpenedForRead);
LOG_INFO(log, "Reading backup: {}", backup_name_for_logging);
}
else
{
ProfileEvents::increment(ProfileEvents::BackupsOpenedForWrite);
LOG_INFO(log, "Writing backup: {}", backup_name_for_logging);
timestamp = std::time(nullptr);
if (!uuid)
uuid = UUIDHelpers::generateV4();

View File

@ -78,13 +78,16 @@ BackupInfo BackupInfo::fromAST(const IAST & ast)
}
}
res.args.reserve(list->children.size() - index);
for (; index < list->children.size(); ++index)
size_t args_size = list->children.size();
res.args.reserve(args_size - index);
for (; index < args_size; ++index)
{
const auto & elem = list->children[index];
const auto * lit = elem->as<const ASTLiteral>();
if (!lit)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected literal, got {}", serializeAST(*elem));
}
res.args.push_back(lit->value);
}
}

View File

@ -43,14 +43,6 @@ namespace Stage = BackupCoordinationStage;
namespace
{
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
{
String res = str;
res[0] = std::toupper(res[0]);
return res;
}
/// Outputs "table <name>" or "temporary table <name>"
String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_upper)
{
@ -145,7 +137,7 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
void RestorerFromBackup::setStage(const String & new_stage, const String & message)
{
LOG_TRACE(log, fmt::runtime(toUpperFirst(new_stage)));
LOG_TRACE(log, "Setting stage: {}", new_stage);
current_stage = new_stage;
if (restore_coordination)

View File

@ -18,16 +18,37 @@ template <typename T>
static inline String formatQuoted(T x)
{
WriteBufferFromOwnString wb;
writeQuoted(x, wb);
return wb.str();
}
template <typename T>
static inline void writeQuoted(const DecimalField<T> & x, WriteBuffer & buf)
{
writeChar('\'', buf);
writeText(x.getValue(), x.getScale(), buf, {});
writeChar('\'', buf);
if constexpr (is_decimal_field<T>)
{
writeChar('\'', wb);
writeText(x.getValue(), x.getScale(), wb, {});
writeChar('\'', wb);
}
else if constexpr (is_big_int_v<T>)
{
writeChar('\'', wb);
writeText(x, wb);
writeChar('\'', wb);
}
else
{
/// While `writeQuoted` sounds like it will always write the value in quotes,
/// in fact it means: write according to the rules of the quoted format, like VALUES,
/// where strings, dates, date-times, UUID are in quotes, and numbers are not.
/// That's why we take extra care to put Decimal and big integers inside quotes
/// when formatting literals in SQL language,
/// because it is different from the quoted formats like VALUES.
/// In fact, there are no Decimal and big integer literals in SQL,
/// but they can appear if we format the query from a modified AST.
/// We can fix this idiosyncrasy later.
writeQuoted(x, wb);
}
return wb.str();
}
/** In contrast to writeFloatText (and writeQuoted),

View File

@ -290,6 +290,11 @@ bool ZooKeeperWithFaultInjection::exists(const std::string & path, Coordination:
return executeWithFaultSync(__func__, path, [&]() { return keeper->exists(path, stat, watch); });
}
bool ZooKeeperWithFaultInjection::anyExists(const std::vector<std::string> & paths)
{
return executeWithFaultSync(__func__, !paths.empty() ? paths.front() : "", [&]() { return keeper->anyExists(paths); });
}
zkutil::ZooKeeper::MultiExistsResponse ZooKeeperWithFaultInjection::exists(const std::vector<std::string> & paths)
{
return executeWithFaultSync(__func__, !paths.empty() ? paths.front() : "", [&]() { return keeper->exists(paths); });

View File

@ -59,6 +59,7 @@ private:
class ZooKeeperWithFaultInjection
{
zkutil::ZooKeeper::Ptr keeper;
std::unique_ptr<RandomFaultInjection> fault_policy;
std::string name;
Poco::Logger * logger = nullptr;
@ -203,6 +204,8 @@ public:
zkutil::ZooKeeper::MultiExistsResponse exists(const std::vector<std::string> & paths);
bool anyExists(const std::vector<std::string> & paths);
std::string create(const std::string & path, const std::string & data, int32_t mode);
Coordination::Error tryCreate(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);

View File

@ -859,6 +859,10 @@ nuraft::cb_func::ReturnCode KeeperServer::callbackFunc(nuraft::cb_func::Type typ
initial_batch_committed = true;
return nuraft::cb_func::ReturnCode::Ok;
}
case nuraft::cb_func::PreAppendLogLeader:
{
return nuraft::cb_func::ReturnCode::ReturnNull;
}
case nuraft::cb_func::PreAppendLogFollower:
{
const auto & entry = *static_cast<LogEntryPtr *>(param->ctx);

View File

@ -373,7 +373,7 @@ void DiskLocal::removeDirectory(const String & path)
{
auto fs_path = fs::path(disk_path) / path;
if (0 != rmdir(fs_path.c_str()))
ErrnoException::throwFromPath(ErrorCodes::CANNOT_RMDIR, fs_path, "Cannot rmdir {}", fs_path);
ErrnoException::throwFromPath(ErrorCodes::CANNOT_RMDIR, fs_path, "Cannot remove directory {}", fs_path);
}
void DiskLocal::removeRecursive(const String & path)

View File

@ -172,7 +172,7 @@ struct SHA512Impl256
/// SSL library that we use, for S390X architecture only OpenSSL is supported. But the SHA512-256, SHA512_256_Init,
/// SHA512_256_Update, SHA512_256_Final methods to calculate hash (similar to the other SHA functions) aren't available
/// in the current version of OpenSSL that we use which necessitates the use of the EVP interface.
auto md_ctx = EVP_MD_CTX_create();
auto * md_ctx = EVP_MD_CTX_create();
EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr /*engine*/);
EVP_DigestUpdate(md_ctx, begin, size);
EVP_DigestFinal_ex(md_ctx, out_char_data, nullptr /*size*/);
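For reference, a minimal standalone sketch of the same EVP calls (an illustration, not ClickHouse code; assumes OpenSSL 1.1.1+ and linking with -lcrypto) that computes a SHA-512/256 digest:

#include <openssl/evp.h>
#include <cstdio>
#include <cstring>

int main()
{
    const char * data = "hello";
    unsigned char digest[32];   // SHA-512/256 yields a 256-bit (32-byte) digest
    unsigned int digest_len = 0;

    EVP_MD_CTX * md_ctx = EVP_MD_CTX_create();             // same allocation call as in the patch
    EVP_DigestInit_ex(md_ctx, EVP_sha512_256(), nullptr);  // select the SHA-512/256 algorithm
    EVP_DigestUpdate(md_ctx, data, std::strlen(data));
    EVP_DigestFinal_ex(md_ctx, digest, &digest_len);
    EVP_MD_CTX_destroy(md_ctx);

    for (unsigned int i = 0; i < digest_len; ++i)
        std::printf("%02x", digest[i]);
    std::printf("\n");
    return 0;
}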

View File

@ -23,7 +23,7 @@
#include <Functions/FunctionIfBase.h>
#include <Interpreters/castColumn.h>
#include <Functions/FunctionFactory.h>
#include <type_traits>
namespace DB
{
@ -42,7 +42,8 @@ using namespace GatherUtils;
/** Selection function by condition: if(cond, then, else).
* cond - UInt8
* then, else - numeric types for which there is a general type, or dates, datetimes, or strings, or arrays of these types.
*/
* For better performance, try to use branch-free code for numeric types (i.e. cond ? a : b --> !!cond * a + !cond * b), except for floating point types because of Inf or NaN.
*/
template <typename ArrayCond, typename ArrayA, typename ArrayB, typename ArrayResult, typename ResultType>
inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const ArrayB & b, ArrayResult & res)
@ -55,24 +56,48 @@ inline void fillVectorVector(const ArrayCond & cond, const ArrayA & a, const Arr
{
size_t a_index = 0, b_index = 0;
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b[b_index++]);
{
if constexpr (std::is_integral_v<ResultType>)
{
res[i] = !!cond[i] * static_cast<ResultType>(a[a_index]) + (!cond[i]) * static_cast<ResultType>(b[b_index]);
a_index += !!cond[i];
b_index += !cond[i];
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b[b_index++]);
}
}
else if (a_is_short)
{
size_t a_index = 0;
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b[i]);
if constexpr (std::is_integral_v<ResultType>)
{
res[i] = !!cond[i] * static_cast<ResultType>(a[a_index]) + (!cond[i]) * static_cast<ResultType>(b[i]);
a_index += !!cond[i];
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b[i]);
}
else if (b_is_short)
{
size_t b_index = 0;
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[b_index++]);
if constexpr (std::is_integral_v<ResultType>)
{
res[i] = !!cond[i] * static_cast<ResultType>(a[i]) + (!cond[i]) * static_cast<ResultType>(b[b_index]);
b_index += !cond[i];
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[b_index++]);
}
else
{
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[i]);
if constexpr (std::is_integral_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a[i]) + (!cond[i]) * static_cast<ResultType>(b[i]);
else
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b[i]);
}
}
@ -85,12 +110,21 @@ inline void fillVectorConstant(const ArrayCond & cond, const ArrayA & a, B b, Ar
{
size_t a_index = 0;
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b);
if constexpr (std::is_integral_v<ResultType>)
{
res[i] = !!cond[i] * static_cast<ResultType>(a[a_index]) + (!cond[i]) * static_cast<ResultType>(b);
a_index += !!cond[i];
}
else
res[i] = cond[i] ? static_cast<ResultType>(a[a_index++]) : static_cast<ResultType>(b);
}
else
{
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b);
if constexpr (std::is_integral_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a[i]) + (!cond[i]) * static_cast<ResultType>(b);
else
res[i] = cond[i] ? static_cast<ResultType>(a[i]) : static_cast<ResultType>(b);
}
}
@ -103,12 +137,21 @@ inline void fillConstantVector(const ArrayCond & cond, A a, const ArrayB & b, Ar
{
size_t b_index = 0;
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[b_index++]);
if constexpr (std::is_integral_v<ResultType>)
{
res[i] = !!cond[i] * static_cast<ResultType>(a) + (!cond[i]) * static_cast<ResultType>(b[b_index]);
b_index += !cond[i];
}
else
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[b_index++]);
}
else
{
for (size_t i = 0; i < size; ++i)
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[i]);
if constexpr (std::is_integral_v<ResultType>)
res[i] = !!cond[i] * static_cast<ResultType>(a) + (!cond[i]) * static_cast<ResultType>(b[i]);
else
res[i] = cond[i] ? static_cast<ResultType>(a) : static_cast<ResultType>(b[i]);
}
}
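A standalone sketch (not part of the patch) showing why the !!cond * a + !cond * b trick is limited to integral result types: multiplying the unselected operand by zero is exact for integers, but 0 * Inf and 0 * NaN are NaN for floating point, so the unselected branch would poison the result:

#include <cstdint>
#include <cstdio>
#include <limits>

int main()
{
    uint8_t cond = 0;                     // the "else" branch should be selected
    int64_t a = 10, b = 20;

    // Branch-free select for integers: identical to cond ? a : b.
    int64_t r = !!cond * a + !cond * b;   // r == 20
    std::printf("int: %lld\n", static_cast<long long>(r));

    // The same expression is unsafe for floating point.
    double fa = std::numeric_limits<double>::infinity(), fb = 20.0;
    double branchy = cond ? fa : fb;                // 20.0
    double branch_free = !!cond * fa + !cond * fb;  // NaN: 0 * Inf is NaN
    std::printf("float: branchy=%f branch-free=%f\n", branchy, branch_free);
    return 0;
}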

View File

@ -28,6 +28,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
ParserKeyword s_partition("PARTITION");
ParserKeyword s_final("FINAL");
ParserKeyword s_deduplicate("DEDUPLICATE");
ParserKeyword s_cleanup("CLEANUP");
ParserKeyword s_by("BY");
ParserToken s_dot(TokenType::Dot);
ParserIdentifier name_p(true);
@ -76,6 +77,9 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
return false;
}
/// Obsolete feature, ignored for backward compatibility.
s_cleanup.ignore(pos, expected);
auto query = std::make_shared<ASTOptimizeQuery>();
node = query;

View File

@ -740,7 +740,7 @@ void DWARFBlockInputFormat::parseFilenameTable(UnitState & unit, uint64_t offset
auto error = prologue.parse(*debug_line_extractor, &offset, /*RecoverableErrorHandler*/ [&](auto e)
{
if (++seen_debug_line_warnings < 10)
LOG_INFO(&Poco::Logger::get("DWARF"), "{}", llvm::toString(std::move(e)));
LOG_INFO(&Poco::Logger::get("DWARF"), "Parsing error: {}", llvm::toString(std::move(e)));
}, *dwarf_context, unit.dwarf_unit);
if (error)

View File

@ -729,8 +729,8 @@ void HTTPHandler::processQuery(
/// to some other value.
const auto & settings = context->getSettingsRef();
/// Only readonly queries are allowed for HTTP GET requests.
if (request.getMethod() == HTTPServerRequest::HTTP_GET)
/// Anything other than HTTP POST should be treated as readonly queries.
if (request.getMethod() != HTTPServerRequest::HTTP_POST)
{
if (settings.readonly == 0)
context->setSetting("readonly", 2);

View File

@ -593,23 +593,6 @@ UInt64 IMergeTreeDataPart::getMarksCount() const
return index_granularity.getMarksCount();
}
UInt64 IMergeTreeDataPart::getExistingBytesOnDisk() const
{
if (!supportLightweightDeleteMutate() || !hasLightweightDelete() || !rows_count
|| !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge)
return bytes_on_disk;
/// Uninitialized existing_rows_count
/// (if existing_rows_count equals rows_count, it means that previously we failed to read existing_rows_count)
if (existing_rows_count > rows_count)
readExistingRowsCount();
if (existing_rows_count < rows_count)
return bytes_on_disk * existing_rows_count / rows_count;
else /// Load failed
return bytes_on_disk;
}
size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const
{
auto checksum = checksums.files.find(file_name);
@ -1304,85 +1287,6 @@ void IMergeTreeDataPart::loadRowsCount()
}
}
void IMergeTreeDataPart::readExistingRowsCount() const
{
if (!supportLightweightDeleteMutate() || !hasLightweightDelete() || !storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge
|| existing_rows_count < rows_count || !getMarksCount())
return;
std::lock_guard lock(existing_rows_count_mutex);
/// Already read by another thread
if (existing_rows_count < rows_count)
return;
NamesAndTypesList cols;
cols.push_back(LightweightDeleteDescription::FILTER_COLUMN);
StorageMetadataPtr metadata_ptr = storage.getInMemoryMetadataPtr();
StorageSnapshotPtr storage_snapshot_ptr = std::make_shared<StorageSnapshot>(storage, metadata_ptr);
MergeTreeReaderPtr reader = getReader(
cols,
storage_snapshot_ptr,
MarkRanges{MarkRange(0, getMarksCount())},
nullptr,
storage.getContext()->getMarkCache().get(),
std::make_shared<AlterConversions>(),
MergeTreeReaderSettings{},
ValueSizeMap{},
ReadBufferFromFileBase::ProfileCallback{});
if (!reader)
{
LOG_WARNING(storage.log, "Create reader failed while reading existing rows count");
existing_rows_count = rows_count;
return;
}
size_t current_mark = 0;
const size_t total_mark = getMarksCount();
bool continue_reading = false;
size_t current_row = 0;
size_t existing_count = 0;
while (current_row < rows_count)
{
size_t rows_to_read = index_granularity.getMarkRows(current_mark);
continue_reading = (current_mark != 0);
Columns result;
result.resize(1);
size_t rows_read = reader->readRows(current_mark, total_mark, continue_reading, rows_to_read, result);
if (!rows_read)
{
LOG_WARNING(storage.log, "Part {} has lightweight delete, but _row_exists column not found", name);
existing_rows_count = rows_count;
return;
}
current_row += rows_read;
current_mark += (rows_to_read == rows_read);
const ColumnUInt8 * row_exists_col = typeid_cast<const ColumnUInt8 *>(result[0].get());
if (!row_exists_col)
{
LOG_WARNING(storage.log, "Part {} _row_exists column type is not UInt8", name);
existing_rows_count = rows_count;
return;
}
for (UInt8 row_exists : row_exists_col->getData())
if (row_exists)
existing_count++;
}
existing_rows_count = existing_count;
LOG_DEBUG(storage.log, "Part {} existing_rows_count = {}", name, existing_rows_count);
}
void IMergeTreeDataPart::appendFilesOfRowsCount(Strings & files)
{
files.push_back("count.txt");

View File

@ -229,13 +229,6 @@ public:
size_t rows_count = 0;
/// Existing rows count (excluding lightweight deleted rows)
/// UINT64_MAX -> uninitialized
/// 0 -> all rows were deleted
/// if reading failed, it will be set to rows_count
mutable size_t existing_rows_count = UINT64_MAX;
mutable std::mutex existing_rows_count_mutex;
time_t modification_time = 0;
/// When the part is removed from the working set. Changes once.
mutable std::atomic<time_t> remove_time { std::numeric_limits<time_t>::max() };
@ -381,10 +374,6 @@ public:
void setBytesOnDisk(UInt64 bytes_on_disk_) { bytes_on_disk = bytes_on_disk_; }
void setBytesUncompressedOnDisk(UInt64 bytes_uncompressed_on_disk_) { bytes_uncompressed_on_disk = bytes_uncompressed_on_disk_; }
/// Returns estimated size of existing rows if setting exclude_deleted_rows_for_part_size_in_merge is true
/// Otherwise returns bytes_on_disk
UInt64 getExistingBytesOnDisk() const;
size_t getFileSizeOrZero(const String & file_name) const;
auto getFilesChecksums() const { return checksums.files; }
@ -511,9 +500,6 @@ public:
/// True if here is lightweight deleted mask file in part.
bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); }
/// Read existing rows count from _row_exists column
void readExistingRowsCount() const;
void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings);
/// Checks the consistency of this data part.

View File

@ -160,7 +160,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
}
/// Start to make the main work
size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts, true);
size_t estimated_space_for_merge = MergeTreeDataMergerMutator::estimateNeededDiskSpace(parts);
/// Can throw an exception while reserving space.
IMergeTreeDataPart::TTLInfos ttl_infos;

View File

@ -405,7 +405,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo
}
IMergeSelector::Part part_info;
part_info.size = part->getExistingBytesOnDisk();
part_info.size = part->getBytesOnDisk();
part_info.age = res.current_time - part->modification_time;
part_info.level = part->info.level;
part_info.data = &part;
@ -611,7 +611,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti
return SelectPartsDecision::CANNOT_SELECT;
}
sum_bytes += (*it)->getExistingBytesOnDisk();
sum_bytes += (*it)->getBytesOnDisk();
prev_it = it;
++it;
@ -791,7 +791,7 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart
}
size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge)
size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts)
{
size_t res = 0;
time_t current_time = std::time(nullptr);
@ -802,10 +802,7 @@ size_t MergeTreeDataMergerMutator::estimateNeededDiskSpace(const MergeTreeData::
if (part_max_ttl && part_max_ttl <= current_time)
continue;
if (is_merge && part->storage.getSettings()->exclude_deleted_rows_for_part_size_in_merge)
res += part->getExistingBytesOnDisk();
else
res += part->getBytesOnDisk();
res += part->getBytesOnDisk();
}
return static_cast<size_t>(res * DISK_USAGE_COEFFICIENT_TO_RESERVE);

View File

@ -192,7 +192,7 @@ public:
/// The approximate amount of disk space needed for merge or mutation. With a surplus.
static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts, const bool & is_merge);
static size_t estimateNeededDiskSpace(const MergeTreeData::DataPartsVector & source_parts);
private:
/** Select all parts belonging to the same partition.

View File

@ -78,7 +78,6 @@ struct Settings;
M(UInt64, number_of_mutations_to_throw, 1000, "If table has at least that many unfinished mutations, throw 'Too many mutations' exception. Disabled if set to 0", 0) \
M(UInt64, min_delay_to_mutate_ms, 10, "Min delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \
M(UInt64, max_delay_to_mutate_ms, 1000, "Max delay of mutating MergeTree table in milliseconds, if there are a lot of unfinished mutations", 0) \
M(Bool, exclude_deleted_rows_for_part_size_in_merge, false, "Use an estimated source part size (excluding lightweight deleted rows) when selecting parts to merge", 0) \
\
/** Inserts settings. */ \
M(UInt64, parts_to_delay_insert, 1000, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \

View File

@ -49,7 +49,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare()
}
/// TODO - some better heuristic?
size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part}, false);
size_t estimated_space_for_result = MergeTreeDataMergerMutator::estimateNeededDiskSpace({source_part});
if (entry.create_time + storage_settings_ptr->prefer_fetch_merged_part_time_threshold.totalSeconds() <= time(nullptr)
&& estimated_space_for_result >= storage_settings_ptr->prefer_fetch_merged_part_size_threshold)

View File

@ -269,6 +269,12 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in, MergeTreeDataFor
deduplicate_by_columns = std::move(new_deduplicate_by_columns);
}
else if (checkString("cleanup: ", in))
{
/// Obsolete option, does nothing.
bool cleanup = false;
in >> cleanup;
}
else
trailing_newline_found = true;
}

View File

@ -1349,7 +1349,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
if (auto part_in_memory = asInMemoryPart(part))
sum_parts_size_in_bytes += part_in_memory->block.bytes();
else
sum_parts_size_in_bytes += part->getExistingBytesOnDisk();
sum_parts_size_in_bytes += part->getBytesOnDisk();
}
}

View File

@ -1085,7 +1085,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge(
if (isTTLMergeType(future_part->merge_type))
getContext()->getMergeList().bookMergeWithTTL();
merging_tagger = std::make_unique<CurrentlyMergingPartsTagger>(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts, true), *this, metadata_snapshot, false);
merging_tagger = std::make_unique<CurrentlyMergingPartsTagger>(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace(future_part->parts), *this, metadata_snapshot, false);
return std::make_shared<MergeMutateSelectedEntry>(future_part, std::move(merging_tagger), std::make_shared<MutationCommands>());
}
@ -1301,7 +1301,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate(
future_part->name = part->getNewName(new_part_info);
future_part->part_format = part->getFormat();
tagger = std::make_unique<CurrentlyMergingPartsTagger>(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}, false), *this, metadata_snapshot, true);
tagger = std::make_unique<CurrentlyMergingPartsTagger>(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}), *this, metadata_snapshot, true);
return std::make_shared<MergeMutateSelectedEntry>(future_part, std::move(tagger), commands, txn);
}
}

View File

@ -78,7 +78,7 @@ def main():
pr_info = PRInfo()
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, build_check_name)
atexit.register(update_mergeable_check, commit, pr_info, build_check_name)
rerun_helper = RerunHelper(commit, build_check_name)
if rerun_helper.is_already_finished_by_status():

View File

@ -135,7 +135,7 @@ def parse_args(parser: argparse.ArgumentParser) -> argparse.Namespace:
"--skip-jobs",
action="store_true",
default=False,
help="skip fetching data about job runs, used in --configure action (for debugging)",
help="skip fetching data about job runs, used in --configure action (for debugging and nigthly ci)",
)
parser.add_argument(
"--rebuild-all-docker",
@ -279,11 +279,11 @@ def _configure_docker_jobs(
images_info = docker_images_helper.get_images_info()
# a. check missing images
print("Start checking missing images in dockerhub")
# FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
# find if it's possible to use the setting of /etc/docker/daemon.json
docker_images_helper.docker_login()
if not rebuild_all_dockers:
# FIXME: we need login as docker manifest inspect goes directly to one of the *.docker.com hosts instead of "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"]
# find if it's possible to use the setting of /etc/docker/daemon.json
docker_images_helper.docker_login()
print("Start checking missing images in dockerhub")
missing_multi_dict = check_missing_images_on_dockerhub(imagename_digest_dict)
missing_multi = list(missing_multi_dict)
missing_amd64 = []
@ -305,6 +305,15 @@ def _configure_docker_jobs(
"aarch64",
)
)
# FIXME: temporary hack, remove after transition to docker digest as tag
else:
if missing_multi:
print(
f"WARNING: Missing images {list(missing_multi)} - fallback to latest tag"
)
for image in missing_multi:
imagename_digest_dict[image] = "latest"
print("...checking missing images in dockerhub - done")
else:
# add all images to missing
missing_multi = list(imagename_digest_dict)
@ -315,16 +324,7 @@ def _configure_docker_jobs(
for name in imagename_digest_dict
if not images_info[name]["only_amd64"]
]
# FIXME: temporary hack, remove after transition to docker digest as tag
if docker_digest_or_latest:
if missing_multi:
print(
f"WARNING: Missing images {list(missing_multi)} - fallback to latest tag"
)
for image in missing_multi:
imagename_digest_dict[image] = "latest"
print("...checking missing images in dockerhub - done")
return {
"images": imagename_digest_dict,
"missing_aarch64": missing_aarch64,
@ -377,6 +377,7 @@ def _configure_jobs(
for batch in range(num_batches): # type: ignore
batches_to_do.append(batch)
elif job_config.run_always:
# always add to todo
batches_to_do.append(batch)
else:
# this job controlled by digest, add to todo if it's not successfully done before
@ -396,6 +397,21 @@ def _configure_jobs(
else:
jobs_to_skip += (job,)
if pr_labels:
jobs_requested_by_label = [] # type: List[str]
ci_controlling_labels = [] # type: List[str]
for label in pr_labels:
label_config = CI_CONFIG.get_label_config(label)
if label_config:
jobs_requested_by_label += label_config.run_jobs
ci_controlling_labels += [label]
if ci_controlling_labels:
print(f"NOTE: CI controlling labels are set: [{ci_controlling_labels}]")
print(
f" : following jobs will be executed: [{jobs_requested_by_label}]"
)
jobs_to_do = jobs_requested_by_label
if commit_tokens:
requested_jobs = [
token[len("#job_") :]
@ -415,7 +431,7 @@ def _configure_jobs(
if parent in jobs_to_do and parent not in jobs_to_do_requested:
jobs_to_do_requested.append(parent)
print(
f"NOTE: Only specific job(s) were requested: [{jobs_to_do_requested}]"
f"NOTE: Only specific job(s) were requested by commit message tokens: [{jobs_to_do_requested}]"
)
jobs_to_do = jobs_to_do_requested
@ -548,14 +564,14 @@ def main() -> int:
if args.configure:
GR = GitRunner()
pr_info = PRInfo(need_changed_files=True)
pr_info = PRInfo()
docker_data = {}
git_ref = GR.run(f"{GIT_PREFIX} rev-parse HEAD")
# if '#no-merge-commit' is set in commit message - set git ref to PR branch head to avoid merge-commit
tokens = []
if pr_info.number != 0:
if pr_info.number != 0 and not args.skip_jobs:
message = GR.run(f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1")
tokens = _fetch_commit_tokens(message)
print(f"Found commit message tokens: [{tokens}]")
@ -607,8 +623,10 @@ def main() -> int:
result["jobs_data"] = jobs_data
result["docker_data"] = docker_data
if pr_info.number != 0 and not args.docker_digest_or_latest:
# FIXME: currently the style check runs before docker build only when possible (i.e. the style-check image has not changed)
# find a way to always run the style check before docker build and other jobs
_check_and_update_for_early_style_check(result)
if pr_info.number != 0 and pr_info.has_changes_in_documentation_only():
if pr_info.has_changes_in_documentation_only():
_update_config_for_docs_only(result)
elif args.update_gh_statuses:
@ -689,7 +707,8 @@ def main() -> int:
elif args.mark_success:
assert indata, "Run config must be provided via --infile"
job = args.job_name
num_batches = CI_CONFIG.get_job_config(job).num_batches
job_config = CI_CONFIG.get_job_config(job)
num_batches = job_config.num_batches
assert (
num_batches <= 1 or 0 <= args.batch < num_batches
), f"--batch must be provided and in range [0, {num_batches}) for {job}"
@ -706,7 +725,7 @@ def main() -> int:
if not CommitStatusData.is_present():
# apparently exit after rerun-helper check
# do nothing, exit without failure
print("ERROR: no status file for job [{job}]")
print(f"ERROR: no status file for job [{job}]")
job_status = CommitStatusData(
status="dummy failure",
description="dummy status",
@ -717,7 +736,9 @@ def main() -> int:
job_status = CommitStatusData.load_status()
# Storing job data (report_url) to restore OK GH status on job results reuse
if job_status.is_ok():
if job_config.run_always:
print(f"Job [{job}] runs always in CI - do not mark as done")
elif job_status.is_ok():
success_flag_name = get_file_flag_name(
job, indata["jobs_data"]["digests"][job], args.batch, num_batches
)

View File

@ -1,5 +1,6 @@
#!/usr/bin/env python3
from enum import Enum
import logging
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
@ -8,6 +9,10 @@ from pathlib import Path
from typing import Callable, Dict, Iterable, List, Literal, Optional, Union
class Labels(Enum):
DO_NOT_TEST_LABEL = "do not test"
@dataclass
class DigestConfig:
# all files, dirs to include into digest, glob supported
@ -22,6 +27,15 @@ class DigestConfig:
git_submodules: bool = False
@dataclass
class LabelConfig:
"""
Class to configure different CI scenarios per GH label
"""
run_jobs: Iterable[str] = frozenset()
@dataclass
class JobConfig:
"""
@ -95,7 +109,7 @@ class TestConfig:
BuildConfigs = Dict[str, BuildConfig]
BuildsReportConfig = Dict[str, BuildReportConfig]
TestConfigs = Dict[str, TestConfig]
LabelConfigs = Dict[str, LabelConfig]
# common digests configs
compatibility_check_digest = DigestConfig(
@ -268,6 +282,13 @@ class CiConfig:
builds_report_config: BuildsReportConfig
test_configs: TestConfigs
other_jobs_configs: TestConfigs
label_configs: LabelConfigs
def get_label_config(self, label_name: str) -> Optional[LabelConfig]:
for label, config in self.label_configs.items():
if label_name == label:
return config
return None
def get_job_config(self, check_name: str) -> JobConfig:
res = None
@ -417,6 +438,9 @@ class CiConfig:
CI_CONFIG = CiConfig(
label_configs={
Labels.DO_NOT_TEST_LABEL.value: LabelConfig(run_jobs=["Style check"]),
},
build_config={
"package_release": BuildConfig(
name="package_release",
@ -847,6 +871,7 @@ CI_CONFIG.validate()
# checks required by Mergeable Check
REQUIRED_CHECKS = [
"PR Check",
"ClickHouse build check",
"ClickHouse special build check",
"Docs Check",

View File

@ -133,7 +133,7 @@ def main():
pr_info = PRInfo()
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, check_name)
atexit.register(update_mergeable_check, commit, pr_info, check_name)
rerun_helper = RerunHelper(commit, check_name)
if rerun_helper.is_already_finished_by_status():

View File

@ -15,11 +15,10 @@ from github.CommitStatus import CommitStatus
from github.GithubException import GithubException
from github.GithubObject import NotSet
from github.IssueComment import IssueComment
from github.PullRequest import PullRequest
from github.Repository import Repository
from ci_config import CI_CONFIG, REQUIRED_CHECKS, CHECK_DESCRIPTIONS, CheckDescription
from env_helper import GITHUB_REPOSITORY, GITHUB_RUN_URL, TEMP_PATH
from env_helper import GITHUB_JOB_URL, GITHUB_REPOSITORY, TEMP_PATH
from pr_info import PRInfo, SKIP_MERGEABLE_CHECK_LABEL
from report import (
ERROR,
@ -437,11 +436,11 @@ def set_mergeable_check(
context=MERGEABLE_NAME,
description=description,
state=state,
target_url=GITHUB_RUN_URL,
target_url=GITHUB_JOB_URL(),
)
def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None:
def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> None:
not_run = (
pr_info.labels.intersection({SKIP_MERGEABLE_CHECK_LABEL, "release"})
or check_name not in REQUIRED_CHECKS
@ -454,7 +453,6 @@ def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None
logging.info("Update Mergeable Check by %s", check_name)
commit = get_commit(gh, pr_info.sha)
statuses = get_commit_filtered_statuses(commit)
required_checks = [
@ -475,14 +473,17 @@ def update_mergeable_check(gh: Github, pr_info: PRInfo, check_name: str) -> None
else:
fail.append(status.context)
state: StatusType = SUCCESS
if success:
description = ", ".join(success)
else:
description = "awaiting job statuses"
if fail:
description = "failed: " + ", ".join(fail)
description = format_description(description)
if mergeable_status is None or mergeable_status.description != description:
set_mergeable_check(commit, description, FAILURE)
return
description = ", ".join(success)
state = FAILURE
description = format_description(description)
if mergeable_status is None or mergeable_status.description != description:
set_mergeable_check(commit, description)
set_mergeable_check(commit, description, state)

View File

@ -67,7 +67,7 @@ def main():
if rerun_helper.is_already_finished_by_status():
logging.info("Check is already finished according to github status, exiting")
sys.exit(0)
atexit.register(update_mergeable_check, gh, pr_info, NAME)
atexit.register(update_mergeable_check, commit, pr_info, NAME)
if not pr_info.has_changes_in_documentation() and not args.force:
logging.info("No changes in documentation")

View File

@ -124,7 +124,7 @@ def main():
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, NAME)
atexit.register(update_mergeable_check, commit, pr_info, NAME)
rerun_helper = RerunHelper(commit, NAME)
if rerun_helper.is_already_finished_by_status():

View File

@ -18,9 +18,9 @@ def main():
pr_info = PRInfo(need_orgs=True)
gh = Github(get_best_robot_token(), per_page=100)
# Update the Mergeable Check at the final step
update_mergeable_check(gh, pr_info, CI_STATUS_NAME)
commit = get_commit(gh, pr_info.sha)
# Update the Mergeable Check at the final step
update_mergeable_check(commit, pr_info, CI_STATUS_NAME)
statuses = [
status

View File

@ -254,7 +254,7 @@ def main():
)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, check_name)
atexit.register(update_mergeable_check, commit, pr_info, check_name)
if validate_bugfix_check and "pr-bugfix" not in pr_info.labels:
if args.post_commit_status == "file":

View File

@ -279,7 +279,7 @@ def main():
if CI:
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, args.check_name)
atexit.register(update_mergeable_check, commit, pr_info, args.check_name)
rerun_helper = RerunHelper(commit, args.check_name)
if rerun_helper.is_already_finished_by_status():

View File

@ -118,7 +118,7 @@ def main():
gh = Github(get_best_robot_token(), per_page=100)
pr_info = PRInfo()
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, check_name)
atexit.register(update_mergeable_check, commit, pr_info, check_name)
temp_path.mkdir(parents=True, exist_ok=True)

View File

@ -2,7 +2,7 @@
import json
import logging
import os
from typing import Dict, List, Set, Union, Literal
from typing import Dict, List, Set, Union
from unidiff import PatchSet # type: ignore
@ -93,6 +93,7 @@ class PRInfo:
github_event = PRInfo.default_event.copy()
self.event = github_event
self.changed_files = set() # type: Set[str]
self.changed_files_requested = False
self.body = ""
self.diff_urls = [] # type: List[str]
# release_pr and merged_pr are used for docker images additional cache
@ -285,6 +286,7 @@ class PRInfo:
response.raise_for_status()
diff_object = PatchSet(response.text)
self.changed_files.update({f.path for f in diff_object})
self.changed_files_requested = True
print(f"Fetched info about {len(self.changed_files)} changed files")
def get_dict(self):
@ -297,9 +299,10 @@ class PRInfo:
}
def has_changes_in_documentation(self) -> bool:
# If the list wasn't built yet the best we can do is to
# assume that there were changes.
if self.changed_files is None or not self.changed_files:
if not self.changed_files_requested:
self.fetch_changed_files()
if not self.changed_files:
return True
for f in self.changed_files:
@ -316,7 +319,11 @@ class PRInfo:
checks if changes are docs related without other changes
FIXME: avoid hardcoding filenames here
"""
if not self.changed_files_requested:
self.fetch_changed_files()
if not self.changed_files:
# if no changes at all return False
return False
for f in self.changed_files:
@ -332,7 +339,10 @@ class PRInfo:
return True
def has_changes_in_submodules(self):
if self.changed_files is None or not self.changed_files:
if not self.changed_files_requested:
self.fetch_changed_files()
if not self.changed_files:
return True
for f in self.changed_files:
@ -340,75 +350,6 @@ class PRInfo:
return True
return False
def can_skip_builds_and_use_version_from_master(self):
if FORCE_TESTS_LABEL in self.labels:
return False
if self.changed_files is None or not self.changed_files:
return False
return not any(
f.startswith("programs")
or f.startswith("src")
or f.startswith("base")
or f.startswith("cmake")
or f.startswith("rust")
or f == "CMakeLists.txt"
or f == "tests/ci/build_check.py"
for f in self.changed_files
)
def can_skip_integration_tests(self, versions: List[str]) -> bool:
if FORCE_TESTS_LABEL in self.labels:
return False
# If docker image(s) relevant to integration tests are updated
if any(self.sha in version for version in versions):
return False
if self.changed_files is None or not self.changed_files:
return False
if not self.can_skip_builds_and_use_version_from_master():
return False
# Integration tests can be skipped if integration tests are not changed
return not any(
f.startswith("tests/integration/")
or f == "tests/ci/integration_test_check.py"
for f in self.changed_files
)
def can_skip_functional_tests(
self, version: str, test_type: Literal["stateless", "stateful"]
) -> bool:
if FORCE_TESTS_LABEL in self.labels:
return False
# If docker image(s) relevant to functional tests are updated
if self.sha in version:
return False
if self.changed_files is None or not self.changed_files:
return False
if not self.can_skip_builds_and_use_version_from_master():
return False
# Functional tests can be skipped if queries tests are not changed
if test_type == "stateless":
return not any(
f.startswith("tests/queries/0_stateless")
or f == "tests/ci/functional_test_check.py"
for f in self.changed_files
)
else: # stateful
return not any(
f.startswith("tests/queries/1_stateful")
or f == "tests/ci/functional_test_check.py"
for f in self.changed_files
)
class FakePRInfo:
def __init__(self):

View File

@ -1,4 +1,5 @@
#!/usr/bin/env python3
import atexit
import sys
import logging
from typing import Tuple
@ -13,9 +14,8 @@ from commit_status_helper import (
post_commit_status,
post_labels,
remove_labels,
set_mergeable_check,
update_mergeable_check,
)
from docs_check import NAME as DOCS_NAME
from env_helper import GITHUB_REPOSITORY, GITHUB_SERVER_URL
from get_robot_token import get_best_robot_token
from pr_info import FORCE_TESTS_LABEL, PRInfo
@ -24,6 +24,7 @@ from lambda_shared_package.lambda_shared.pr import (
TRUSTED_CONTRIBUTORS,
check_pr_description,
)
from report import FAILURE
TRUSTED_ORG_IDS = {
54801242, # clickhouse
@ -31,9 +32,9 @@ TRUSTED_ORG_IDS = {
OK_SKIP_LABELS = {"release", "pr-backport", "pr-cherrypick"}
CAN_BE_TESTED_LABEL = "can be tested"
DO_NOT_TEST_LABEL = "do not test"
FEATURE_LABEL = "pr-feature"
SUBMODULE_CHANGED_LABEL = "submodule changed"
PR_CHECK = "PR Check"
def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
@ -58,24 +59,16 @@ def pr_is_by_trusted_user(pr_user_login, pr_user_orgs):
# Returns whether we should look into individual checks for this PR. If not, it
# can be skipped entirely.
# Returns can_run, description, labels_state
def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]:
# Returns can_run, description
def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str]:
# Consider the labels and whether the user is trusted.
print("Got labels", pr_info.labels)
if FORCE_TESTS_LABEL in pr_info.labels:
print(f"Label '{FORCE_TESTS_LABEL}' set, forcing remaining checks")
return True, f"Labeled '{FORCE_TESTS_LABEL}'", "pending"
if DO_NOT_TEST_LABEL in pr_info.labels:
print(f"Label '{DO_NOT_TEST_LABEL}' set, skipping remaining checks")
return False, f"Labeled '{DO_NOT_TEST_LABEL}'", "success"
return True, f"Labeled '{FORCE_TESTS_LABEL}'"
if OK_SKIP_LABELS.intersection(pr_info.labels):
return (
True,
"Don't try new checks for release/backports/cherry-picks",
"success",
)
return True, "Don't try new checks for release/backports/cherry-picks"
if CAN_BE_TESTED_LABEL not in pr_info.labels and not pr_is_by_trusted_user(
pr_info.user_login, pr_info.user_orgs
@ -83,9 +76,9 @@ def should_run_ci_for_pr(pr_info: PRInfo) -> Tuple[bool, str, str]:
print(
f"PRs by untrusted users need the '{CAN_BE_TESTED_LABEL}' label - please contact a member of the core team"
)
return False, "Needs 'can be tested' label", "failure"
return False, "Needs 'can be tested' label"
return True, "No special conditions apply", "pending"
return True, "No special conditions apply"
def main():
@ -98,7 +91,7 @@ def main():
print("::notice ::Cannot run, no PR exists for the commit")
sys.exit(1)
can_run, description, labels_state = should_run_ci_for_pr(pr_info)
can_run, description = should_run_ci_for_pr(pr_info)
if can_run and OK_SKIP_LABELS.intersection(pr_info.labels):
print("::notice :: Early finish the check, running in a special PR")
sys.exit(0)
@ -106,6 +99,7 @@ def main():
description = format_description(description)
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, commit, pr_info, PR_CHECK)
description_error, category = check_pr_description(pr_info.body, GITHUB_REPOSITORY)
pr_labels_to_add = []
@ -136,22 +130,6 @@ def main():
if pr_labels_to_remove:
remove_labels(gh, pr_info, pr_labels_to_remove)
# FIXME: it should rather be in finish check. no reason to stop ci run.
if FEATURE_LABEL in pr_info.labels and not pr_info.has_changes_in_documentation():
print(
f"The '{FEATURE_LABEL}' in the labels, "
"but there's no changed documentation"
)
post_commit_status( # do not pass pr_info here intentionally
commit,
"failure",
"",
f"expect adding docs for {FEATURE_LABEL}",
DOCS_NAME,
pr_info,
)
sys.exit(0)
if description_error:
print(
"::error ::Cannot run, PR description does not match the template: "
@ -171,34 +149,40 @@ def main():
"failure",
url,
format_description(description_error),
CI_STATUS_NAME,
PR_CHECK,
pr_info,
)
sys.exit(1)
set_mergeable_check(commit, "skipped")
ci_report_url = create_ci_report(pr_info, [])
if FEATURE_LABEL in pr_info.labels and not pr_info.has_changes_in_documentation():
print(
f"The '{FEATURE_LABEL}' in the labels, "
"but there's no changed documentation"
)
post_commit_status(
commit,
FAILURE,
"",
f"expect adding docs for {FEATURE_LABEL}",
PR_CHECK,
pr_info,
)
# allow the workflow to continue
if not can_run:
print("::notice ::Cannot run")
post_commit_status(
commit,
labels_state,
ci_report_url,
description,
CI_STATUS_NAME,
pr_info,
)
sys.exit(1)
else:
print("::notice ::Can run")
post_commit_status(
commit,
"pending",
ci_report_url,
description,
CI_STATUS_NAME,
pr_info,
)
ci_report_url = create_ci_report(pr_info, [])
print("::notice ::Can run")
post_commit_status(
commit,
"pending",
ci_report_url,
description,
CI_STATUS_NAME,
pr_info,
)
if __name__ == "__main__":

View File

@ -145,7 +145,7 @@ def main():
gh = GitHub(get_best_robot_token(), create_cache_dir=False)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, NAME)
atexit.register(update_mergeable_check, commit, pr_info, NAME)
rerun_helper = RerunHelper(commit, NAME)
if rerun_helper.is_already_finished_by_status():

View File

@ -187,7 +187,7 @@ def main():
gh = Github(get_best_robot_token(), per_page=100)
commit = get_commit(gh, pr_info.sha)
atexit.register(update_mergeable_check, gh, pr_info, check_name)
atexit.register(update_mergeable_check, commit, pr_info, check_name)
rerun_helper = RerunHelper(commit, check_name)
if rerun_helper.is_already_finished_by_status():

tests/performance/if.xml (new file, 12 lines added)
View File

@ -0,0 +1,12 @@
<test>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() > 42949673, zero + 1, zero + 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 3865470566, zero + 1, zero + 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 2147483647, zero + 1, zero + 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 42949673, zero + 1, zero + 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 42949673, zero + 1, 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 42949673, 1, zero + 2)) ]]></query>
<query><![CDATA[ SELECT count() FROM zeros(1000000000) WHERE NOT ignore(if(rand32() < 42949673, 1, 2)) ]]></query>
</test>

View File

@ -1,15 +1,15 @@
runtime messages 0.001
runtime exceptions 0.05
unknown runtime exceptions 0.01
messages shorter than 10 1
messages shorter than 16 3
exceptions shorter than 30 3 []
noisy messages 0.3
noisy Trace messages 0.16
noisy Debug messages 0.09
noisy Info messages 0.05
noisy Warning messages 0.01
noisy Error messages 0.02
runtime messages 0.001 []
runtime exceptions 0.05 []
unknown runtime exceptions 0.01 []
messages shorter than 10 1 []
messages shorter than 16 1 []
exceptions shorter than 30 1 []
noisy messages 0.3
noisy Trace messages 0.16
noisy Debug messages 0.09
noisy Info messages 0.05
noisy Warning messages 0.01
noisy Error messages 0.03
no Fatal messages 0
number of too noisy messages 3
number of noisy messages 10

View File

@ -9,17 +9,61 @@ create view logs as select * from system.text_log where now() - toIntervalMinute
-- Check that we don't have too many messages formatted with fmt::runtime or strings concatenation.
-- The 0.001 threshold should always be enough; the value was about 0.00025
select 'runtime messages', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.001) from logs
where message not like '% Received from %clickhouse-staging.com:9440%' and source_file not like '%/AWSLogger.cpp%';
WITH 0.001 AS threshold
SELECT
'runtime messages',
greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0) as v, threshold),
v <= threshold ? [] :
(SELECT groupArray((message, c)) FROM (
SELECT message, count() as c FROM logs
WHERE
length(message_format_string) = 0
AND message not like '% Received from %clickhouse-staging.com:9440%'
AND source_file not like '%/AWSLogger.cpp%'
GROUP BY message ORDER BY c LIMIT 10
))
FROM logs
WHERE
message NOT LIKE '% Received from %clickhouse-staging.com:9440%'
AND source_file not like '%/AWSLogger.cpp%';
-- Check the same for exceptions. The value was 0.03
select 'runtime exceptions', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.05) from logs
where (message like '%DB::Exception%' or message like '%Coordination::Exception%')
and message not like '% Received from %clickhouse-staging.com:9440%';
WITH 0.05 AS threshold
SELECT
'runtime exceptions',
greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0) as v, threshold),
v <= threshold ? [] :
(SELECT groupArray((message, c)) FROM (
SELECT message, count() as c FROM logs
WHERE
length(message_format_string) = 0
AND (message like '%DB::Exception%' or message like '%Coordination::Exception%')
AND message not like '% Received from %clickhouse-staging.com:9440%'
GROUP BY message ORDER BY c LIMIT 10
))
FROM logs
WHERE
message NOT LIKE '% Received from %clickhouse-staging.com:9440%'
AND (message like '%DB::Exception%' or message like '%Coordination::Exception%');
WITH 0.01 AS threshold
SELECT
'unknown runtime exceptions',
greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0) as v, threshold),
v <= threshold ? [] :
(SELECT groupArray((message, c)) FROM (
SELECT message, count() as c FROM logs
WHERE
length(message_format_string) = 0
AND (message like '%DB::Exception%' or message like '%Coordination::Exception%')
AND message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%'
GROUP BY message ORDER BY c LIMIT 10
))
FROM logs
WHERE
(message like '%DB::Exception%' or message like '%Coordination::Exception%')
AND message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%';
select 'unknown runtime exceptions', greatest(coalesce(sum(length(message_format_string) = 0) / countOrNull(), 0), 0.01) from logs where
(message like '%DB::Exception%' or message like '%Coordination::Exception%')
and message not like '% Received from %' and message not like '%(SYNTAX_ERROR)%';
-- FIXME some of the following messages are not informative and it has to be fixed
create temporary table known_short_messages (s String) as select * from (select
@ -51,15 +95,20 @@ create temporary table known_short_messages (s String) as select * from (select
] as arr) array join arr;
-- Check that we don't have too many short meaningless message patterns.
WITH 1 AS max_messages
select 'messages shorter than 10',
greatest(uniqExact(message_format_string), 1)
(uniqExact(message_format_string) as c) <= max_messages,
c <= max_messages ? [] : groupUniqArray(message_format_string)
from logs
where length(message_format_string) < 10 and message_format_string not in known_short_messages;
-- Same as above. Feel free to update the threshold or remove this query if really necessary
WITH 3 AS max_messages
select 'messages shorter than 16',
greatest(uniqExact(message_format_string), 3)
from logs where length(message_format_string) < 16 and message_format_string not in known_short_messages;
(uniqExact(message_format_string) as c) <= max_messages,
c <= max_messages ? [] : groupUniqArray(message_format_string)
from logs
where length(message_format_string) < 16 and message_format_string not in known_short_messages;
-- Unlike above, here we look at length of the formatted message, not format string. Most short format strings are fine because they end up decorated with context from outer or inner exceptions, e.g.:
-- "Expected end of line" -> "Code: 117. DB::Exception: Expected end of line: (in file/uri /var/lib/clickhouse/user_files/data_02118): (at row 1)"
@ -68,40 +117,53 @@ select 'messages shorter than 16',
-- This table currently doesn't have enough information to do this reliably, so we just regex search for " (ERROR_NAME_IN_CAPS)" and hope that's good enough.
-- For the "Code: 123. DB::Exception: " part, we just subtract 26 instead of searching for it. Because sometimes it's not at the start, e.g.:
-- "Unexpected error, will try to restart main thread: Code: 341. DB::Exception: Unexpected error: Code: 57. DB::Exception:[...]"
WITH 3 AS max_messages
select 'exceptions shorter than 30',
greatest(uniqExact(message_format_string), 3) AS c,
c = 3 ? [] : groupUniqArray(message_format_string)
(uniqExact(message_format_string) as c) <= max_messages,
c <= max_messages ? [] : groupUniqArray(message_format_string)
from logs
where message ilike '%DB::Exception%' and if(length(extract(message, '(.*)\\([A-Z0-9_]+\\)')) as pref > 0, pref, length(message)) < 30 + 26 and message_format_string not in known_short_messages;
-- Avoid too noisy messages: top 1 message frequency must be less than 30%. We should reduce the threshold
select 'noisy messages',
greatest((select count() from logs group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.30);
WITH 0.30 as threshold
select
'noisy messages',
greatest(coalesce(((select message_format_string, count() from logs group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
-- Same as above, but excluding Test level (actually finds top 1 Trace message)
with ('Access granted: {}{}', '{} -> {}') as frequent_in_tests
select 'noisy Trace messages',
greatest((select count() from logs where level!='Test' and message_format_string not in frequent_in_tests
group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.16);
with 0.16 as threshold
select
'noisy Trace messages',
greatest(coalesce(((select message_format_string, count() from logs where level = 'Trace' and message_format_string not in ('Access granted: {}{}', '{} -> {}')
group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
-- Same as above for Debug
WITH 0.09 as threshold
select 'noisy Debug messages',
greatest((select count() from logs where level <= 'Debug' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.09);
greatest(coalesce(((select message_format_string, count() from logs where level = 'Debug' group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
-- Same as above for Info
WITH 0.05 as threshold
select 'noisy Info messages',
greatest((select count() from logs where level <= 'Information' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.05);
greatest(coalesce(((select message_format_string, count() from logs where level = 'Information' group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
-- Same as above for Warning
with ('Not enabled four letter command {}') as frequent_in_tests
select 'noisy Warning messages',
greatest(coalesce((select count() from logs where level = 'Warning' and message_format_string not in frequent_in_tests
group by message_format_string order by count() desc limit 1), 0) / (select count() from logs), 0.01);
with 0.01 as threshold
select
'noisy Warning messages',
greatest(coalesce(((select message_format_string, count() from logs where level = 'Warning' and message_format_string not in ('Not enabled four letter command {}')
group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
-- Same as above for Error
WITH 0.03 as threshold
select 'noisy Error messages',
greatest(coalesce((select count() from logs where level = 'Error' group by message_format_string order by count() desc limit 1), 0) / (select count() from logs), 0.02);
greatest(coalesce(((select message_format_string, count() from logs where level = 'Error' group by message_format_string order by count() desc limit 1) as top_message).2, 0) / (select count() from logs), threshold) as r,
r <= threshold ? '' : top_message.1;
select 'no Fatal messages', count() from logs where level = 'Fatal';
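-- The per-level "noisy messages" checks above all share one pattern: take the most frequent message_format_string as a named scalar-subquery tuple, read its count with `.2`, and compare the ratio against a per-level threshold. A minimal sketch of that tuple-access pattern (not part of this diff), using a hypothetical constant scalar subquery and an assumed total of 100 log rows instead of the test's `logs` table:
WITH 0.30 AS threshold
SELECT
    ((SELECT 'some format string', 42) AS top_message).2 AS top_count,  -- .2 reads the count column of the one-row scalar tuple
    top_message.1 AS top_format,                                        -- .1 reads the format string itself
    greatest(top_count / 100, threshold) AS r,                          -- ratio of the top message, floored at the threshold
    r <= threshold ? '' : top_format;                                   -- name the offender only when it exceeds the threshold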
View File
@ -5,6 +5,7 @@ FROM
)
GROUP BY number
HAVING 1 AND sin(sum(number))
ORDER BY ALL
SETTINGS enable_optimize_predicate_expression = 0;
SELECT '=====';
@ -16,6 +17,7 @@ FROM
)
GROUP BY number
HAVING 1 AND sin(1)
ORDER BY ALL
SETTINGS enable_optimize_predicate_expression = 0;
SELECT '=====';
@ -27,6 +29,7 @@ FROM
)
GROUP BY number
HAVING x AND sin(sum(number))
ORDER BY ALL
SETTINGS enable_optimize_predicate_expression = 1;
SELECT '=====';
@ -38,6 +41,7 @@ FROM
)
GROUP BY number
HAVING 1 AND sin(sum(number))
ORDER BY ALL
SETTINGS enable_optimize_predicate_expression = 0;
SELECT '=====';
@ -57,6 +61,7 @@ FROM
)
GROUP BY number
HAVING 1 AND sin(sum(number))
ORDER BY ALL
SETTINGS enable_optimize_predicate_expression = 1;
select '#45440';
@ -72,12 +77,16 @@ SELECT
NOT h,
h IS NULL
FROM t2 AS left
GROUP BY g;
select '=';
GROUP BY g
ORDER BY g DESC;
SELECT '=';
SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null
FROM t2 AS left
GROUP BY g HAVING h ORDER BY g DESC SETTINGS enable_optimize_predicate_expression = 0;
select '=';
SELECT '=';
SELECT MAX(left.c0), min2(left.c0, -(-left.c0) * (radians(left.c0) - radians(left.c0))) as g, (((-1925024212 IS NOT NULL) IS NOT NULL) != radians(tan(1216286224))) AND cos(lcm(MAX(left.c0), -1966575216) OR (MAX(left.c0) * 1180517420)) as h, not h, h is null
FROM t2 AS left
GROUP BY g HAVING h ORDER BY g DESC SETTINGS enable_optimize_predicate_expression = 1;
View File
@ -29,7 +29,7 @@ $CLICKHOUSE_CLIENT \
--query_id "${query_id}" \
--max_parallel_replicas 3 \
--prefer_localhost_replica 1 \
--cluster_for_parallel_replicas "parallel_replicas" \
--cluster_for_parallel_replicas "test_cluster_one_shard_three_replicas_localhost" \
--allow_experimental_parallel_reading_from_replicas 1 \
--parallel_replicas_for_non_replicated_merge_tree 1 \
--parallel_replicas_min_number_of_rows_per_replica 0 \
@ -62,7 +62,7 @@ $CLICKHOUSE_CLIENT \
--query_id "${query_id}" \
--max_parallel_replicas 3 \
--prefer_localhost_replica 1 \
--cluster_for_parallel_replicas "parallel_replicas" \
--cluster_for_parallel_replicas "test_cluster_one_shard_three_replicas_localhost" \
--allow_experimental_parallel_reading_from_replicas 1 \
--parallel_replicas_for_non_replicated_merge_tree 1 \
--parallel_replicas_min_number_of_rows_per_replica 0 \
View File
@ -11,7 +11,7 @@ SET
allow_experimental_parallel_reading_from_replicas=1,
max_parallel_replicas=2,
use_hedged_requests=0,
cluster_for_parallel_replicas='parallel_replicas',
cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost',
parallel_replicas_for_non_replicated_merge_tree=1
;
View File
@ -1,23 +0,0 @@
DROP TABLE IF EXISTS lwd_merge;
CREATE TABLE lwd_merge (id UInt64 CODEC(NONE))
ENGINE = MergeTree ORDER BY id
SETTINGS max_bytes_to_merge_at_max_space_in_pool = 80000, exclude_deleted_rows_for_part_size_in_merge = 0;
INSERT INTO lwd_merge SELECT number FROM numbers(10000);
INSERT INTO lwd_merge SELECT number FROM numbers(10000, 10000);
OPTIMIZE TABLE lwd_merge;
SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1;
DELETE FROM lwd_merge WHERE id % 10 > 0;
OPTIMIZE TABLE lwd_merge;
SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1;
ALTER TABLE lwd_merge MODIFY SETTING exclude_deleted_rows_for_part_size_in_merge = 1;
OPTIMIZE TABLE lwd_merge;
SELECT count() FROM system.parts WHERE database = currentDatabase() AND table = 'lwd_merge' AND active = 1;
DROP TABLE IF EXISTS lwd_merge;
View File
@ -0,0 +1,2 @@
Cannot execute query in readonly mode
Internal Server Error
View File
@ -0,0 +1,16 @@
#!/usr/bin/env bash
# Tags: no-parallel
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# This should fail
${CLICKHOUSE_CURL} -X GET -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}&query=CREATE+DATABASE+non_post_request_test" | grep -o "Cannot execute query in readonly mode"
# This should fail
${CLICKHOUSE_CURL} --head -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}&query=CREATE+DATABASE+non_post_request_test" | grep -o "Internal Server Error"
# This should pass - but will throw error "non_post_request_test already exists" if the database was created by any of the above requests.
${CLICKHOUSE_CURL} -X POST -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}" -d 'CREATE DATABASE non_post_request_test'
${CLICKHOUSE_CURL} -X POST -sS "${CLICKHOUSE_URL}&session_id=${SESSION_ID}" -d 'DROP DATABASE non_post_request_test'
View File
@ -0,0 +1,7 @@
-- There was a wrong, harmful feature, leading to bugs and data corruption.
-- This feature is removed, but we take care to maintain compatibility on the syntax level, so now it works as a no-op.
DROP TABLE IF EXISTS t;
CREATE TABLE t (x UInt8, PRIMARY KEY x) ENGINE = ReplacingMergeTree;
OPTIMIZE TABLE t CLEANUP;
DROP TABLE t;
View File
@ -0,0 +1 @@
6 111111111111111111111111111111111111111
View File
@ -0,0 +1,9 @@
DROP TABLE IF EXISTS test;
CREATE TABLE test (x UInt8) ENGINE = MergeTree ORDER BY x;
INSERT INTO test VALUES (1), (2), (3);
SET allow_experimental_parallel_reading_from_replicas = 1, max_parallel_replicas = 2, cluster_for_parallel_replicas = 'test_cluster_one_shard_three_replicas_localhost', prefer_localhost_replica = 0, parallel_replicas_for_non_replicated_merge_tree = 1;
WITH (SELECT '111111111111111111111111111111111111111'::UInt128) AS v SELECT sum(x), max(v) FROM test;
DROP TABLE test;
View File
@ -1,3 +1,4 @@
v23.11.3.23-stable 2023-12-21
v23.11.2.11-stable 2023-12-13
v23.11.1.2711-stable 2023-12-06
v23.10.5.20-stable 2023-11-25