Merge branch 'master' into cache-better-locks

2024-12-17 11:52:27 +00:00 · 2023-02-17 15:30:38 +01:00 · 2023-02-17 15:30:38 +01:00 · acf6704fd0
commit acf6704fd0
parent d051ee9a1e 0f182f6164
101 changed files with 1246 additions and 379 deletions
--- a/docker/packager/binary/build.sh
+++ b/docker/packager/binary/build.sh
@ -98,7 +98,7 @@ ccache_status
 if [ -n "$MAKE_DEB" ]; then
  # No quotes because I want it to expand to nothing if empty.
  # shellcheck disable=SC2086
-  DESTDIR=/build/packages/root ninja $NINJA_FLAGS install
+  DESTDIR=/build/packages/root ninja $NINJA_FLAGS programs/install
  cp /build/programs/clickhouse-diagnostics /build/packages/root/usr/bin
  cp /build/programs/clickhouse-diagnostics /output
  bash -x /build/packages/build
--- a/docker/packager/packager
+++ b/docker/packager/packager
@ -101,11 +101,7 @@ def run_docker_image_with_env(


 def is_release_build(build_type, package_type, sanitizer):
-    return (
-        build_type == ""
-        and package_type == "deb"
-        and sanitizer == ""
-    )
+    return build_type == "" and package_type == "deb" and sanitizer == ""


 def parse_env_variables(
@ -216,6 +212,12 @@ def parse_env_variables(
        cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr")
        cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
        cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
+        # Reduce linking and building time by avoiding *install/all dependencies
+        cmake_flags.append("-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=ON")
+        # Add bridges to the build target
+        build_target = (
+            f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge"
+        )
        if is_release_build(build_type, package_type, sanitizer):
            cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON")
            result.append("WITH_PERFORMANCE=1")
@ -305,7 +307,7 @@ def parse_env_variables(
        cmake_flags.append("-DCLICKHOUSE_OFFICIAL_BUILD=1")

    result.append('CMAKE_FLAGS="' + " ".join(cmake_flags) + '"')
-    result.append(f"BUILD_TARGET={build_target}")
+    result.append(f"BUILD_TARGET='{build_target}'")

    return result

--- a/docker/server/entrypoint.sh
+++ b/docker/server/entrypoint.sh
@ -172,8 +172,20 @@ if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
    # so the container can't be finished by ctrl+c
    CLICKHOUSE_WATCHDOG_ENABLE=${CLICKHOUSE_WATCHDOG_ENABLE:-0}
    export CLICKHOUSE_WATCHDOG_ENABLE
+
+    # An option for easy restarting and replacing clickhouse-server in a container, especially in Kubernetes.
+    # For example, you can replace the clickhouse-server binary to another and restart it while keeping the container running.
+    if [[ "${CLICKHOUSE_DOCKER_RESTART_ON_EXIT:-0}" -eq "1" ]]; then
+        while true; do
+            # This runs the server as a child process of the shell script:
+            /usr/bin/clickhouse su "${USER}:${GROUP}" /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@" ||:
+            echo >&2 'ClickHouse Server exited, and the environment variable CLICKHOUSE_DOCKER_RESTART_ON_EXIT is set to 1. Restarting the server.'
+        done
+    else
+        # This replaces the shell script with the server:
        exec /usr/bin/clickhouse su "${USER}:${GROUP}" /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@"
    fi
+fi

 # Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image
 exec "$@"
--- a/docker/test/stress/stress
+++ b/docker/test/stress/stress
@ -226,7 +226,6 @@ if __name__ == "__main__":
    )
    parser.add_argument("--test-cmd", default="/usr/bin/clickhouse-test")
    parser.add_argument("--skip-func-tests", default="")
-    parser.add_argument("--client-cmd", default="clickhouse-client")
    parser.add_argument("--server-log-folder", default="/var/log/clickhouse-server")
    parser.add_argument("--output-folder")
    parser.add_argument("--global-time-limit", type=int, default=1800)
@ -294,7 +293,6 @@ if __name__ == "__main__":
                # Use system database to avoid CREATE/DROP DATABASE queries
                "--database=system",
                "--hung-check",
-                "--stress",
                "--report-logs-stats",
                "00001_select_1",
            ]
--- a/docs/en/development/build.md
+++ b/docs/en/development/build.md
@ -56,13 +56,13 @@ Gcc cannot be used.
 ### Checkout ClickHouse Sources {#checkout-clickhouse-sources}

 ``` bash
-git clone --recursive git@github.com:ClickHouse/ClickHouse.git
+git clone --recursive --shallow-submodules git@github.com:ClickHouse/ClickHouse.git
 ```

 or

 ``` bash
-git clone --recursive https://github.com/ClickHouse/ClickHouse.git
+git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHouse.git
 ```

 ### Build ClickHouse {#build-clickhouse}
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@ -39,7 +39,7 @@ Next, you need to download the source files onto your working machine. This is c

 In the command line terminal run:

-    git clone --recursive git@github.com:your_github_username/ClickHouse.git
+    git clone --recursive --shallow-submodules git@github.com:your_github_username/ClickHouse.git
    cd ClickHouse

 Note: please, substitute *your_github_username* with what is appropriate!
@ -67,7 +67,7 @@ It generally means that the SSH keys for connecting to GitHub are missing. These

 You can also clone the repository via https protocol:

-    git clone --recursive https://github.com/ClickHouse/ClickHouse.git
+    git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHouse.git

 This, however, will not let you send your changes to the server. You can still use it temporarily and add the SSH keys later replacing the remote address of the repository with `git remote` command.

--- a/docs/en/operations/monitoring.md
+++ b/docs/en/operations/monitoring.md
@ -16,16 +16,13 @@ You can monitor:

 ## Resource Utilization {#resource-utilization}

-ClickHouse does not monitor the state of hardware resources by itself.
-
-It is highly recommended to set up monitoring for:
+ClickHouse also monitors the state of hardware resources by itself, such as:

 -   Load and temperature on processors.
-
-    You can use [dmesg](https://en.wikipedia.org/wiki/Dmesg), [turbostat](https://www.linux.org/docs/man8/turbostat.html) or other instruments.
-
 -   Utilization of storage system, RAM and network.

+This data is collected in the `system.asynchronous_metric_log` table.
+
 ## ClickHouse Server Metrics {#clickhouse-server-metrics}

 ClickHouse server has embedded instruments for self-state monitoring.
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -488,6 +488,23 @@ Possible values:

 Default value: 0.

+## group_by_use_nulls {#group_by_use_nulls}
+
+Changes the way the [GROUP BY clause](/docs/en/sql-reference/statements/select/group-by.md) treats the types of aggregation keys.
+When the `ROLLUP`, `CUBE`, or `GROUPING SETS` specifiers are used, some aggregation keys may not be used to produce some result rows.
+Columns for these keys are filled with either default value or `NULL` in corresponding rows depending on this setting.
+
+Possible values:
+
+-   0 — The default value for the aggregation key type is used to produce missing values.
+-   1 — ClickHouse executes `GROUP BY` the same way as the SQL standard says. The types of aggregation keys are converted to [Nullable](/docs/en/sql-reference/data-types/nullable.md/#data_type-nullable). Columns for corresponding aggregation keys are filled with [NULL](/docs/en/sql-reference/syntax.md) for rows that didn't use it.
+
+Default value: 0.
+
+See also:
+
+-   [GROUP BY clause](/docs/en/sql-reference/statements/select/group-by.md)
+
 ## partial_merge_join_optimizations {#partial_merge_join_optimizations}

 Disables optimizations in partial merge join algorithm for [JOIN](../../sql-reference/statements/select/join.md) queries.
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@ -1205,6 +1205,8 @@ Formats a Time according to the given Format string. Format is a constant expres

 formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.

+Alias: `DATE_FORMAT`.
+
 **Syntax**

 ``` sql
@ -1220,6 +1222,9 @@ Using replacement fields, you can define a pattern for the resulting string. “

 | Placeholder | Description                                             | Example    |
 |----------|---------------------------------------------------------|------------|
+| %a       | abbreviated weekday name (Mon-Sun)                      | Mon        |
+| %b       | abbreviated month name (Jan-Dec)                        | Jan        |
+| %c       | month as a decimal number (01-12)                       | 01         |
 | %C       | year divided by 100 and truncated to integer (00-99)    | 20         |
 | %d       | day of the month, zero-padded (01-31)                   | 02         |
 | %D       | Short MM/DD/YY date, equivalent to %m/%d/%y             | 01/02/18   |
@ -1228,21 +1233,28 @@ Using replacement fields, you can define a pattern for the resulting string. “
 | %F       | short YYYY-MM-DD date, equivalent to %Y-%m-%d           | 2018-01-02 |
 | %G       | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V  | 2018         |
 | %g       | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation                                | 18       |
+| %h       | hour in 12h format (01-12)                              | 09         |
 | %H       | hour in 24h format (00-23)                              | 22         |
+| %i       | minute (00-59)                                          | 33         |
 | %I       | hour in 12h format (01-12)                              | 10         |
 | %j       | day of the year (001-366)                               | 002        |
+| %k       | hour in 24h format (00-23)                              | 22         |
+| %l       | hour in 12h format (01-12)                              | 09         |
 | %m       | month as a decimal number (01-12)                       | 01         |
 | %M       | minute (00-59)                                          | 33         |
 | %n       | new-line character ('\n')                               |            |
 | %p       | AM or PM designation                                    | PM         |
 | %Q       | Quarter (1-4)                                           | 1          |
+| %r       | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p        | 10:30 PM   |
 | %R       | 24-hour HH:MM time, equivalent to %H:%M                 | 22:33      |
+| %s       | second (00-59)                                          | 44         |
 | %S       | second (00-59)                                          | 44         |
 | %t       | horizontal-tab character ('\t')                         |            |
 | %T       | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44   |
 | %u       | ISO 8601 weekday as number with Monday as 1 (1-7)       | 2          |
 | %V       | ISO 8601 week number (01-53)                            | 01         |
 | %w       | weekday as a decimal number with Sunday as 0 (0-6)      | 2          |
+| %W       | full weekday name (Monday-Sunday)                       | Monday     |
 | %y       | Year, last two digits (00-99)                           | 18         |
 | %Y       | Year                                                    | 2018       |
 | %z       | Time offset from UTC as +HHMM or -HHMM                  | -0500      |
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@ -1841,6 +1841,10 @@ Result:

 ## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n)

+:::note
+This function is not available in ClickHouse Cloud.
+:::
+
 Evaluate external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning.
 Accepts a path to a catboost model and model arguments (features). Returns Float64.

--- a/docs/en/sql-reference/statements/select/group-by.md
+++ b/docs/en/sql-reference/statements/select/group-by.md
@ -9,7 +9,7 @@ sidebar_label: GROUP BY

 -   `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”.
 -   All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
-   Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually this signficantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct.
+-   Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually, this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct.

 When you want to group data in the table by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments).

@ -115,6 +115,10 @@ The same query also can be written using `WITH` keyword.
 SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP;
 ```

+**See also**
+
+- [group_by_use_nulls](/docs/en/operations/settings/settings.md#group_by_use_nulls) setting for SQL standard compatibility.
+
 ## CUBE Modifier

 `CUBE` modifier is used to calculate subtotals for every combination of the key expressions in the `GROUP BY` list. The subtotals rows are added after the result table.
@ -206,6 +210,9 @@ The same query also can be written using `WITH` keyword.
 SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE;
 ```

+**See also**
+
+- [group_by_use_nulls](/docs/en/operations/settings/settings.md#group_by_use_nulls) setting for SQL standard compatibility.

 ## WITH TOTALS Modifier

@ -321,12 +328,12 @@ For every different key value encountered, `GROUP BY` calculates a set of aggreg
 ## GROUPING SETS modifier

 This is the most general modifier.
-This modifier allows to manually specify several aggregation key sets (grouping sets).
-Aggregation is performed separately for each grouping set, after that all results are combined.
+This modifier allows manually specifying several aggregation key sets (grouping sets).
+Aggregation is performed separately for each grouping set, and after that, all results are combined.
 If a column is not presented in a grouping set, it's filled with a default value.

 In other words, modifiers described above can be represented via `GROUPING SETS`.
-Despite the fact that queries with `ROLLUP`, `CUBE` and `GROUPING SETS` modifiers are syntactically equal, they may have different performance.
+Despite the fact that queries with `ROLLUP`, `CUBE` and `GROUPING SETS` modifiers are syntactically equal, they may perform differently.
 When `GROUPING SETS` try to execute everything in parallel, `ROLLUP` and `CUBE` are executing the final merging of the aggregates in a single thread.

 In the situation when source columns contain default values, it might be hard to distinguish if a row is a part of the aggregation which uses those columns as keys or not.
@ -351,6 +358,10 @@ GROUPING SETS
 );
 ```

+**See also**
+
+- [group_by_use_nulls](/docs/en/operations/settings/settings.md#group_by_use_nulls) setting for SQL standard compatibility.
+
 ## Implementation Details

 Aggregation is one of the most important features of a column-oriented DBMS, and thus its implementation is one of the most heavily optimized parts of ClickHouse. By default, aggregation is done in memory using a hash-table. It has 40+ specializations that are chosen automatically depending on “grouping key” data types.
--- a/packages/clickhouse-server.service
+++ b/packages/clickhouse-server.service
@ -17,7 +17,8 @@ User=clickhouse
 Group=clickhouse
 Restart=always
 RestartSec=30
-RuntimeDirectory=%p  # %p is resolved to the systemd unit name
+# %p is resolved to the systemd unit name
+RuntimeDirectory=%p 
 ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid
 # Minus means that this file is optional.
 EnvironmentFile=-/etc/default/%p
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@ -59,6 +59,8 @@ option (ENABLE_CLICKHOUSE_SU "A tool similar to 'su'" ${ENABLE_CLICKHOUSE_ALL})

 option (ENABLE_CLICKHOUSE_DISKS "A tool to manage disks" ${ENABLE_CLICKHOUSE_ALL})

+option (ENABLE_CLICKHOUSE_REPORT "A tiny tool to collect a clickhouse-server state" ${ENABLE_CLICKHOUSE_ALL})
+
 if (NOT ENABLE_NURAFT)
    # RECONFIGURE_MESSAGE_LEVEL should not be used here,
    # since ENABLE_NURAFT is set to OFF for FreeBSD and Darwin.
@ -370,6 +372,9 @@ if (ENABLE_CLICKHOUSE_SU)
    install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-su" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
    list(APPEND CLICKHOUSE_BUNDLE clickhouse-su)
 endif ()
+if (ENABLE_CLICKHOUSE_REPORT)
+    include(${ClickHouse_SOURCE_DIR}/utils/report/CMakeLists.txt)
+endif ()

 if (ENABLE_CLICKHOUSE_KEEPER)
    if (NOT BUILD_STANDALONE_KEEPER AND CREATE_KEEPER_SYMLINK)
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@ -69,6 +69,7 @@ namespace ErrorCodes
    extern const int TOO_DEEP_RECURSION;
    extern const int NETWORK_ERROR;
    extern const int AUTHENTICATION_FAILED;
+    extern const int NO_ELEMENTS_IN_CONFIG;
 }


@ -134,29 +135,34 @@ void Client::parseConnectionsCredentials()
    if (hosts_and_ports.size() >= 2)
        return;

-    String host;
-    std::optional<UInt16> port;
+    std::optional<String> host;
    if (hosts_and_ports.empty())
    {
-        host = config().getString("host", "localhost");
-        if (config().has("port"))
-            port = config().getInt("port");
+        if (config().has("host"))
+            host = config().getString("host");
    }
    else
    {
        host = hosts_and_ports.front().host;
-        port = hosts_and_ports.front().port;
    }

+    String connection;
+    if (config().has("connection"))
+        connection = config().getString("connection");
+    else
+        connection = host.value_or("localhost");
+
    Strings keys;
    config().keys("connections_credentials", keys);
-    for (const auto & connection : keys)
+    bool connection_found = false;
+    for (const auto & key : keys)
    {
-        const String & prefix = "connections_credentials." + connection;
+        const String & prefix = "connections_credentials." + key;

        const String & connection_name = config().getString(prefix + ".name", "");
-        if (connection_name != host)
+        if (connection_name != connection)
            continue;
+        connection_found = true;

        String connection_hostname;
        if (config().has(prefix + ".hostname"))
@ -164,14 +170,9 @@ void Client::parseConnectionsCredentials()
        else
            connection_hostname = connection_name;

-        /// Set "host" unconditionally (since it is used as a "name"), while
-        /// other options only if they are not set yet (config.xml/cli
-        /// options).
+        if (hosts_and_ports.empty())
            config().setString("host", connection_hostname);
-        if (!hosts_and_ports.empty())
-            hosts_and_ports.front().host = connection_hostname;
-
-        if (config().has(prefix + ".port") && !port.has_value())
+        if (config().has(prefix + ".port") && hosts_and_ports.empty())
            config().setInt("port", config().getInt(prefix + ".port"));
        if (config().has(prefix + ".secure") && !config().has("secure"))
            config().setBool("secure", config().getBool(prefix + ".secure"));
@ -189,6 +190,9 @@ void Client::parseConnectionsCredentials()
            config().setString("history_file", history_file);
        }
    }
+
+    if (config().has("connection") && !connection_found)
+        throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No such connection '{}' in connections_credentials", connection);
 }

 /// Make query to get all server warnings
@ -955,6 +959,7 @@ void Client::addOptions(OptionsDescription & options_description)
    /// Main commandline options related to client functionality and all parameters from Settings.
    options_description.main_description->add_options()
        ("config,c", po::value<std::string>(), "config-file path (another shorthand)")
+        ("connection", po::value<std::string>(), "connection to use (from the client config), by default connection name is hostname")
        ("secure,s", "Use TLS connection")
        ("user,u", po::value<std::string>()->default_value("default"), "user")
        /** If "--password [value]" is used but the value is omitted, the bad argument exception will be thrown.
@ -1095,6 +1100,8 @@ void Client::processOptions(const OptionsDescription & options_description,

    if (options.count("config"))
        config().setString("config-file", options["config"].as<std::string>());
+    if (options.count("connection"))
+        config().setString("connection", options["connection"].as<std::string>());
    if (options.count("interleave-queries-file"))
        interleave_queries_files = options["interleave-queries-file"].as<std::vector<std::string>>();
    if (options.count("secure"))
--- a/src/Client/Suggest.cpp
+++ b/src/Client/Suggest.cpp
@ -43,7 +43,7 @@ Suggest::Suggest()
        "IN",           "KILL",     "QUERY",  "SYNC",      "ASYNC",    "TEST",        "BETWEEN",  "TRUNCATE",    "USER",    "ROLE",
        "PROFILE",      "QUOTA",    "POLICY", "ROW",       "GRANT",    "REVOKE",      "OPTION",   "ADMIN",       "EXCEPT",  "REPLACE",
        "IDENTIFIED",   "HOST",     "NAME",   "READONLY",  "WRITABLE", "PERMISSIVE",  "FOR",      "RESTRICTIVE", "RANDOMIZED",
-        "INTERVAL",     "LIMITS",   "ONLY",   "TRACKING",  "IP",       "REGEXP",      "ILIKE",
+        "INTERVAL",     "LIMITS",   "ONLY",   "TRACKING",  "IP",       "REGEXP",      "ILIKE",    "CLEANUP"
    });
 }

--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@ -109,6 +109,10 @@ IMPLEMENT_SETTING_ENUM_WITH_RENAME(DefaultTableEngine, ErrorCodes::BAD_ARGUMENTS
     {"ReplicatedReplacingMergeTree", DefaultTableEngine::ReplicatedReplacingMergeTree},
     {"Memory", DefaultTableEngine::Memory}})

+IMPLEMENT_SETTING_ENUM(CleanDeletedRows, ErrorCodes::BAD_ARGUMENTS,
+    {{"Never", CleanDeletedRows::Never},
+     {"Always", CleanDeletedRows::Always}})
+
 IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL,
    {{"decimal",    MySQLDataTypesSupport::DECIMAL},
     {"datetime64", MySQLDataTypesSupport::DATETIME64},
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@ -121,6 +121,14 @@ enum class DefaultTableEngine

 DECLARE_SETTING_ENUM(DefaultTableEngine)

+enum class CleanDeletedRows
+{
+    Never = 0, /// Disable.
+    Always,
+};
+
+DECLARE_SETTING_ENUM(CleanDeletedRows)
+
 enum class MySQLDataTypesSupport
 {
    DECIMAL, // convert MySQL's decimal and number to ClickHouse Decimal when applicable
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@ -1116,8 +1116,7 @@ void BaseDaemon::setupWatchdog()
            logger().information("Child process no longer exists.");
            _exit(WEXITSTATUS(status));
        }
-
-        if (WIFEXITED(status))
+        else if (WIFEXITED(status))
        {
            logger().information(fmt::format("Child process exited normally with code {}.", WEXITSTATUS(status)));
            _exit(WEXITSTATUS(status));
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@ -192,6 +192,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference;
    format_settings.max_binary_string_size = settings.format_binary_max_string_size;
    format_settings.max_parser_depth = context->getSettingsRef().max_parser_depth;
+    format_settings.client_protocol_version = context->getClientProtocolVersion();

    /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in Server context
    if (format_settings.schema.is_server)
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@ -82,6 +82,7 @@ struct FormatSettings
    Float32 input_allow_errors_ratio = 0;

    UInt64 max_binary_string_size = 0;
+    UInt64 client_protocol_version = 0;

    UInt64 max_parser_depth = DBMS_DEFAULT_MAX_PARSER_DEPTH;

--- a/src/Functions/formatDateTime.cpp
+++ b/src/Functions/formatDateTime.cpp
@ -342,12 +342,51 @@ private:
            return writeNumber2(dest, ToMonthImpl::execute(source, timezone));
        }

+        static size_t monthOfYearText(char * dest, Time source, bool abbreviate, UInt64, UInt32, const DateLUTImpl & timezone)
+        {
+            auto month = ToMonthImpl::execute(source, timezone);
+            std::string_view str_view = abbreviate ? monthsShort[month - 1] : monthsFull[month - 1];
+            memcpy(dest, str_view.data(), str_view.size());
+            return str_view.size();
+        }
+
+        static size_t mysqlMonthOfYearTextShort(char * dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone)
+        {
+            return monthOfYearText(dest, source, true, fractional_second, scale, timezone);
+        }
+
+        static size_t mysqlMonthOfYearTextLong(char * dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone)
+        {
+            return monthOfYearText(dest, source, false, fractional_second, scale, timezone);
+        }
+
        static size_t mysqlDayOfWeek(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
        {
            *dest = '0' + ToDayOfWeekImpl::execute(source, 0, timezone);
            return 1;
        }

+        static size_t dayOfWeekText(char * dest, Time source, bool abbreviate, UInt64, UInt32, const DateLUTImpl & timezone)
+        {
+            auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone);
+            if (week_day == 7)
+                week_day = 0;
+
+            std::string_view str_view = abbreviate ? weekdaysShort[week_day] : weekdaysFull[week_day];
+            memcpy(dest, str_view.data(), str_view.size());
+            return str_view.size();
+        }
+
+        static size_t mysqlDayOfWeekTextShort(char * dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone)
+        {
+            return dayOfWeekText(dest, source, true, fractional_second, scale, timezone);
+        }
+
+        static size_t mysqlDayOfWeekTextLong(char * dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone)
+        {
+            return dayOfWeekText(dest, source, false, fractional_second, scale, timezone);
+        }
+
        static size_t mysqlDayOfWeek0To6(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
        {
            auto day = ToDayOfWeekImpl::execute(source, 0, timezone);
@ -411,6 +450,16 @@ private:
            return 5;
        }

+        static size_t mysqlHHMM12(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
+        {
+            auto hour = ToHourImpl::execute(source, timezone);
+            writeNumber2(dest, hour == 0 ? 12 : (hour > 12 ? hour - 12 : hour));
+            writeNumber2(dest + 3, ToMinuteImpl::execute(source, timezone));
+
+            dest[6] = hour >= 12 ? 'P' : 'A';
+            return 8;
+        }
+
        static size_t mysqlSecond(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
        {
            return writeNumber2(dest, ToSecondImpl::execute(source, timezone));
@ -503,15 +552,10 @@ private:
            return writeNumberWithPadding(dest, week_day, min_represent_digits);
        }

-        static size_t jodaDayOfWeekText(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
+        static size_t jodaDayOfWeekText(size_t min_represent_digits, char * dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone)
        {
-            auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone);
-            if (week_day == 7)
-                week_day = 0;
-
-            std::string_view str_view = min_represent_digits <= 3 ? weekdaysShort[week_day] : weekdaysFull[week_day];
-            memcpy(dest, str_view.data(), str_view.size());
-            return str_view.size();
+            bool abbreviate = min_represent_digits <= 3;
+            return dayOfWeekText(dest, source, abbreviate, fractional_second, scale, timezone);
        }

        static size_t jodaYear(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
@ -551,12 +595,10 @@ private:
            return writeNumberWithPadding(dest, month_of_year, min_represent_digits);
        }

-        static size_t jodaMonthOfYearText(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
+        static size_t jodaMonthOfYearText(size_t min_represent_digits, char * dest, Time source, UInt64 fractional_second, UInt32 scale, const DateLUTImpl & timezone)
        {
-            auto month = ToMonthImpl::execute(source, timezone);
-            std::string_view str_view = min_represent_digits <= 3 ? monthsShort[month - 1] : monthsFull[month - 1];
-            memcpy(dest, str_view.data(), str_view.size());
-            return str_view.size();
+            bool abbreviate = min_represent_digits <= 3;
+            return monthOfYearText(dest, source, abbreviate, fractional_second, scale, timezone);
        }

        static size_t jodaDayOfMonth(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
@ -909,6 +951,24 @@ public:

                switch (*pos)
                {
+                    // Abbreviated weekday [Mon...Sun]
+                    case 'a':
+                        instructions.emplace_back(&Action<T>::mysqlDayOfWeekTextShort);
+                        out_template += "Mon";
+                        break;
+
+                    // Abbreviated month [Jan...Dec]
+                    case 'b':
+                        instructions.emplace_back(&Action<T>::mysqlMonthOfYearTextShort);
+                        out_template += "Jan";
+                        break;
+
+                    // Month as a decimal number (01-12)
+                    case 'c':
+                        instructions.emplace_back(&Action<T>::mysqlMonth);
+                        out_template += "00";
+                        break;
+
                    // Year, divided by 100, zero-padded
                    case 'C':
                        instructions.emplace_back(&Action<T>::mysqlCentury);
@ -990,6 +1050,12 @@ public:
                        out_template += "0";
                        break;

+                    // Full weekday [Monday...Sunday]
+                    case 'W':
+                        instructions.emplace_back(&Action<T>::mysqlDayOfWeekTextLong);
+                        out_template += "Monday";
+                        break;
+
                    // Two digits year
                    case 'y':
                        instructions.emplace_back(&Action<T>::mysqlYear2);
@ -1028,65 +1094,102 @@ public:
                        out_template += "AM";
                        break;

-                    // 24-hour HH:MM time, equivalent to %H:%M 14:55
+                    // 12-hour HH:MM time, equivalent to %h:%i %p 02:55 PM
+                    case 'r':
+                        add_instruction_or_extra_shift(&Action<T>::mysqlHHMM12, 8);
+                        out_template += "12:00 AM";
+                        break;
+
+                    // 24-hour HH:MM time, equivalent to %H:%i 14:55
                    case 'R':
                        add_instruction_or_extra_shift(&Action<T>::mysqlHHMM24, 5);
                        out_template += "00:00";
                        break;

+                    // Seconds
+                    case 's':
+                        add_instruction_or_extra_shift(&Action<T>::mysqlSecond, 2);
+                        out_template += "00";
+                        break;
+
                    // Seconds
                    case 'S':
                        add_instruction_or_extra_shift(&Action<T>::mysqlSecond, 2);
                        out_template += "00";
                        break;

-                    // ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S 14:55:02
+                    // ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S 14:55:02
                    case 'T':
                        add_instruction_or_extra_shift(&Action<T>::mysqlISO8601Time, 8);
                        out_template += "00:00:00";
                        break;

+                    // Hour in 12h format (01-12)
+                    case 'h':
+                        add_instruction_or_extra_shift(&Action<T>::mysqlHour12, 2);
+                        out_template += "12";
+                        break;
+
                    // Hour in 24h format (00-23)
                    case 'H':
                        add_instruction_or_extra_shift(&Action<T>::mysqlHour24, 2);
                        out_template += "00";
                        break;

+                    // Minute of hour range [0, 59]
+                    case 'i':
+                        add_instruction_or_extra_shift(&Action<T>::mysqlMinute, 2);
+                        out_template += "00";
+                        break;
+
                    // Hour in 12h format (01-12)
                    case 'I':
                        add_instruction_or_extra_shift(&Action<T>::mysqlHour12, 2);
                        out_template += "12";
                        break;

-                    /// Escaped literal characters.
-                    case '%':
-                        add_extra_shift(1);
-                        out_template += "%";
+                    // Hour in 24h format (00-23)
+                    case 'k':
+                        add_instruction_or_extra_shift(&Action<T>::mysqlHour24, 2);
+                        out_template += "00";
                        break;
+
+                    // Hour in 12h format (01-12)
+                    case 'l':
+                        add_instruction_or_extra_shift(&Action<T>::mysqlHour12, 2);
+                        out_template += "12";
+                        break;
+
                    case 't':
                        add_extra_shift(1);
                        out_template += "\t";
                        break;
+
                    case 'n':
                        add_extra_shift(1);
                        out_template += "\n";
                        break;

+                    // Escaped literal characters.
+                    case '%':
+                        add_extra_shift(1);
+                        out_template += "%";
+                        break;
+
                    // Unimplemented
                    case 'U':
-                        [[fallthrough]];
-                    case 'W':
-                        throw Exception(
-                            ErrorCodes::NOT_IMPLEMENTED,
-                            "Wrong syntax '{}', symbol '{}' is not implemented for function {}",
-                            format,
-                            *pos,
-                            getName());
+                        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for WEEK (Sun-Sat)");
+                    case 'v':
+                        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for WEEK (Mon-Sun)");
+                    case 'x':
+                        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for YEAR for week (Mon-Sun)");
+                    case 'X':
+                        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for YEAR for week (Sun-Sat)");

                    default:
                        throw Exception(
-                            ErrorCodes::ILLEGAL_COLUMN,
-                            "Wrong syntax '{}', unexpected symbol '{}' for function {}",
+                            ErrorCodes::BAD_ARGUMENTS,
+                            "Incorrect syntax '{}', symbol is not supported '{}' for function {}",
                            format,
                            *pos,
                            getName());
@ -1337,6 +1440,8 @@ using FunctionFromUnixTimestampInJodaSyntax = FunctionFormatDateTimeImpl<NameFro
 REGISTER_FUNCTION(FormatDateTime)
 {
    factory.registerFunction<FunctionFormatDateTime>();
+    factory.registerAlias("DATE_FORMAT", FunctionFormatDateTime::name);
+
    factory.registerFunction<FunctionFromUnixTimestamp>();
    factory.registerAlias("FROM_UNIXTIME", "fromUnixTimestamp");

--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@ -4074,4 +4074,14 @@ bool Context::canUseParallelReplicasOnFollower() const
        && getClientInfo().collaborate_with_initiator;
 }

+UInt64 Context::getClientProtocolVersion() const
+{
+    return client_protocol_version;
+}
+
+void Context::setClientProtocolVersion(UInt64 version)
+{
+    client_protocol_version = version;
+}
+
 }
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@ -279,6 +279,9 @@ private:
    std::optional<MergeTreeAllRangesCallback> merge_tree_all_ranges_callback;
    UUID parallel_replicas_group_uuid{UUIDHelpers::Nil};

+    /// This parameter can be set by the HTTP client to tune the behavior of output formats for compatibility.
+    UInt64 client_protocol_version = 0;
+
    /// Record entities accessed by current query, and store this information in system.query_log.
    struct QueryAccessInfo
    {
@ -828,6 +831,8 @@ public:
    bool tryCheckClientConnectionToMyKeeperCluster() const;

    UInt32 getZooKeeperSessionUptime() const;
+    UInt64 getClientProtocolVersion() const;
+    void setClientProtocolVersion(UInt64 version);

 #if USE_ROCKSDB
    MergeTreeMetadataCachePtr getMergeTreeMetadataCache() const;
--- a/src/Interpreters/InterpreterOptimizeQuery.cpp
+++ b/src/Interpreters/InterpreterOptimizeQuery.cpp
@ -79,7 +79,7 @@ BlockIO InterpreterOptimizeQuery::execute()
    if (auto * snapshot_data = dynamic_cast<MergeTreeData::SnapshotData *>(storage_snapshot->data.get()))
        snapshot_data->parts = {};

-    table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, column_names, getContext());
+    table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, column_names, ast.cleanup, getContext());

    return {};
 }
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@ -586,12 +586,17 @@ InterpreterSelectQuery::InterpreterSelectQuery(
                current_info.query = query_ptr;
                current_info.syntax_analyzer_result = syntax_analyzer_result;

+                Names queried_columns = syntax_analyzer_result->requiredSourceColumns();
+                const auto & supported_prewhere_columns = storage->supportedPrewhereColumns();
+                if (supported_prewhere_columns.has_value())
+                    std::erase_if(queried_columns, [&](const auto & name) { return !supported_prewhere_columns->contains(name); });
+
                MergeTreeWhereOptimizer{
                    current_info,
                    context,
                    std::move(column_compressed_sizes),
                    metadata_snapshot,
-                    syntax_analyzer_result->requiredSourceColumns(),
+                    queried_columns,
                    log};
            }
        }
@ -1994,6 +1999,27 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
        }
    }

+    /// Set of all (including ALIAS) required columns for PREWHERE
+    auto get_prewhere_columns = [&]()
+    {
+        NameSet columns;
+
+        if (prewhere_info)
+        {
+            /// Get some columns directly from PREWHERE expression actions
+            auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns().getNames();
+            columns.insert(prewhere_required_columns.begin(), prewhere_required_columns.end());
+
+            if (prewhere_info->row_level_filter)
+            {
+                auto row_level_required_columns = prewhere_info->row_level_filter->getRequiredColumns().getNames();
+                columns.insert(row_level_required_columns.begin(), row_level_required_columns.end());
+            }
+        }
+
+        return columns;
+    };
+
    /// There are multiple sources of required columns:
    ///  - raw required columns,
    ///  - columns deduced from ALIAS columns,
@ -2003,22 +2029,9 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
    /// before any other executions.
    if (alias_columns_required)
    {
-        NameSet required_columns_from_prewhere; /// Set of all (including ALIAS) required columns for PREWHERE
+        NameSet required_columns_from_prewhere = get_prewhere_columns();
        NameSet required_aliases_from_prewhere; /// Set of ALIAS required columns for PREWHERE

-        if (prewhere_info)
-        {
-            /// Get some columns directly from PREWHERE expression actions
-            auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns().getNames();
-            required_columns_from_prewhere.insert(prewhere_required_columns.begin(), prewhere_required_columns.end());
-
-            if (prewhere_info->row_level_filter)
-            {
-                auto row_level_required_columns = prewhere_info->row_level_filter->getRequiredColumns().getNames();
-                required_columns_from_prewhere.insert(row_level_required_columns.begin(), row_level_required_columns.end());
-            }
-        }
-
        /// Expression, that contains all raw required columns
        ASTPtr required_columns_all_expr = std::make_shared<ASTExpressionList>();

@ -2114,6 +2127,18 @@ void InterpreterSelectQuery::addPrewhereAliasActions()
                        required_columns.push_back(column);
        }
    }
+
+    const auto & supported_prewhere_columns = storage->supportedPrewhereColumns();
+    if (supported_prewhere_columns.has_value())
+    {
+        NameSet required_columns_from_prewhere = get_prewhere_columns();
+
+        for (const auto & column_name : required_columns_from_prewhere)
+        {
+            if (!supported_prewhere_columns->contains(column_name))
+                throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Storage {} doesn't support PREWHERE for {}", storage->getName(), column_name);
+        }
+    }
 }

 void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan)
--- a/src/Interpreters/TransactionLog.cpp
+++ b/src/Interpreters/TransactionLog.cpp
@ -534,36 +534,64 @@ MergeTreeTransactionPtr TransactionLog::tryGetRunningTransaction(const TIDHash &
    return it->second;
 }

-CSN TransactionLog::getCSN(const TransactionID & tid)
+CSN TransactionLog::getCSN(const TransactionID & tid, const std::atomic<CSN> * failback_with_strict_load_csn)
 {
    /// Avoid creation of the instance if transactions are not actually involved
    if (tid == Tx::PrehistoricTID)
        return Tx::PrehistoricCSN;
-    return instance().getCSNImpl(tid.getHash());
+    return instance().getCSNImpl(tid.getHash(), failback_with_strict_load_csn);
 }

-CSN TransactionLog::getCSN(const TIDHash & tid)
+CSN TransactionLog::getCSN(const TIDHash & tid, const std::atomic<CSN> * failback_with_strict_load_csn)
 {
    /// Avoid creation of the instance if transactions are not actually involved
    if (tid == Tx::PrehistoricTID.getHash())
        return Tx::PrehistoricCSN;
-    return instance().getCSNImpl(tid);
+    return instance().getCSNImpl(tid, failback_with_strict_load_csn);
 }

-CSN TransactionLog::getCSNImpl(const TIDHash & tid_hash) const
+CSN TransactionLog::getCSNImpl(const TIDHash & tid_hash, const std::atomic<CSN> * failback_with_strict_load_csn) const
 {
    chassert(tid_hash);
    chassert(tid_hash != Tx::EmptyTID.getHash());

+    {
        std::lock_guard lock{mutex};
        auto it = tid_to_csn.find(tid_hash);
        if (it != tid_to_csn.end())
            return it->second.csn;
+    }
+
+    /// Usually the commit CSN is checked by a load with memory_order_relaxed, just for performance.
+    /// If that fast load fails, then getCSN is called.
+    /// A race is possible: the transaction could be committed concurrently, right before getCSN is called. In that case tid_to_csn has no tid_hash, but the commit CSN is set.
+    /// To be sure, the commit CSN has to be loaded with memory_order_seq_cst after the lookup in tid_to_csn.
+    if (failback_with_strict_load_csn)
+        if (CSN maybe_csn = failback_with_strict_load_csn->load())
+            return maybe_csn;

    return Tx::UnknownCSN;
 }

-void TransactionLog::assertTIDIsNotOutdated(const TransactionID & tid)
+CSN TransactionLog::getCSNAndAssert(const TransactionID & tid, std::atomic<CSN> & failback_with_strict_load_csn)
+{
+    /// failback_with_strict_load_csn is deliberately not passed to getCSN here,
+    /// because it is checked below, after assertTIDIsNotOutdated.
+    if (CSN maybe_csn = getCSN(tid))
+        return maybe_csn;
+
+    assertTIDIsNotOutdated(tid, &failback_with_strict_load_csn);
+
+   /// If the transaction is not outdated, it might already be committed.
+   /// Load the CSN again to distinguish that case;
+   /// otherwise the transaction has not been committed yet.
+    if (CSN maybe_csn = failback_with_strict_load_csn.load())
+        return maybe_csn;
+
+    return Tx::UnknownCSN;
+}
+
+void TransactionLog::assertTIDIsNotOutdated(const TransactionID & tid, const std::atomic<CSN> * failback_with_strict_load_csn)
 {
    if (tid == Tx::PrehistoricTID)
        return;
@ -573,6 +601,14 @@ void TransactionLog::assertTIDIsNotOutdated(const TransactionID & tid)
    if (tail <= tid.start_csn)
        return;

+    /// At this point of execution tail is greater than tid.start_csn.
+    /// This means that the transaction is either outdated or has just been committed concurrently and the tail moved forward.
+    /// In the second case the transaction's commit CSN has to be set.
+    /// We should load the CSN again to distinguish the second case.
+    if (failback_with_strict_load_csn)
+        if (CSN maybe_csn = failback_with_strict_load_csn->load())
+            return;
+
    throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get CSN for too old TID {}, current tail_ptr is {}, probably it's a bug", tid, tail);
 }

--- a/src/Interpreters/TransactionLog.h
+++ b/src/Interpreters/TransactionLog.h
@ -107,11 +107,14 @@ public:

    /// Returns CSN if transaction with specified ID was committed and UnknownCSN if it was not.
    /// Returns PrehistoricCSN for PrehistoricTID without creating a TransactionLog instance as a special case.
-    static CSN getCSN(const TransactionID & tid);
-    static CSN getCSN(const TIDHash & tid);
+    /// Sometimes a transaction is committed concurrently; to resolve this, provide failback_with_strict_load_csn.
+    static CSN getCSN(const TransactionID & tid, const std::atomic<CSN> * failback_with_strict_load_csn = nullptr);
+    static CSN getCSN(const TIDHash & tid, const std::atomic<CSN> * failback_with_strict_load_csn = nullptr);
+    static CSN getCSNAndAssert(const TransactionID & tid, std::atomic<CSN> & failback_with_strict_load_csn);

    /// Ensures that getCSN returned UnknownCSN because transaction is not committed and not because entry was removed from the log.
-    static void assertTIDIsNotOutdated(const TransactionID & tid);
+    static void assertTIDIsNotOutdated(const TransactionID & tid, const std::atomic<CSN> * failback_with_strict_load_csn = nullptr);
+

    /// Returns a pointer to transaction object if it's running or nullptr.
    MergeTreeTransactionPtr tryGetRunningTransaction(const TIDHash & tid);
@ -147,7 +150,8 @@ private:

    ZooKeeperPtr getZooKeeper() const;

-    CSN getCSNImpl(const TIDHash & tid_hash) const;
+    /// Sometimes a transaction is committed concurrently; to resolve this, provide failback_with_strict_load_csn.
+    CSN getCSNImpl(const TIDHash & tid_hash, const std::atomic<CSN> * failback_with_strict_load_csn = nullptr) const;

    const ContextPtr global_context;
    Poco::Logger * const log;
--- a/src/Interpreters/TransactionVersionMetadata.cpp
+++ b/src/Interpreters/TransactionVersionMetadata.cpp
@ -20,26 +20,6 @@ namespace ErrorCodes
    extern const int CANNOT_PARSE_TEXT;
 }

-inline static CSN getCSNAndAssert(TIDHash tid_hash, std::atomic<CSN> & csn, const TransactionID * tid = nullptr)
-{
-    CSN maybe_csn = TransactionLog::getCSN(tid_hash);
-    if (maybe_csn)
-        return maybe_csn;
-
-    /// Either transaction is not committed (yet) or it was committed and then the CSN entry was cleaned up from the log.
-    /// We should load CSN again to distinguish the second case.
-    /// If entry was cleaned up, then CSN is already stored in VersionMetadata and we will get it.
-    /// And for the first case we will get UnknownCSN again.
-    maybe_csn = csn.load();
-    if (maybe_csn)
-        return maybe_csn;
-
-    if (tid)
-        TransactionLog::assertTIDIsNotOutdated(*tid);
-
-    return Tx::UnknownCSN;
-}
-
 VersionMetadata::VersionMetadata()
 {
    /// It would be better to make it static, but static loggers do not work for some reason (initialization order?)
@ -217,7 +197,7 @@ bool VersionMetadata::isVisible(CSN snapshot_version, TransactionID current_tid)
    /// so we can determine their visibility through fast path.
    /// But for long-running writing transactions we will always do
    /// CNS lookup and get 0 (UnknownCSN) until the transaction is committed/rolled back.
-    creation = getCSNAndAssert(creation_tid.getHash(), creation_csn, &creation_tid);
+    creation = TransactionLog::getCSNAndAssert(creation_tid, creation_csn);
    if (!creation)
    {
        return false;   /// Part creation is not committed yet
@ -229,7 +209,7 @@ bool VersionMetadata::isVisible(CSN snapshot_version, TransactionID current_tid)

    if (removal_lock)
    {
-        removal = getCSNAndAssert(removal_lock, removal_csn);
+        removal = TransactionLog::getCSN(removal_lock, &removal_csn);
        if (removal)
            removal_csn.store(removal, std::memory_order_relaxed);
    }
@ -267,7 +247,7 @@ bool VersionMetadata::canBeRemovedImpl(CSN oldest_snapshot_version)
    if (!creation)
    {
        /// Cannot remove part if its creation not committed yet
-        creation = getCSNAndAssert(creation_tid.getHash(), creation_csn, &creation_tid);
+        creation = TransactionLog::getCSNAndAssert(creation_tid, creation_csn);
        if (creation)
            creation_csn.store(creation, std::memory_order_relaxed);
        else
@ -287,7 +267,7 @@ bool VersionMetadata::canBeRemovedImpl(CSN oldest_snapshot_version)
    if (!removal)
    {
        /// Part removal is not committed yet
-        removal = getCSNAndAssert(removal_lock, removal_csn);
+        removal = TransactionLog::getCSN(removal_lock, &removal_csn);
        if (removal)
            removal_csn.store(removal, std::memory_order_relaxed);
        else
--- a/src/Interpreters/TreeRewriter.cpp
+++ b/src/Interpreters/TreeRewriter.cpp
@ -363,24 +363,17 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
 {
    ASTs & elements = select_query->select()->children;

-    std::unordered_map<String, size_t> required_columns_with_duplicate_count;
-    /// Order of output columns should match order in required_result_columns,
-    /// otherwise UNION queries may have incorrect header when subselect has duplicated columns.
-    ///
-    /// NOTE: multimap is required since there can be duplicated column names.
-    std::unordered_multimap<String, size_t> output_columns_positions;
+    std::map<String, size_t> required_columns_with_duplicate_count;

    if (!required_result_columns.empty())
    {
        /// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
-        for (size_t i = 0; i < required_result_columns.size(); ++i)
+        for (const auto & name : required_result_columns)
        {
-            const auto & name = required_result_columns[i];
            if (remove_dups)
                required_columns_with_duplicate_count[name] = 1;
            else
                ++required_columns_with_duplicate_count[name];
-            output_columns_positions.emplace(name, i);
        }
    }
    else if (remove_dups)
@ -392,8 +385,8 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
    else
        return;

-    ASTs new_elements(elements.size() + output_columns_positions.size());
-    size_t new_elements_size = 0;
+    ASTs new_elements;
+    new_elements.reserve(elements.size());

    NameSet remove_columns;

@ -401,35 +394,17 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
    {
        String name = elem->getAliasOrColumnName();

-        /// Columns that are presented in output_columns_positions should
-        /// appears in the same order in the new_elements, hence default
-        /// result_index goes after all elements of output_columns_positions
-        /// (it is for columns that are not located in
-        /// output_columns_positions, i.e. untuple())
-        size_t result_index = output_columns_positions.size() + new_elements_size;
-
-        /// Note, order of duplicated columns is not important here (since they
-        /// are the same), only order for unique columns is important, so it is
-        /// fine to use multimap here.
-        if (auto it = output_columns_positions.find(name); it != output_columns_positions.end())
-        {
-            result_index = it->second;
-            output_columns_positions.erase(it);
-        }
-
        auto it = required_columns_with_duplicate_count.find(name);
        if (required_columns_with_duplicate_count.end() != it && it->second)
        {
-            new_elements[result_index] = elem;
+            new_elements.push_back(elem);
            --it->second;
-            ++new_elements_size;
        }
        else if (select_query->distinct || hasArrayJoin(elem))
        {
            /// ARRAY JOIN cannot be optimized out since it may change number of rows,
            /// so as DISTINCT.
-            new_elements[result_index] = elem;
-            ++new_elements_size;
+            new_elements.push_back(elem);
        }
        else
        {
@ -440,25 +415,18 @@ void removeUnneededColumnsFromSelectClause(ASTSelectQuery * select_query, const
            /// Never remove untuple. It's result column may be in required columns.
            /// It is not easy to analyze untuple here, because types were not calculated yet.
            if (func && func->name == "untuple")
-            {
-                new_elements[result_index] = elem;
-                ++new_elements_size;
-            }
+                new_elements.push_back(elem);
+
            /// removing aggregation can change number of rows, so `count()` result in outer sub-query would be wrong
            if (func && !select_query->groupBy())
            {
                GetAggregatesVisitor::Data data = {};
                GetAggregatesVisitor(data).visit(elem);
                if (!data.aggregates.empty())
-                {
-                    new_elements[result_index] = elem;
-                    ++new_elements_size;
+                    new_elements.push_back(elem);
            }
        }
    }
-    }
-    /// Remove empty nodes.
-    std::erase(new_elements, ASTPtr{});

    if (select_query->interpolate())
    {
--- a/src/Parsers/ASTOptimizeQuery.cpp
+++ b/src/Parsers/ASTOptimizeQuery.cpp
@ -24,6 +24,9 @@ void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatSt
    if (deduplicate)
        settings.ostr << (settings.hilite ? hilite_keyword : "") << " DEDUPLICATE" << (settings.hilite ? hilite_none : "");

+    if (cleanup)
+        settings.ostr << (settings.hilite ? hilite_keyword : "") << " CLEANUP" << (settings.hilite ? hilite_none : "");
+
    if (deduplicate_by_columns)
    {
        settings.ostr << (settings.hilite ? hilite_keyword : "") << " BY " << (settings.hilite ? hilite_none : "");
--- a/src/Parsers/ASTOptimizeQuery.h
+++ b/src/Parsers/ASTOptimizeQuery.h
@ -21,11 +21,12 @@ public:
    bool deduplicate = false;
    /// Deduplicate by columns.
    ASTPtr deduplicate_by_columns;
-
+    /// Delete 'is_deleted' data
+    bool cleanup = false;
    /** Get the text that identifies this element. */
    String getID(char delim) const override
    {
-        return "OptimizeQuery" + (delim + getDatabase()) + delim + getTable() + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : "");
+        return "OptimizeQuery" + (delim + getDatabase()) + delim + getTable() + (final ? "_final" : "") + (deduplicate ? "_deduplicate" : "")+ (cleanup ? "_cleanup" : "");
    }

    ASTPtr clone() const override
--- a/src/Parsers/ParserOptimizeQuery.cpp
+++ b/src/Parsers/ParserOptimizeQuery.cpp
@ -28,6 +28,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
    ParserKeyword s_partition("PARTITION");
    ParserKeyword s_final("FINAL");
    ParserKeyword s_deduplicate("DEDUPLICATE");
+    ParserKeyword s_cleanup("CLEANUP");
    ParserKeyword s_by("BY");
    ParserToken s_dot(TokenType::Dot);
    ParserIdentifier name_p(true);
@ -38,6 +39,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
    ASTPtr partition;
    bool final = false;
    bool deduplicate = false;
+    bool cleanup = false;
    String cluster_str;

    if (!s_optimize_table.ignore(pos, expected))
@ -68,6 +70,9 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
    if (s_deduplicate.ignore(pos, expected))
        deduplicate = true;

+    if (s_cleanup.ignore(pos, expected))
+        cleanup = true;
+
    ASTPtr deduplicate_by_columns;
    if (deduplicate && s_by.ignore(pos, expected))
    {
@ -85,6 +90,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
    query->final = final;
    query->deduplicate = deduplicate;
    query->deduplicate_by_columns = deduplicate_by_columns;
+    query->cleanup = cleanup;
    query->database = database;
    query->table = table;

--- a/src/Parsers/ParserOptimizeQuery.h
+++ b/src/Parsers/ParserOptimizeQuery.h
@ -14,7 +14,7 @@ protected:
    bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
 };

-/** Query OPTIMIZE TABLE [db.]name [PARTITION partition] [FINAL] [DEDUPLICATE]
+/** Query OPTIMIZE TABLE [db.]name [PARTITION partition] [FINAL] [DEDUPLICATE] [CLEANUP]
  */
 class ParserOptimizeQuery : public IParserBase
 {
--- a/src/Parsers/obfuscateQueries.cpp
+++ b/src/Parsers/obfuscateQueries.cpp
@ -94,6 +94,7 @@ const std::unordered_set<std::string_view> keywords
    "CHAR",
    "CHARACTER",
    "CHECK",
+    "CLEANUP",
    "CLEAR",
    "CLUSTER",
    "CLUSTER_HOST_IDS",
--- a/src/Processors/Formats/Impl/NativeFormat.cpp
+++ b/src/Processors/Formats/Impl/NativeFormat.cpp
@ -55,9 +55,9 @@ private:
 class NativeOutputFormat final : public IOutputFormat
 {
 public:
-    NativeOutputFormat(WriteBuffer & buf, const Block & header)
+    NativeOutputFormat(WriteBuffer & buf, const Block & header, UInt64 client_protocol_version = 0)
        : IOutputFormat(header, buf)
-        , writer(buf, 0, header)
+        , writer(buf, client_protocol_version, header)
    {
    }

@ -115,9 +115,9 @@ void registerOutputFormatNative(FormatFactory & factory)
    factory.registerOutputFormat("Native", [](
        WriteBuffer & buf,
        const Block & sample,
-        const FormatSettings &)
+        const FormatSettings & settings)
    {
-        return std::make_shared<NativeOutputFormat>(buf, sample);
+        return std::make_shared<NativeOutputFormat>(buf, sample, settings.client_protocol_version);
    });
 }

--- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp
@ -1,20 +1,31 @@
 #include <Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h>
+
+#include <Columns/ColumnsNumber.h>
 #include <IO/WriteBuffer.h>

 namespace DB
 {

+namespace ErrorCodes
+{
+    extern const int INCORRECT_DATA;
+}
+
 ReplacingSortedAlgorithm::ReplacingSortedAlgorithm(
    const Block & header_,
    size_t num_inputs,
    SortDescription description_,
+    const String & is_deleted_column,
    const String & version_column,
    size_t max_block_size,
    WriteBuffer * out_row_sources_buf_,
-    bool use_average_block_sizes)
+    bool use_average_block_sizes,
+    bool cleanup_)
    : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs)
-    , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size)
+    , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size), cleanup(cleanup_)
 {
+    if (!is_deleted_column.empty())
+        is_deleted_column_number = header_.getPositionByName(is_deleted_column);
    if (!version_column.empty())
        version_column_number = header_.getPositionByName(version_column);
 }
@ -61,7 +72,15 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge()

            /// Write the data for the previous primary key.
            if (!selected_row.empty())
+            {
+                if (is_deleted_column_number!=-1)
+                {
+                    if (!(cleanup && assert_cast<const ColumnUInt8 &>(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num]))
                        insertRow();
+                }
+                else
+                    insertRow();
+            }

            selected_row.clear();
        }
@ -71,6 +90,13 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge()
        if (out_row_sources_buf)
            current_row_sources.emplace_back(current.impl->order, true);

+        if ((is_deleted_column_number!=-1))
+        {
+            const UInt8 is_deleted = assert_cast<const ColumnUInt8 &>(*current->all_columns[is_deleted_column_number]).getData()[current->getRow()];
+            if ((is_deleted != 1) && (is_deleted != 0))
+                throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect data: is_deleted = {} (must be 1 or 0).", toString(is_deleted));
+        }
+
        /// A non-strict comparison, since we select the last row for the same version values.
        if (version_column_number == -1
            || selected_row.empty()
@ -101,7 +127,15 @@ IMergingAlgorithm::Status ReplacingSortedAlgorithm::merge()

    /// We will write the data for the last primary key.
    if (!selected_row.empty())
+    {
+        if (is_deleted_column_number!=-1)
+        {
+            if (!(cleanup && assert_cast<const ColumnUInt8 &>(*(*selected_row.all_columns)[is_deleted_column_number]).getData()[selected_row.row_num]))
                insertRow();
+        }
+        else
+            insertRow();
+    }

    return Status(merged_data.pull(), true);
 }
--- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h
@ -20,17 +20,22 @@ class ReplacingSortedAlgorithm final : public IMergingAlgorithmWithSharedChunks
 public:
    ReplacingSortedAlgorithm(
        const Block & header, size_t num_inputs,
-        SortDescription description_, const String & version_column,
+        SortDescription description_,
+        const String & is_deleted_column,
+        const String & version_column,
        size_t max_block_size,
        WriteBuffer * out_row_sources_buf_ = nullptr,
-        bool use_average_block_sizes = false);
+        bool use_average_block_sizes = false,
+        bool cleanup = false);

    Status merge() override;

 private:
    MergedData merged_data;

+    ssize_t is_deleted_column_number = -1;
    ssize_t version_column_number = -1;
+    bool cleanup = false;

    using RowRef = detail::RowRefWithOwnedChunk;
    static constexpr size_t max_row_refs = 2; /// last, current.
--- a/src/Processors/Merges/ReplacingSortedTransform.h
+++ b/src/Processors/Merges/ReplacingSortedTransform.h
@ -13,19 +13,23 @@ class ReplacingSortedTransform final : public IMergingTransform<ReplacingSortedA
 public:
    ReplacingSortedTransform(
        const Block & header, size_t num_inputs,
-        SortDescription description_, const String & version_column,
+        SortDescription description_,
+        const String & is_deleted_column, const String & version_column,
        size_t max_block_size,
        WriteBuffer * out_row_sources_buf_ = nullptr,
-        bool use_average_block_sizes = false)
+        bool use_average_block_sizes = false,
+        bool cleanup = false)
        : IMergingTransform(
            num_inputs, header, header, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0,
            header,
            num_inputs,
            std::move(description_),
+            is_deleted_column,
            version_column,
            max_block_size,
            out_row_sources_buf_,
-            use_average_block_sizes)
+            use_average_block_sizes,
+            cleanup)
    {
    }

--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
@ -845,7 +845,7 @@ static void addMergingFinal(

            case MergeTreeData::MergingParams::Replacing:
                return std::make_shared<ReplacingSortedTransform>(header, num_outputs,
-                            sort_description, merging_params.version_column, max_block_size);
+                            sort_description, merging_params.is_deleted_column, merging_params.version_column, max_block_size, /*out_row_sources_buf_*/ nullptr, /*use_average_block_sizes*/ false, /*cleanup*/ !merging_params.is_deleted_column.empty());

            case MergeTreeData::MergingParams::VersionedCollapsing:
                return std::make_shared<VersionedCollapsingTransform>(header, num_outputs,
@ -1428,6 +1428,8 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
        std::vector<String> add_columns = metadata_for_reading->getColumnsRequiredForSortingKey();
        column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end());

+        if (!data.merging_params.is_deleted_column.empty())
+            column_names_to_read.push_back(data.merging_params.is_deleted_column);
        if (!data.merging_params.sign_column.empty())
            column_names_to_read.push_back(data.merging_params.sign_column);
        if (!data.merging_params.version_column.empty())
--- a/src/Server/HTTPHandler.cpp
+++ b/src/Server/HTTPHandler.cpp
@ -558,6 +558,13 @@ void HTTPHandler::processQuery(
    auto client_info = session->getClientInfo();
    auto context = session->makeQueryContext(std::move(client_info));

+    /// This parameter is used to tune the behavior of output formats (such as Native) for compatibility.
+    if (params.has("client_protocol_version"))
+    {
+        UInt64 version_param = parse<UInt64>(params.get("client_protocol_version"));
+        context->setClientProtocolVersion(version_param);
+    }
+
    /// The client can pass a HTTP header indicating supported compression method (gzip or deflate).
    String http_response_compression_methods = request.get("Accept-Encoding", "");
    CompressionMethod http_response_compression_method = CompressionMethod::None;
@ -663,7 +670,7 @@ void HTTPHandler::processQuery(
    std::unique_ptr<ReadBuffer> in;

    static const NameSet reserved_param_names{"compress", "decompress", "user", "password", "quota_key", "query_id", "stacktrace",
-        "buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check"};
+        "buffer_size", "wait_end_of_query", "session_id", "session_timeout", "session_check", "client_protocol_version"};

    Names reserved_param_suffixes;

--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@ -1,5 +1,4 @@
 #include <algorithm>
-#include <iomanip>
 #include <iterator>
 #include <memory>
 #include <mutex>
@ -24,7 +23,6 @@
 #include <IO/LimitReadBuffer.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
-#include <IO/copyData.h>
 #include <Formats/NativeReader.h>
 #include <Formats/NativeWriter.h>
 #include <Interpreters/executeQuery.h>
@ -39,9 +37,7 @@
 #include <Core/ExternalTable.h>
 #include <Access/AccessControl.h>
 #include <Access/Credentials.h>
-#include <Storages/ColumnDefault.h>
 #include <DataTypes/DataTypeLowCardinality.h>
-#include <DataTypes/DataTypeEnum.h>
 #include <Compression/CompressionFactory.h>
 #include <Common/logger_useful.h>
 #include <Common/CurrentMetrics.h>
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@ -135,6 +135,10 @@ public:
    /// Returns true if the storage supports queries with the PREWHERE section.
    virtual bool supportsPrewhere() const { return false; }

+    /// Returns the set of columns that support PREWHERE, or std::nullopt if all columns are supported.
+    /// This is needed for engines that aggregate data from multiple tables, like Merge.
+    virtual std::optional<NameSet> supportedPrewhereColumns() const { return std::nullopt; }
+
    /// Returns true if the storage supports optimization of moving conditions to PREWHERE section.
    virtual bool canMoveConditionsToPrewhere() const { return supportsPrewhere(); }

@ -481,6 +485,7 @@ public:
        bool /*final*/,
        bool /*deduplicate*/,
        const Names & /* deduplicate_by_columns */,
+        bool /*cleanup*/,
        ContextPtr /*context*/)
    {
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method optimize is not supported by storage {}", getName());
--- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
+++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp
@ -279,6 +279,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare()
            reserved_space,
            entry.deduplicate,
            entry.deduplicate_by_columns,
+            entry.cleanup,
            storage.merging_params,
            NO_TRANSACTION_PTR);

--- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
+++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp
@ -107,6 +107,7 @@ void MergePlainMergeTreeTask::prepare()
            merge_mutate_entry->tagger->reserved_space,
            deduplicate,
            deduplicate_by_columns,
+            cleanup,
            storage.merging_params,
            txn);
 }
--- a/src/Storages/MergeTree/MergePlainMergeTreeTask.h
+++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.h
@ -19,6 +19,7 @@ public:
        StorageMetadataPtr metadata_snapshot_,
        bool deduplicate_,
        Names deduplicate_by_columns_,
+        bool cleanup_,
        MergeMutateSelectedEntryPtr merge_mutate_entry_,
        TableLockHolder table_lock_holder_,
        IExecutableTask::TaskResultCallback & task_result_callback_)
@ -26,6 +27,7 @@ public:
        , metadata_snapshot(std::move(metadata_snapshot_))
        , deduplicate(deduplicate_)
        , deduplicate_by_columns(std::move(deduplicate_by_columns_))
+        , cleanup(cleanup_)
        , merge_mutate_entry(std::move(merge_mutate_entry_))
        , table_lock_holder(std::move(table_lock_holder_))
        , task_result_callback(task_result_callback_)
@ -66,6 +68,7 @@ private:
    StorageMetadataPtr metadata_snapshot;
    bool deduplicate;
    Names deduplicate_by_columns;
+    bool cleanup;
    MergeMutateSelectedEntryPtr merge_mutate_entry{nullptr};
    TableLockHolder table_lock_holder;
    FutureMergedMutatedPartPtr future_part{nullptr};
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@ -66,7 +66,10 @@ static void extractMergingAndGatheringColumns(

    /// Force version column for Replacing mode
    if (merging_params.mode == MergeTreeData::MergingParams::Replacing)
+    {
+        key_columns.emplace(merging_params.is_deleted_column);
        key_columns.emplace(merging_params.version_column);
+    }

    /// Force sign column for VersionedCollapsing mode. Version is already in primary key.
    if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing)
@ -673,6 +676,7 @@ bool MergeTask::MergeProjectionsStage::mergeMinMaxIndexAndPrepareProjections() c
            global_ctx->space_reservation,
            global_ctx->deduplicate,
            global_ctx->deduplicate_by_columns,
+            global_ctx->cleanup,
            projection_merging_params,
            global_ctx->need_prefix,
            global_ctx->new_data_part.get(),
@ -907,8 +911,9 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()

        case MergeTreeData::MergingParams::Replacing:
            merged_transform = std::make_shared<ReplacingSortedTransform>(
-                header, pipes.size(), sort_description, ctx->merging_params.version_column,
-                merge_block_size, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size);
+                header, pipes.size(), sort_description, ctx->merging_params.is_deleted_column, ctx->merging_params.version_column,
+                merge_block_size, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size,
+                (data_settings->clean_deleted_rows != CleanDeletedRows::Never) || global_ctx->cleanup);
            break;

        case MergeTreeData::MergingParams::Graphite:
--- a/src/Storages/MergeTree/MergeTask.h
+++ b/src/Storages/MergeTree/MergeTask.h
@ -58,6 +58,7 @@ public:
        ReservationSharedPtr space_reservation_,
        bool deduplicate_,
        Names deduplicate_by_columns_,
+        bool cleanup_,
        MergeTreeData::MergingParams merging_params_,
        bool need_prefix,
        IMergeTreeDataPart * parent_part_,
@ -81,6 +82,7 @@ public:
            global_ctx->space_reservation = std::move(space_reservation_);
            global_ctx->deduplicate = std::move(deduplicate_);
            global_ctx->deduplicate_by_columns = std::move(deduplicate_by_columns_);
+            global_ctx->cleanup = std::move(cleanup_);
            global_ctx->parent_part = std::move(parent_part_);
            global_ctx->data = std::move(data_);
            global_ctx->mutator = std::move(mutator_);
@ -142,6 +144,7 @@ private:
        ReservationSharedPtr space_reservation{nullptr};
        bool deduplicate{false};
        Names deduplicate_by_columns{};
+        bool cleanup{false};

        NamesAndTypesList gathering_columns{};
        NamesAndTypesList merging_columns{};
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@ -719,6 +719,10 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat
 {
    const auto columns = metadata.getColumns().getAllPhysical();

+    if (!is_deleted_column.empty() && mode != MergingParams::Replacing)
+        throw Exception(ErrorCodes::LOGICAL_ERROR,
+                        "is_deleted column for MergeTree cannot be specified in modes except Replacing.");
+
    if (!sign_column.empty() && mode != MergingParams::Collapsing && mode != MergingParams::VersionedCollapsing)
        throw Exception(ErrorCodes::LOGICAL_ERROR,
                        "Sign column for MergeTree cannot be specified "
@ -788,6 +792,41 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat
            throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Version column {} does not exist in table declaration.", version_column);
    };

+    /// Check that if the is_deleted column is needed, it exists and is of type UInt8. If it exists, the version column must be defined too, but version checks are not done here.
+    auto check_is_deleted_column = [this, & columns](bool is_optional, const std::string & storage)
+    {
+        if (is_deleted_column.empty())
+        {
+            if (is_optional)
+                return;
+
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: is_deleted ({}) column for storage {} is empty", is_deleted_column, storage);
+        }
+        else
+        {
+            if (version_column.empty() && !is_optional)
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Version column ({}) for storage {} is empty while is_deleted ({}) is not.",
+                                version_column, storage, is_deleted_column);
+
+            bool miss_is_deleted_column = true;
+            for (const auto & column : columns)
+            {
+                if (column.name == is_deleted_column)
+                {
+                    if (!typeid_cast<const DataTypeUInt8 *>(column.type.get()))
+                        throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "is_deleted column ({}) for storage {} must have type UInt8. Provided column of type {}.",
+                                        is_deleted_column, storage, column.type->getName());
+                    miss_is_deleted_column = false;
+                    break;
+                }
+            }
+
+            if (miss_is_deleted_column)
+                throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "is_deleted column {} does not exist in table declaration.", is_deleted_column);
+        }
+    };
+
+
    if (mode == MergingParams::Collapsing)
        check_sign_column(false, "CollapsingMergeTree");

@ -823,7 +862,10 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat
    }

    if (mode == MergingParams::Replacing)
+    {
+        check_is_deleted_column(true, "ReplacingMergeTree");
        check_version_column(true, "ReplacingMergeTree");
+    }

    if (mode == MergingParams::VersionedCollapsing)
    {
@ -1190,11 +1232,10 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart(

        if (!res.part->version.creation_csn)
        {
-            auto min = TransactionLog::getCSN(res.part->version.creation_tid);
+            auto min = TransactionLog::getCSNAndAssert(res.part->version.creation_tid, res.part->version.creation_csn);
            if (!min)
            {
                /// Transaction that created this part was not committed. Remove part.
-                TransactionLog::assertTIDIsNotOutdated(res.part->version.creation_tid);
                min = Tx::RolledBackCSN;
            }

@ -1207,7 +1248,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart(

        if (!version.removal_tid.isEmpty() && !version.removal_csn)
        {
-            auto max = TransactionLog::getCSN(version.removal_tid);
+            auto max = TransactionLog::getCSNAndAssert(version.removal_tid, version.removal_csn);
            if (max)
            {
                LOG_TRACE(log, "Will fix version metadata of {} after unclean restart: part has removal_tid={}, setting removal_csn={}",
@ -1216,7 +1257,6 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart(
            }
            else
            {
-                TransactionLog::assertTIDIsNotOutdated(version.removal_tid);
                /// Transaction that tried to remove this part was not committed. Clear removal_tid.
                LOG_TRACE(log, "Will fix version metadata of {} after unclean restart: clearing removal_tid={}",
                            res.part->name, version.removal_tid);
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@ -334,6 +334,9 @@ public:
        /// For Collapsing and VersionedCollapsing mode.
        String sign_column;

+        /// For Replacing mode. Can be empty for Replacing.
+        String is_deleted_column;
+
        /// For Summing mode. If empty - columns_to_sum is determined automatically.
        Names columns_to_sum;

--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp
@ -525,6 +525,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart(
    ReservationSharedPtr space_reservation,
    bool deduplicate,
    const Names & deduplicate_by_columns,
+    bool cleanup,
    const MergeTreeData::MergingParams & merging_params,
    const MergeTreeTransactionPtr & txn,
    bool need_prefix,
@ -541,6 +542,7 @@ MergeTaskPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart(
        space_reservation,
        deduplicate,
        deduplicate_by_columns,
+        cleanup,
        merging_params,
        need_prefix,
        parent_part,
--- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
+++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h
@ -111,6 +111,7 @@ public:
        ReservationSharedPtr space_reservation,
        bool deduplicate,
        const Names & deduplicate_by_columns,
+        bool cleanup,
        const MergeTreeData::MergingParams & merging_params,
        const MergeTreeTransactionPtr & txn,
        bool need_prefix = true,
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@ -281,7 +281,7 @@ Block MergeTreeDataWriter::mergeBlock(
                return nullptr;
            case MergeTreeData::MergingParams::Replacing:
                return std::make_shared<ReplacingSortedAlgorithm>(
-                    block, 1, sort_description, merging_params.version_column, block_size + 1);
+                    block, 1, sort_description, merging_params.is_deleted_column, merging_params.version_column, block_size + 1);
            case MergeTreeData::MergingParams::Collapsing:
                return std::make_shared<CollapsingSortedAlgorithm>(
                    block, 1, sort_description, merging_params.sign_column,
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@ -3,6 +3,7 @@
 #include <base/unit.h>
 #include <Core/Defines.h>
 #include <Core/BaseSettings.h>
+#include <Core/SettingsEnums.h>
 #include <Interpreters/Context_fwd.h>
 #include <Storages/MergeTree/MergeTreeDataFormatVersion.h>

@ -69,6 +70,7 @@ struct Settings;
    M(Bool, min_age_to_force_merge_on_partition_only, false, "Whether min_age_to_force_merge_seconds should be applied only on the entire partition and not on subset.", false) \
    M(UInt64, merge_tree_enable_clear_old_broken_detached, false, "Enable clearing old broken detached parts operation in background.", 0) \
    M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \
+    M(CleanDeletedRows, clean_deleted_rows, CleanDeletedRows::Never, "Is the Replicated Merge cleanup has to be done automatically at each merge or manually (possible values are 'Always'/'Never' (default))", 0) \
    \
    /** Inserts settings. */ \
    M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \
--- a/src/Storages/MergeTree/MutateTask.cpp
+++ b/src/Storages/MergeTree/MutateTask.cpp
@ -893,6 +893,7 @@ public:
                ctx->space_reservation,
                false, // TODO Do we need deduplicate for projections
                {},
+                false, // no cleanup
                projection_merging_params,
                NO_TRANSACTION_PTR,
                /* need_prefix */ true,
--- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp
@ -96,6 +96,9 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
                }
            }

+            if (cleanup)
+                out << "\ncleanup: " << cleanup;
+
            break;

        case DROP_RANGE:
@ -269,10 +272,13 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in, MergeTreeDataFor

                    deduplicate_by_columns = std::move(new_deduplicate_by_columns);
                }
+                else if (checkString("cleanup: ", in))
+                    in >> cleanup;
                else
                    trailing_newline_found = true;
            }
        }
+
    }
    else if (type_str == "drop" || type_str == "detach")
    {
--- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h
@ -98,6 +98,7 @@ struct ReplicatedMergeTreeLogEntryData
    Strings source_parts;
    bool deduplicate = false; /// Do deduplicate on merge
    Strings deduplicate_by_columns = {}; // Which columns should be checked for duplicates, empty means 'all' (default).
+    bool cleanup = false;
    MergeType merge_type = MergeType::Regular;
    String column_name;
    String index_name;
--- a/src/Storages/MergeTree/registerStorageMergeTree.cpp
+++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp
@ -136,7 +136,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
        * CollapsingMergeTree(date, [sample_key], primary_key, index_granularity, sign)
        * SummingMergeTree(date, [sample_key], primary_key, index_granularity, [columns_to_sum])
        * AggregatingMergeTree(date, [sample_key], primary_key, index_granularity)
-        * ReplacingMergeTree(date, [sample_key], primary_key, index_granularity, [version_column])
+        * ReplacingMergeTree(date, [sample_key], primary_key, index_granularity, [version_column [, is_deleted_column]])
        * GraphiteMergeTree(date, [sample_key], primary_key, index_granularity, 'config_element')
        *
        * Alternatively, you can specify:
@ -227,6 +227,7 @@ static StoragePtr create(const StorageFactory::Arguments & args)
            add_optional_param("list of columns to sum");
            break;
        case MergeTreeData::MergingParams::Replacing:
+            add_optional_param("is_deleted column");
            add_optional_param("version");
            break;
        case MergeTreeData::MergingParams::Collapsing:
@ -438,11 +439,20 @@ static StoragePtr create(const StorageFactory::Arguments & args)
    }
    else if (merging_params.mode == MergeTreeData::MergingParams::Replacing)
    {
+        // If there are args and the number of optional parameters is higher than 1.
+        // is_deleted is not allowed with the 'allow_deprecated_syntax_for_merge_tree' setting.
+        if (arg_cnt - arg_num == 2 && !engine_args[arg_cnt - 1]->as<ASTLiteral>() && is_extended_storage_def)
+        {
+            if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.is_deleted_column))
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "is_deleted column name must be an identifier {}", verbose_help_message);
+            --arg_cnt;
+        }
+
        /// If the last element is not index_granularity or replica_name (a literal), then this is the name of the version column.
        if (arg_cnt && !engine_args[arg_cnt - 1]->as<ASTLiteral>())
        {
            if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.version_column))
-                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Version column name must be an unquoted string{}", verbose_help_message);
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Version column name must be an identifier {}", verbose_help_message);
            --arg_cnt;
        }
    }
--- a/src/Storages/StorageBuffer.cpp
+++ b/src/Storages/StorageBuffer.cpp
@ -691,7 +691,7 @@ void StorageBuffer::flush()

    try
    {
-        optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, {}, getContext());
+        optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, {}, false /*cleanup*/, getContext());
    }
    catch (...)
    {
@ -717,6 +717,7 @@ bool StorageBuffer::optimize(
    bool final,
    bool deduplicate,
    const Names & /* deduplicate_by_columns */,
+    bool cleanup,
    ContextPtr /*context*/)
 {
    if (partition)
@ -728,6 +729,9 @@ bool StorageBuffer::optimize(
    if (deduplicate)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DEDUPLICATE cannot be specified when optimizing table of type Buffer");

+    if (cleanup)
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "CLEANUP cannot be specified when optimizing table of type Buffer");
+
    flushAllBuffers(false);
    return true;
 }
@ -1057,7 +1061,7 @@ void StorageBuffer::alter(const AlterCommands & params, ContextPtr local_context
    /// Flush all buffers to storages, so that no non-empty blocks of the old
    /// structure remain. Structure of empty blocks will be updated during first
    /// insert.
-    optimize({} /*query*/, metadata_snapshot, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, {}, local_context);
+    optimize({} /*query*/, metadata_snapshot, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, {}, false /*cleanup*/, local_context);

    StorageInMemoryMetadata new_metadata = *metadata_snapshot;
    params.apply(new_metadata, local_context);
--- a/src/Storages/StorageBuffer.h
+++ b/src/Storages/StorageBuffer.h
@ -100,6 +100,7 @@ public:
        bool final,
        bool deduplicate,
        const Names & deduplicate_by_columns,
+        bool cleanup,
        ContextPtr context) override;

    bool supportsSampling() const override { return true; }
--- a/src/Storages/StorageMaterializedView.cpp
+++ b/src/Storages/StorageMaterializedView.cpp
@ -241,12 +241,13 @@ bool StorageMaterializedView::optimize(
    bool final,
    bool deduplicate,
    const Names & deduplicate_by_columns,
+    bool cleanup,
    ContextPtr local_context)
 {
    checkStatementCanBeForwarded();
    auto storage_ptr = getTargetTable();
    auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
-    return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context);
+    return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, local_context);
 }

 void StorageMaterializedView::alter(
--- a/src/Storages/StorageMaterializedView.h
+++ b/src/Storages/StorageMaterializedView.h
@ -53,6 +53,7 @@ public:
        bool final,
        bool deduplicate,
        const Names & deduplicate_by_columns,
+        bool cleanup,
        ContextPtr context) override;

    void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override;
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@ -24,6 +24,7 @@
 #include <Columns/ColumnString.h>
 #include <Common/typeid_cast.h>
 #include <Common/checkStackSize.h>
+#include "DataTypes/IDataType.h"
 #include <Processors/QueryPlan/ReadFromMergeTree.h>
 #include <Processors/Sources/NullSource.h>
 #include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
@ -145,43 +146,11 @@ bool StorageMerge::tableSupportsPrewhere() const
    /// If new table that matches regexp for current storage and doesn't support PREWHERE
    /// will appear after this check and before calling "read" method, the optimized query may fail.
    /// Since it's quite rare case, we just ignore this possibility.
-    const auto & table_doesnt_support_prewhere = getFirstTable([](const auto & table) { return !table->canMoveConditionsToPrewhere(); });
-    bool supports_prewhere = (table_doesnt_support_prewhere == nullptr);
-
-    if (!supports_prewhere)
-        return false;
-
-    if (!getInMemoryMetadataPtr())
-        return false;
-
-    std::unordered_map<std::string, const IDataType *> column_types;
-    for (const auto & name_type : getInMemoryMetadataPtr()->getColumns().getAll())
-    {
-        column_types.emplace(name_type.name, name_type.type.get());
-    }
-
-    /// Check that all tables have the same column types, otherwise prewhere will fail
-    forEachTable([&](const StoragePtr & table)
-    {
-        const auto & metadata_ptr = table->getInMemoryMetadataPtr();
-        if (!metadata_ptr)
-            supports_prewhere = false;
-
-        if (!supports_prewhere)
-            return;
-
-        for (const auto & column : metadata_ptr->getColumns().getAll())
-        {
-            const auto * src_type = column_types[column.name];
-            if (src_type && !src_type->equals(*column.type))
-            {
-                supports_prewhere = false;
-                return;
-            }
-        }
-    });
-
-    return supports_prewhere;
+    ///
+    /// NOTE: Types can be different, and in this case, PREWHERE cannot be
+    /// applied to those columns, but there is a separate method to return
+    /// supported columns for PREWHERE - supportedPrewhereColumns().
+    return getFirstTable([](const auto & table) { return !table->canMoveConditionsToPrewhere(); }) == nullptr;
 }

 bool StorageMerge::canMoveConditionsToPrewhere() const
@ -189,6 +158,48 @@ bool StorageMerge::canMoveConditionsToPrewhere() const
    return tableSupportsPrewhere();
 }

+std::optional<NameSet> StorageMerge::supportedPrewhereColumns() const
+{
+    bool supports_prewhere = true;
+
+    const auto & metadata = getInMemoryMetadata();
+    const auto & columns = metadata.getColumns();
+
+    NameSet supported_columns;
+
+    std::unordered_map<std::string, std::pair<const IDataType *, std::optional<ColumnDefault>>> column_type_default;
+    for (const auto & name_type : columns.getAll())
+    {
+        column_type_default.emplace(name_type.name, std::make_pair(
+            name_type.type.get(), columns.getDefault(name_type.name)));
+        supported_columns.emplace(name_type.name);
+    }
+
+    forEachTable([&](const StoragePtr & table)
+    {
+        const auto & table_metadata_ptr = table->getInMemoryMetadataPtr();
+        if (!table_metadata_ptr)
+            supports_prewhere = false;
+        if (!supports_prewhere)
+            return;
+
+        const auto & table_columns = table_metadata_ptr->getColumns();
+        for (const auto & column : table_columns.getAll())
+        {
+            const auto & root_type_default = column_type_default[column.name];
+            const IDataType * root_type = root_type_default.first;
+            const std::optional<ColumnDefault> & src_default = root_type_default.second;
+            if ((root_type && !root_type->equals(*column.type)) ||
+                src_default != table_columns.getDefault(column.name))
+            {
+                supported_columns.erase(column.name);
+            }
+        }
+    });
+
+    return supported_columns;
+}
+
 bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & /*metadata_snapshot*/) const
 {
    /// It's beneficial if it is true for at least one table.
@ -300,12 +311,6 @@ void StorageMerge::read(
    auto modified_context = Context::createCopy(local_context);
    modified_context->setSetting("optimize_move_to_prewhere", false);

-    if (query_info.prewhere_info && !tableSupportsPrewhere())
-        throw DB::Exception(
-            DB::ErrorCodes::ILLEGAL_PREWHERE,
-            "Cannot use PREWHERE with table {}, probably some columns don't have same type or an underlying table doesn't support PREWHERE",
-            getStorageID().getTableName());
-
    bool has_database_virtual_column = false;
    bool has_table_virtual_column = false;
    Names real_column_names;
--- a/src/Storages/StorageMerge.h
+++ b/src/Storages/StorageMerge.h
@ -47,6 +47,7 @@ public:
    bool supportsIndexForIn() const override { return true; }
    bool supportsSubcolumns() const override { return true; }
    bool supportsPrewhere() const override { return true; }
+    std::optional<NameSet> supportedPrewhereColumns() const override;

    bool canMoveConditionsToPrewhere() const override;

--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@ -976,6 +976,7 @@ bool StorageMergeTree::merge(
    bool final,
    bool deduplicate,
    const Names & deduplicate_by_columns,
+    bool cleanup,
    const MergeTreeTransactionPtr & txn,
    String * out_disable_reason,
    bool optimize_skip_merged_partitions)
@ -1015,7 +1016,7 @@ bool StorageMergeTree::merge(
    /// Copying a vector of columns `deduplicate by columns.
    IExecutableTask::TaskResultCallback f = [](bool) {};
    auto task = std::make_shared<MergePlainMergeTreeTask>(
-        *this, metadata_snapshot, deduplicate, deduplicate_by_columns, merge_mutate_entry, table_lock_holder, f);
+        *this, metadata_snapshot, deduplicate, deduplicate_by_columns, cleanup, merge_mutate_entry, table_lock_holder, f);

    task->setCurrentTransaction(MergeTreeTransactionHolder{}, MergeTreeTransactionPtr{txn});

@ -1227,7 +1228,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign

    if (merge_entry)
    {
-        auto task = std::make_shared<MergePlainMergeTreeTask>(*this, metadata_snapshot, false, Names{}, merge_entry, shared_lock, common_assignee_trigger);
+        auto task = std::make_shared<MergePlainMergeTreeTask>(*this, metadata_snapshot, /* deduplicate */ false, Names{}, /* cleanup */ false, merge_entry, shared_lock, common_assignee_trigger);
        task->setCurrentTransaction(std::move(transaction_for_merge), std::move(txn));
        bool scheduled = assignee.scheduleMergeMutateTask(task);
        /// The problem that we already booked a slot for TTL merge, but a merge list entry will be created only in a prepare method
@ -1362,6 +1363,7 @@ bool StorageMergeTree::optimize(
    bool final,
    bool deduplicate,
    const Names & deduplicate_by_columns,
+    bool cleanup,
    ContextPtr local_context)
 {
    if (deduplicate)
@ -1377,6 +1379,13 @@ bool StorageMergeTree::optimize(
    String disable_reason;
    if (!partition && final)
    {
+        if (cleanup && this->merging_params.mode != MergingParams::Mode::Replacing)
+        {
+            constexpr const char * message = "Cannot OPTIMIZE with CLEANUP table: {}";
+            disable_reason = "only ReplacingMergeTree can be CLEANUP";
+            throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason);
+        }
+
        DataPartsVector data_parts = getVisibleDataPartsVector(local_context);
        std::unordered_set<String> partition_ids;

@ -1391,6 +1400,7 @@ bool StorageMergeTree::optimize(
                    true,
                    deduplicate,
                    deduplicate_by_columns,
+                    cleanup,
                    txn,
                    &disable_reason,
                    local_context->getSettingsRef().optimize_skip_merged_partitions))
@ -1418,6 +1428,7 @@ bool StorageMergeTree::optimize(
                final,
                deduplicate,
                deduplicate_by_columns,
+                cleanup,
                txn,
                &disable_reason,
                local_context->getSettingsRef().optimize_skip_merged_partitions))
--- a/src/Storages/StorageMergeTree.h
+++ b/src/Storages/StorageMergeTree.h
@ -83,6 +83,7 @@ public:
        bool final,
        bool deduplicate,
        const Names & deduplicate_by_columns,
+        bool cleanup,
        ContextPtr context) override;

    void mutate(const MutationCommands & commands, ContextPtr context) override;
@ -165,6 +166,7 @@ private:
            const String & partition_id,
            bool final, bool deduplicate,
            const Names & deduplicate_by_columns,
+            bool cleanup,
            const MergeTreeTransactionPtr & txn,
            String * out_disable_reason = nullptr,
            bool optimize_skip_merged_partitions = false);
--- a/src/Storages/StorageProxy.h
+++ b/src/Storages/StorageProxy.h
@ -127,9 +127,10 @@ public:
            bool final,
            bool deduplicate,
            const Names & deduplicate_by_columns,
+            bool cleanup,
            ContextPtr context) override
    {
-        return getNested()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, context);
+        return getNested()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, context);
    }

    void mutate(const MutationCommands & commands, ContextPtr context) override { getNested()->mutate(commands, context); }
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@ -3171,6 +3171,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask()
    const auto storage_settings_ptr = getSettings();
    const bool deduplicate = false; /// TODO: read deduplicate option from table config
    const Names deduplicate_by_columns = {};
+    const bool cleanup = (storage_settings_ptr->clean_deleted_rows != CleanDeletedRows::Never);
    CreateMergeEntryResult create_result = CreateMergeEntryResult::Other;

    try
@ -3222,6 +3223,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask()
                    future_merged_part->part_format,
                    deduplicate,
                    deduplicate_by_columns,
+                    cleanup,
                    nullptr,
                    merge_pred.getVersion(),
                    future_merged_part->merge_type);
@ -3313,6 +3315,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c
    const MergeTreeDataPartFormat & merged_part_format,
    bool deduplicate,
    const Names & deduplicate_by_columns,
+    bool cleanup,
    ReplicatedMergeTreeLogEntryData * out_log_entry,
    int32_t log_version,
    MergeType merge_type)
@ -3352,6 +3355,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c
    entry.merge_type = merge_type;
    entry.deduplicate = deduplicate;
    entry.deduplicate_by_columns = deduplicate_by_columns;
+    entry.cleanup = cleanup;
    entry.create_time = time(nullptr);

    for (const auto & part : parts)
@ -4799,6 +4803,7 @@ bool StorageReplicatedMergeTree::optimize(
    bool final,
    bool deduplicate,
    const Names & deduplicate_by_columns,
+    bool cleanup,
    ContextPtr query_context)
 {
    /// NOTE: exclusive lock cannot be used here, since this may lead to deadlock (see comments below),
@ -4810,6 +4815,9 @@ bool StorageReplicatedMergeTree::optimize(
    if (!is_leader)
        throw Exception(ErrorCodes::NOT_A_LEADER, "OPTIMIZE cannot be done on this replica because it is not a leader");

+    if (cleanup)
+        LOG_DEBUG(log, "Cleanup the ReplicatedMergeTree.");
+
    auto handle_noop = [&]<typename... Args>(FormatStringHelper<Args...> fmt_string, Args && ...args)
    {
        PreformattedMessage message = fmt_string.format(std::forward<Args>(args)...);
@ -4888,6 +4896,7 @@ bool StorageReplicatedMergeTree::optimize(
                future_merged_part->uuid,
                future_merged_part->part_format,
                deduplicate, deduplicate_by_columns,
+                cleanup,
                &merge_entry, can_merge.getVersion(),
                future_merged_part->merge_type);

@ -4912,6 +4921,13 @@ bool StorageReplicatedMergeTree::optimize(
    bool assigned = false;
    if (!partition && final)
    {
+        if (cleanup && this->merging_params.mode != MergingParams::Mode::Replacing)
+        {
+            constexpr const char * message = "Cannot OPTIMIZE with CLEANUP table: {}";
+            String disable_reason = "only ReplacingMergeTree can be CLEANUP";
+            throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason);
+        }
+
        DataPartsVector data_parts = getVisibleDataPartsVector(query_context);
        std::unordered_set<String> partition_ids;

--- a/src/Storages/StorageReplicatedMergeTree.h
+++ b/src/Storages/StorageReplicatedMergeTree.h
@ -149,6 +149,7 @@ public:
        bool final,
        bool deduplicate,
        const Names & deduplicate_by_columns,
+        bool cleanup,
        ContextPtr query_context) override;

    void alter(const AlterCommands & commands, ContextPtr query_context, AlterLockHolder & table_lock_holder) override;
@ -634,6 +635,7 @@ private:
        const MergeTreeDataPartFormat & merged_part_format,
        bool deduplicate,
        const Names & deduplicate_by_columns,
+        bool cleanup,
        ReplicatedMergeTreeLogEntryData * out_log_entry,
        int32_t log_version,
        MergeType merge_type);
--- a/src/Storages/WindowView/StorageWindowView.cpp
+++ b/src/Storages/WindowView/StorageWindowView.cpp
@ -428,11 +428,12 @@ bool StorageWindowView::optimize(
    bool final,
    bool deduplicate,
    const Names & deduplicate_by_columns,
+    bool cleanup,
    ContextPtr local_context)
 {
    auto storage_ptr = getInnerTable();
    auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
-    return getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context);
+    return getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, local_context);
 }

 void StorageWindowView::alter(
--- a/src/Storages/WindowView/StorageWindowView.h
+++ b/src/Storages/WindowView/StorageWindowView.h
@ -134,6 +134,7 @@ public:
        bool final,
        bool deduplicate,
        const Names & deduplicate_by_columns,
+        bool cleanup,
        ContextPtr context) override;

    void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override;
--- a/tests/ci/ccache_utils.py
+++ b/tests/ci/ccache_utils.py
@ -77,10 +77,21 @@ def get_ccache_if_not_exists(
    for pr_number in prs_to_check:
        logging.info("Searching cache for pr %s", pr_number)
        s3_path_prefix = str(pr_number) + "/ccaches"
-        objects = s3_helper.list_prefix(s3_path_prefix)
-        logging.info("Found %s objects for pr", len(objects))
-        for obj in objects:
-            if ccache_name in obj:
+        all_cache_objects = s3_helper.list_prefix(s3_path_prefix)
+        logging.info("Found %s objects for pr %s", len(all_cache_objects), pr_number)
+        objects = [obj for obj in all_cache_objects if ccache_name in obj]
+        if not objects:
+            continue
+        logging.info(
+            "Found ccache archives for pr %s: %s", pr_number, ", ".join(objects)
+        )
+
+        obj = objects[0]
+        # There are multiple possible caches, the newest one ends with .tar.zst
+        zst_cache = [obj for obj in objects if obj.endswith(".tar.zst")]
+        if zst_cache:
+            obj = zst_cache[0]
+
        logging.info("Found ccache on path %s", obj)
        url = f"{S3_DOWNLOAD}/{S3_BUILDS_BUCKET}/{obj}"
        compressed_cache = os.path.join(temp_path, os.path.basename(obj))
@ -100,8 +111,6 @@ def get_ccache_if_not_exists(
        cache_found = True
        ccache_pr = pr_number
        break
-        if cache_found:
-            break

    if not cache_found:
        logging.info("ccache not found anywhere, cannot download anything :(")
--- a/tests/ci/release.py
+++ b/tests/ci/release.py
@ -104,8 +104,10 @@ class Release:

    def set_release_info(self):
        # Fetch release commit and tags in case they don't exist locally
-        self.run(f"git fetch {self.repo.url} {self.release_commit}")
-        self.run(f"git fetch {self.repo.url} --tags")
+        self.run(
+            f"git fetch {self.repo.url} {self.release_commit} --no-recurse-submodules"
+        )
+        self.run(f"git fetch {self.repo.url} --tags --no-recurse-submodules")

        # Get the actual version for the commit before check
        with self._checkout(self.release_commit, True):
@ -248,9 +250,11 @@ class Release:

        # Prefetch the branch to have it updated
        if self._git.branch == branch:
-            self.run("git pull")
+            self.run("git pull --no-recurse-submodules")
        else:
-            self.run(f"git fetch {self.repo.url} {branch}:{branch}")
+            self.run(
+                f"git fetch {self.repo.url} {branch}:{branch} --no-recurse-submodules"
+            )
        output = self.run(f"git branch --contains={self.release_commit} {branch}")
        if branch not in output:
            raise Exception(
--- a/tests/ci/workflow_approve_rerun_lambda/app.py
+++ b/tests/ci/workflow_approve_rerun_lambda/app.py
@ -123,6 +123,7 @@ TRUSTED_CONTRIBUTORS = {
        "BoloniniD",  # Seasoned contributor, HSE
        "tonickkozlov",  # Cloudflare
        "tylerhannan",  # ClickHouse Employee
+        "myrrc", # Mike Kot, DoubleCloud
    ]
 }

--- a/tests/clickhouse-test
+++ b/tests/clickhouse-test
@ -449,7 +449,6 @@ class FailureReason(enum.Enum):
    NO_LONG = "not running long tests"
    REPLICATED_DB = "replicated-database"
    S3_STORAGE = "s3-storage"
-    STRESS = "stress"
    BUILD = "not running for current build"
    BACKWARD_INCOMPATIBLE = "test is backward incompatible"
    NO_PARALLEL_REPLICAS = "smth in not supported with parallel replicas"
@ -861,9 +860,6 @@ class TestCase:
        elif tags and ("no-s3-storage" in tags) and args.s3_storage:
            return FailureReason.S3_STORAGE

-        elif tags and ("no-stress" in tags) and args.stress:
-            return FailureReason.STRESS
-
        elif tags:
            for build_flag in args.build_flags:
                if "no-" + build_flag in tags:
@ -2229,8 +2225,16 @@ def find_binary(name):
    bin_path = os.path.join("/usr/bin", name)
    if os.access(bin_path, os.X_OK):
        return bin_path
-    return None

+    raise Exception(f"{name} was not found in PATH")
+
+def find_clickhouse_command(binary, command):
+    """Return the command line used to invoke a clickhouse subcommand.
+
+    Gives "<binary>-<command>" when that path is executable, otherwise
+    falls back to "<binary> <command>".
+    """
+    symlink = binary + "-" + command
+    if os.access(symlink, os.X_OK):
+        return symlink
+
+    # To avoid requiring symlinks (in case you download binary from CI)
+    return binary + " " + command

 def get_additional_client_options(args):
    if args.client_option:
@ -2244,24 +2248,7 @@ def get_additional_client_options_url(args):
    return ""


-if __name__ == "__main__":
-    stop_time = None
-    exit_code = multiprocessing.Value("i", 0)
-    server_died = multiprocessing.Event()
-    stop_tests_triggered_lock = multiprocessing.Lock()
-    stop_tests_triggered = multiprocessing.Event()
-    queue = multiprocessing.Queue(maxsize=1)
-    multiprocessing_manager = multiprocessing.Manager()
-    restarted_tests = multiprocessing_manager.list()
-
-    # Move to a new process group and kill it at exit so that we don't have any
-    # infinite tests processes left
-    # (new process group is required to avoid killing some parent processes)
-    os.setpgid(0, 0)
-    signal.signal(signal.SIGTERM, signal_handler)
-    signal.signal(signal.SIGINT, signal_handler)
-    signal.signal(signal.SIGHUP, signal_handler)
-
+def parse_args():
    parser = ArgumentParser(description="ClickHouse functional tests")
    parser.add_argument("-q", "--queries", help="Path to queries dir")
    parser.add_argument("--tmp", help="Path to tmp dir")
@ -2272,7 +2259,6 @@ if __name__ == "__main__":
        default=find_binary("clickhouse"),
        help="Path to clickhouse binary or name of binary in PATH",
    )
-
    parser.add_argument(
        "-c",
        "--client",
@ -2417,12 +2403,6 @@ if __name__ == "__main__":
        default=False,
        help="Run tests over s3 storage",
    )
-    parser.add_argument(
-        "--stress",
-        action="store_true",
-        default=False,
-        help="Run stress tests",
-    )
    parser.add_argument(
        "--no-random-settings",
        action="store_true",
@ -2516,7 +2496,32 @@ if __name__ == "__main__":
        help="Do not include tests that are not supported with parallel replicas feature",
    )

-    args = parser.parse_args()
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    stop_time = None
+    exit_code = multiprocessing.Value("i", 0)
+    server_died = multiprocessing.Event()
+    stop_tests_triggered_lock = multiprocessing.Lock()
+    stop_tests_triggered = multiprocessing.Event()
+    queue = multiprocessing.Queue(maxsize=1)
+    multiprocessing_manager = multiprocessing.Manager()
+    restarted_tests = multiprocessing_manager.list()
+
+    # Move to a new process group and kill it at exit so that we don't have any
+    # infinite tests processes left
+    # (new process group is required to avoid killing some parent processes)
+    os.setpgid(0, 0)
+    signal.signal(signal.SIGTERM, signal_handler)
+    signal.signal(signal.SIGINT, signal_handler)
+    signal.signal(signal.SIGHUP, signal_handler)
+
+    try:
+        args = parse_args()
+    except Exception as e:
+        print(e, file=sys.stderr)
+        sys.exit(1)

    if args.queries and not os.path.isdir(args.queries):
        print(
@ -2557,21 +2562,21 @@ if __name__ == "__main__":

    if args.tmp is None:
        args.tmp = args.queries
-    if args.client is None:
-        client_bin = find_binary(args.binary + "-client")
-        if client_bin is not None:
-            args.client = client_bin
-            print("Using {args.client} as client program")
-        elif args.binary:
-            args.client = args.binary + " client"
-            print(f"Using {args.client} as client program (expecting monolithic build)")
-        else:
+
+    # --client is still accepted, but only triggers a warning: the value is
+    # replaced below by the command derived from --binary.
+    if args.client:
        print(
-                "No 'clickhouse' or 'clickhouse-client' client binary found",
+            "WARNING: --client option is deprecated and will be removed in the future, use --binary instead",
            file=sys.stderr,
        )
-            parser.print_help()
-            sys.exit(1)
+
+    args.client = find_clickhouse_command(args.binary, "client")
+
+    # Same treatment for --extract_from_config: warn, then overwrite the value
+    # with the command derived from --binary.
+    if args.extract_from_config:
+        print(
+            "WARNING: --extract_from_config option is deprecated and will be removed in the future",
+            file=sys.stderr,
+        )
+    args.extract_from_config = find_clickhouse_command(args.binary, "extract-from-config")

    if args.configclient:
        args.client += " --config-file=" + args.configclient
@ -2634,12 +2639,6 @@ if __name__ == "__main__":
    else:
        args.client_options_query_str = ""

-    if args.extract_from_config is None:
-        if os.access(args.binary + "-extract-from-config", os.X_OK):
-            args.extract_from_config = args.binary + "-extract-from-config"
-        else:
-            args.extract_from_config = args.binary + " extract-from-config"
-
    if args.jobs is None:
        args.jobs = multiprocessing.cpu_count()

--- a/tests/integration/ci-runner.py
+++ b/tests/integration/ci-runner.py
@ -380,11 +380,6 @@ class ClickhouseIntegrationTestsRunner:
        )

    def _compress_logs(self, dir, relpaths, result_path):
-        # We execute sync in advance to have all files written after containers
-        # are finished or killed
-        subprocess.check_call(  # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
-            "sync", shell=True
-        )
        retcode = subprocess.call(  # STYLE_CHECK_ALLOW_SUBPROCESS_CHECK_CALL
            "tar czf {} -C {} {}".format(result_path, dir, " ".join(relpaths)),
            shell=True,
--- a/tests/integration/helpers/cluster.py
+++ b/tests/integration/helpers/cluster.py
@ -3385,6 +3385,7 @@ class ClickHouseInstance:
        port=8123,
        timeout=None,
        retry_strategy=None,
+        content=False,
    ):
        output, error = self.http_query_and_get_answer_with_error(
            sql,
@ -3396,6 +3397,7 @@ class ClickHouseInstance:
            port=port,
            timeout=timeout,
            retry_strategy=retry_strategy,
+            content=content,
        )

        if error:
@ -3448,6 +3450,7 @@ class ClickHouseInstance:
        port=8123,
        timeout=None,
        retry_strategy=None,
+        content=False,
    ):
        logging.debug(f"Executing query {sql} on {self.name} via HTTP interface")
        if params is None:
@ -3479,7 +3482,7 @@ class ClickHouseInstance:
        r = requester.request(method, url, data=data, auth=auth, timeout=timeout)

        if r.ok:
-            return (r.text, None)
+            return (r.content if content else r.text, None)

        code = r.status_code
        return (None, str(code) + " " + http.client.responses[code] + ": " + r.text)
--- a/tests/integration/test_http_native/init.py
+++ b/tests/integration/test_http_native/init.py
--- a/tests/integration/test_http_native/test.py
+++ b/tests/integration/test_http_native/test.py
@ -0,0 +1,29 @@
+import pytest
+from helpers.cluster import ClickHouseCluster
+
+cluster = ClickHouseCluster(__file__)
+instance = cluster.add_instance("instance")
+
+
+@pytest.fixture(scope="module", autouse=True)
+def setup_nodes():
+    """Start the cluster for this module, yield it, and shut it down afterwards."""
+    try:
+        cluster.start()
+        yield cluster
+
+    finally:
+        # In `finally` so the shutdown also runs when startup or a test fails.
+        cluster.shutdown()
+
+
+def test_http_native_returns_timezone():
+    """Check the DateTime type name in Native output over HTTP, with and without client_protocol_version."""
+    # No timezone when no protocol version sent
+    query = "SELECT toDateTime(1676369730, 'Asia/Shanghai') as dt FORMAT Native"
+    # content=True requests the raw response bytes rather than decoded text.
+    raw = instance.http_query(query, content=True)
+    # The dump holds the ASCII bytes of "dt" (6474) and "DateTime" (4461...6d65),
+    # then 42 5f eb 63, which is 1676369730 as a little-endian 32-bit value.
+    # The type name therefore carries no timezone.
+    # NOTE(review): the leading 01 01 are presumably the column and row counts - confirm.
+    assert raw.hex(" ", 2) == "0101 0264 7408 4461 7465 5469 6d65 425f eb63"
+
+    # Timezone available when protocol version sent
+    raw = instance.http_query(
+        query, params={"client_protocol_version": 54337}, content=True
+    )
+    # The slice is 25 bytes long, exactly the length of the expected type name.
+    # NOTE(review): offset 14 presumably skips the block header and column name - confirm.
+    ch_type = raw[14:39].decode()
+    assert ch_type == "DateTime('Asia/Shanghai')"
--- a/tests/integration/test_s3_cluster/test.py
+++ b/tests/integration/test_s3_cluster/test.py
@ -32,8 +32,9 @@ def create_buckets_s3(cluster):
            # a String, b UInt64
            data = []

-            for number in range(100):
-                data.append([str(number) * 10, number])
+            # Make all files a bit different
+            for number in range(100 + file_number):
+                data.append([str(number + file_number) * 10, number + file_number])

            writer = csv.writer(f)
            writer.writerows(data)
--- a/tests/queries/0_stateless/00597_push_down_predicate_long.reference
+++ b/tests/queries/0_stateless/00597_push_down_predicate_long.reference
@ -402,8 +402,8 @@ FROM
 ANY LEFT JOIN
 (
    SELECT
-        id,
        date,
+        id,
        name,
        value
    FROM test_00597
@ -472,8 +472,8 @@ FROM
 ANY LEFT JOIN
 (
    SELECT
-        id,
        date,
+        id,
        name,
        value
    FROM test_00597
@ -537,10 +537,10 @@ FROM
    ANY LEFT JOIN
    (
        SELECT
-            name,
-            value,
            date,
-            id
+            id,
+            name,
+            value
        FROM test_00597
    ) AS b ON id = b.id
    WHERE id = 1
@ -567,8 +567,8 @@ FROM
 SEMI LEFT JOIN
 (
    SELECT
-        id,
        date,
+        id,
        name,
        value
    FROM
--- a/tests/queries/0_stateless/00717_merge_and_distributed.sql
+++ b/tests/queries/0_stateless/00717_merge_and_distributed.sql
@ -20,9 +20,9 @@ SELECT * FROM merge(currentDatabase(), 'test_local_1');
 SELECT *, _table FROM merge(currentDatabase(), 'test_local_1') ORDER BY _table;
 SELECT sum(value), _table FROM merge(currentDatabase(), 'test_local_1') GROUP BY _table ORDER BY _table;
 SELECT * FROM merge(currentDatabase(), 'test_local_1') WHERE _table = 'test_local_1';
-SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table = 'test_local_1'; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK }
+SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table = 'test_local_1'; -- { serverError ILLEGAL_PREWHERE }
 SELECT * FROM merge(currentDatabase(), 'test_local_1') WHERE _table in ('test_local_1', 'test_local_2');
-SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table in ('test_local_1', 'test_local_2'); -- { serverError NOT_FOUND_COLUMN_IN_BLOCK }
+SELECT * FROM merge(currentDatabase(), 'test_local_1') PREWHERE _table in ('test_local_1', 'test_local_2'); -- { serverError ILLEGAL_PREWHERE }

 SELECT '--------------Single Distributed------------';
 SELECT * FROM merge(currentDatabase(), 'test_distributed_1');
@ -38,9 +38,9 @@ SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') ORDER BY _ta
 SELECT *, _table FROM merge(currentDatabase(), 'test_local_1|test_local_2') ORDER BY _table;
 SELECT sum(value), _table FROM merge(currentDatabase(), 'test_local_1|test_local_2') GROUP BY _table ORDER BY _table;
 SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') WHERE _table = 'test_local_1';
-SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table = 'test_local_1'; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK }
+SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table = 'test_local_1'; -- { serverError ILLEGAL_PREWHERE }
 SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') WHERE _table in ('test_local_1', 'test_local_2') ORDER BY value;
-SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table in ('test_local_1', 'test_local_2') ORDER BY value; -- { serverError NOT_FOUND_COLUMN_IN_BLOCK }
+SELECT * FROM merge(currentDatabase(), 'test_local_1|test_local_2') PREWHERE _table in ('test_local_1', 'test_local_2') ORDER BY value; -- { serverError ILLEGAL_PREWHERE }

 SELECT '--------------Local Merge Distributed------------';
 SELECT * FROM merge(currentDatabase(), 'test_local_1|test_distributed_2') ORDER BY _table;
--- a/tests/queries/0_stateless/00718_format_datetime.reference
+++ b/tests/queries/0_stateless/00718_format_datetime.reference
@ -1,25 +1,33 @@
+Tue	Tue
+Jan	Jan
+01	01
 20	20
 02	02
 01/02/18	01/02/18
 2	 2
 2018-01-02	2018-01-02
+10	12
 22	00
 02
+33	00
 10	12
 11
 12
 001	001
 366	366
+00	00
 01	01
 33	00
 \n	\n
 AM	AM
 AM
 PM
+10:33 PM	12:00 AM
 22:33	00:00
 44	00
 \t	\t
 22:33:44	00:00:00
+Tuesday	Tuesday
 1	7	1	7
 01	01	53	52	01	01	53	52
 1	0	1	0
--- a/tests/queries/0_stateless/00718_format_datetime.sql
+++ b/tests/queries/0_stateless/00718_format_datetime.sql
@ -1,35 +1,45 @@
 SET send_logs_level = 'fatal';

-SELECT formatDateTime(); -- { serverError 42 }
-SELECT formatDateTime('not a datetime', 'IGNORED'); -- { serverError 43 }
-SELECT formatDateTime(now(), now()); -- { serverError 43 }
-SELECT formatDateTime(now(), 'good format pattern', now()); -- { serverError 43 }
-SELECT formatDateTime(now(), 'unescaped %'); -- { serverError 36 }
-SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%U'); -- { serverError 48 }
-SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%W'); -- { serverError 48 }
+SELECT formatDateTime(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH (42) }
+SELECT formatDateTime('not a datetime', 'IGNORED'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT (43) }
+SELECT formatDateTime(now(), now()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT (43) }
+SELECT formatDateTime(now(), 'good format pattern', now()); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT (43) }
+SELECT formatDateTime(now(), 'unescaped %'); -- { serverError BAD_ARGUMENTS (36) }
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%U'); -- { serverError NOT_IMPLEMENTED (48) }
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%v'); -- { serverError NOT_IMPLEMENTED (48) }
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%x'); -- { serverError NOT_IMPLEMENTED (48) }
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%X'); -- { serverError NOT_IMPLEMENTED (48) }

+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%a'), formatDateTime(toDate32('2018-01-02'), '%a');
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%b'), formatDateTime(toDate32('2018-01-02'), '%b');
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%c'), formatDateTime(toDate32('2018-01-02'), '%c');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%C'), formatDateTime(toDate32('2018-01-02'), '%C');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%d'), formatDateTime(toDate32('2018-01-02'), '%d');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%D'), formatDateTime(toDate32('2018-01-02'), '%D');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%e'), formatDateTime(toDate32('2018-01-02'), '%e');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%F'), formatDateTime(toDate32('2018-01-02'), '%F');
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%h'), formatDateTime(toDate32('2018-01-02'), '%h');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%H'), formatDateTime(toDate32('2018-01-02'), '%H');
 SELECT formatDateTime(toDateTime('2018-01-02 02:33:44'), '%H');
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%i'), formatDateTime(toDate32('2018-01-02'), '%i');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%I'), formatDateTime(toDate32('2018-01-02'), '%I');
 SELECT formatDateTime(toDateTime('2018-01-02 11:33:44'), '%I');
 SELECT formatDateTime(toDateTime('2018-01-02 00:33:44'), '%I');
 SELECT formatDateTime(toDateTime('2018-01-01 00:33:44'), '%j'), formatDateTime(toDate32('2018-01-01'), '%j');
 SELECT formatDateTime(toDateTime('2000-12-31 00:33:44'), '%j'), formatDateTime(toDate32('2000-12-31'), '%j');
+SELECT formatDateTime(toDateTime('2000-12-31 00:33:44'), '%k'), formatDateTime(toDate32('2000-12-31'), '%k');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%m'), formatDateTime(toDate32('2018-01-02'), '%m');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%M'), formatDateTime(toDate32('2018-01-02'), '%M');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%n'), formatDateTime(toDate32('2018-01-02'), '%n');
 SELECT formatDateTime(toDateTime('2018-01-02 00:33:44'), '%p'), formatDateTime(toDateTime('2018-01-02'), '%p');
 SELECT formatDateTime(toDateTime('2018-01-02 11:33:44'), '%p');
 SELECT formatDateTime(toDateTime('2018-01-02 12:33:44'), '%p');
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%r'), formatDateTime(toDate32('2018-01-02'), '%r');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%R'), formatDateTime(toDate32('2018-01-02'), '%R');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%S'), formatDateTime(toDate32('2018-01-02'), '%S');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%t'), formatDateTime(toDate32('2018-01-02'), '%t');
 SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%T'), formatDateTime(toDate32('2018-01-02'), '%T');
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%W'), formatDateTime(toDate32('2018-01-02'), '%W');
 SELECT formatDateTime(toDateTime('2018-01-01 22:33:44'), '%u'), formatDateTime(toDateTime('2018-01-07 22:33:44'), '%u'),
       formatDateTime(toDate32('2018-01-01'), '%u'), formatDateTime(toDate32('2018-01-07'), '%u');
 SELECT formatDateTime(toDateTime('1996-01-01 22:33:44'), '%V'), formatDateTime(toDateTime('1996-12-31 22:33:44'), '%V'),
--- a/tests/queries/0_stateless/00719_format_datetime_rand.sql
+++ b/tests/queries/0_stateless/00719_format_datetime_rand.sql
@ -1,6 +1,6 @@
 -- We add 1, because function toString has special behaviour for zero datetime
 WITH toDateTime(1 + rand() % 0xFFFFFFFF) AS t SELECT count() FROM numbers(1000000) WHERE formatDateTime(t, '%F %T') != toString(t);
-WITH toDateTime(1 + rand() % 0xFFFFFFFF) AS t SELECT count() FROM numbers(1000000) WHERE formatDateTime(t, '%Y-%m-%d %H:%M:%S') != toString(t);
+WITH toDateTime(1 + rand() % 0xFFFFFFFF) AS t SELECT count() FROM numbers(1000000) WHERE formatDateTime(t, '%Y-%m-%d %H:%i:%S') != toString(t);
 WITH toDateTime(1 + rand() % 0xFFFFFFFF) AS t SELECT count() FROM numbers(1000000) WHERE formatDateTime(t, '%Y-%m-%d %R:%S') != toString(t);
 WITH toDateTime(1 + rand() % 0xFFFFFFFF) AS t SELECT count() FROM numbers(1000000) WHERE formatDateTime(t, '%F %R:%S') != toString(t);

--- a/tests/queries/0_stateless/00801_daylight_saving_time_hour_underflow.sql
+++ b/tests/queries/0_stateless/00801_daylight_saving_time_hour_underflow.sql
@ -3,4 +3,4 @@ SELECT
    ignore(toDateTime(370641600, 'Asia/Istanbul') AS t),
    replaceRegexpAll(toString(t), '\\d', 'x'),
    toHour(t) < 24,
-    replaceRegexpAll(formatDateTime(t, '%Y-%m-%d %H:%M:%S; %R:%S; %F %T'), '\\d', 'x');
+    replaceRegexpAll(formatDateTime(t, '%Y-%m-%d %H:%i:%S; %R:%S; %F %T'), '\\d', 'x');
--- a/tests/queries/0_stateless/01161_all_system_tables.sh
+++ b/tests/queries/0_stateless/01161_all_system_tables.sh
@ -1,4 +1,6 @@
 #!/usr/bin/env bash
+# Tags: no-parallel
+# Tag no-parallel: since someone may create table in system database

 # Server may ignore some exceptions, but it still print exceptions to logs and (at least in CI) sends Error and Warning log messages to client
 # making test fail because of non-empty stderr. Ignore such log messages.
--- a/tests/queries/0_stateless/01411_from_unixtime.reference
+++ b/tests/queries/0_stateless/01411_from_unixtime.reference
@ -5,25 +5,33 @@
 11
 1970-01-15
 1970-01-15 06:52:36
+Tue	Tue
+Jan	Jan
+01	01
 20	20
 02	02
 01/02/18	01/02/18
 2	 2
 2018-01-02	2018-01-02
+10	12
 22	00
 02
+33	00
 10	12
 11
 12
 001	001
 366	366
+00	00
 01	01
 33	00
 \n	\n
 AM	AM
 AM
 PM
+10:33 PM	12:00 AM
 22:33	00:00
 44	00
 \t	\t
 22:33:44	00:00:00
+Tuesday	Tuesday
--- a/tests/queries/0_stateless/01411_from_unixtime.sql
+++ b/tests/queries/0_stateless/01411_from_unixtime.sql
@ -5,25 +5,33 @@ SELECT FROM_UNIXTIME(5345345, '%C', 'UTC');
 SELECT FROM_UNIXTIME(645123, '%H', 'UTC');
 SELECT FROM_UNIXTIME(1232456, '%Y-%m-%d', 'UTC');
 SELECT FROM_UNIXTIME(1234356, '%Y-%m-%d %R:%S', 'UTC');
+SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%a'), FROM_UNIXTIME(toDate32('2018-01-02'), '%a');
+SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%b'), FROM_UNIXTIME(toDate32('2018-01-02'), '%b');
+SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%c'), FROM_UNIXTIME(toDate32('2018-01-02'), '%c');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%C'), FROM_UNIXTIME(toDate32('2018-01-02'), '%C');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%d'), FROM_UNIXTIME(toDate32('2018-01-02'), '%d');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%D'), FROM_UNIXTIME(toDate32('2018-01-02'), '%D');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%e'), FROM_UNIXTIME(toDate32('2018-01-02'), '%e');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%F'), FROM_UNIXTIME(toDate32('2018-01-02'), '%F');
+SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%h'), FROM_UNIXTIME(toDate32('2018-01-02'), '%h');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%H'), FROM_UNIXTIME(toDate32('2018-01-02'), '%H');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 02:33:44'), '%H');
+SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%i'), FROM_UNIXTIME(toDate32('2018-01-02'), '%i');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%I'), FROM_UNIXTIME(toDate32('2018-01-02'), '%I');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 11:33:44'), '%I');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 00:33:44'), '%I');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-01 00:33:44'), '%j'), FROM_UNIXTIME(toDate32('2018-01-01'), '%j');
 SELECT FROM_UNIXTIME(toDateTime('2000-12-31 00:33:44'), '%j'), FROM_UNIXTIME(toDate32('2000-12-31'), '%j');
+SELECT FROM_UNIXTIME(toDateTime('2000-12-31 00:33:44'), '%k'), FROM_UNIXTIME(toDate32('2000-12-31'), '%k');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%m'), FROM_UNIXTIME(toDate32('2018-01-02'), '%m');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%M'), FROM_UNIXTIME(toDate32('2018-01-02'), '%M');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%n'), FROM_UNIXTIME(toDate32('2018-01-02'), '%n');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 00:33:44'), '%p'), FROM_UNIXTIME(toDate32('2018-01-02'), '%p');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 11:33:44'), '%p');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 12:33:44'), '%p');
+SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%r'), FROM_UNIXTIME(toDate32('2018-01-02'), '%r');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%R'), FROM_UNIXTIME(toDate32('2018-01-02'), '%R');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%S'), FROM_UNIXTIME(toDate32('2018-01-02'), '%S');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%t'), FROM_UNIXTIME(toDate32('2018-01-02'), '%t');
 SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%T'), FROM_UNIXTIME(toDate32('2018-01-02'), '%T');
+SELECT FROM_UNIXTIME(toDateTime('2018-01-02 22:33:44'), '%W'), FROM_UNIXTIME(toDate32('2018-01-02'), '%W');
--- a/tests/queries/0_stateless/01646_system_restart_replicas_smoke.sql
+++ b/tests/queries/0_stateless/01646_system_restart_replicas_smoke.sql
@ -1,9 +1,5 @@
-- Tags: replica, no-tsan, no-parallel, no-stress
+-- Tags: replica, no-tsan, no-parallel
 -- Tag no-tsan: RESTART REPLICAS can acquire too much locks, while only 64 is possible from one thread under TSan
-- Tag no-stress: RESTART REPLICAS can leave some tables,
--                that may pollute error log,
--                like in 01414_mutations_and_errors_zookeeper.
--                no-stress is like worked no-parallel for stress testing

 DROP TABLE IF EXISTS data_01646;
 CREATE TABLE data_01646 (x Date, s String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_01646/data_01646', 'r') ORDER BY s PARTITION BY x;
--- a/tests/queries/0_stateless/01915_merge_prewhere_virtual_column_rand_chao_wang.sql
+++ b/tests/queries/0_stateless/01915_merge_prewhere_virtual_column_rand_chao_wang.sql
@ -9,7 +9,10 @@ ENGINE = MergeTree()
 ORDER BY f1;

 -- In version 20.12 this query sometimes produces an exception "Cannot find column"
-SELECT f2 FROM merge(currentDatabase(), '^abc$') PREWHERE _table = 'abc' AND f1 = 'a' AND rand() % 100 < 20;
-SELECT f2 FROM merge(currentDatabase(), '^abc$') PREWHERE _table = 'abc' AND f1 = 'a';
+SELECT f2 FROM merge(currentDatabase(), '^abc$') PREWHERE _table = 'abc' AND f1 = 'a' AND rand() % 100 < 20; -- { serverError ILLEGAL_PREWHERE }
+SELECT f2 FROM merge(currentDatabase(), '^abc$') PREWHERE _table = 'abc' AND f1 = 'a'; -- { serverError ILLEGAL_PREWHERE }
+
+SELECT f2 FROM merge(currentDatabase(), '^abc$') PREWHERE f1 = 'a' AND rand() % 100 < 20 WHERE _table = 'abc';
+SELECT f2 FROM merge(currentDatabase(), '^abc$') PREWHERE f1 = 'a' WHERE _table = 'abc';

 DROP TABLE abc;
--- a/tests/queries/0_stateless/01931_storage_merge_no_columns.sql
+++ b/tests/queries/0_stateless/01931_storage_merge_no_columns.sql
@ -1,4 +1,5 @@
 drop table if exists data;
 create table data (key Int) engine=MergeTree() order by key;
-select 1 from merge(currentDatabase(), '^data$') prewhere _table in (NULL);
+select 1 from merge(currentDatabase(), '^data$') prewhere _table in (NULL); -- { serverError ILLEGAL_PREWHERE }
+select 1 from merge(currentDatabase(), '^data$') where _table in (NULL);
 drop table data;
--- a/tests/queries/0_stateless/02227_union_match_by_name.reference
+++ b/tests/queries/0_stateless/02227_union_match_by_name.reference
@ -31,8 +31,8 @@ Header: avgWeighted(x, y) Nullable(Float64)
            Header: x Nullable(Nothing)
                    y UInt8
              Expression (Before ORDER BY)
-              Header: NULL Nullable(Nothing)
-                      1 UInt8
+              Header: 1 UInt8
+                      NULL Nullable(Nothing)
                      dummy UInt8
                ReadFromStorage (SystemOne)
                Header: dummy UInt8
--- a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference
+++ b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.reference
@ -0,0 +1,99 @@
+== Test SELECT ... FINAL - no is_deleted ==
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+d6	2	1
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+d6	2	1
+== Test SELECT ... FINAL - no is_deleted SETTINGS clean_deleted_rows=Always ==
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+d6	2	1
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+d6	2	1
+== Test SELECT ... FINAL ==
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+d6	2	1
+== Insert backups ==
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+== Insert a second batch with overlaping data ==
+d1	5	0
+d2	3	0
+d3	3	0
+d4	3	0
+d5	1	0
+== Only last version remains after OPTIMIZE W/ CLEANUP ==
+d1	5	0
+d2	1	0
+d3	1	0
+d4	1	0
+d5	1	0
+d6	3	0
+== OPTIMIZE W/ CLEANUP (remove d6) ==
+d1	5	0
+d2	1	0
+d3	1	0
+d4	1	0
+d5	1	0
+== Test of the SETTINGS clean_deleted_rows as Always ==
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+d6	2	1
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+== Test of the SETTINGS clean_deleted_rows as Never ==
+d1	5	0
+d2	1	0
+d3	1	0
+d4	3	0
+d5	1	0
+d6	2	1
+== (Replicas) Test optimize ==
+d2	1	0
+d4	1	0
+== (Replicas) Test settings ==
+c2	1	0
+c4	1	0
+== Check cleanup & settings for other merge trees ==
+d1	1	1
+d1	1	1
+d1	1	1
+d1	1	1	1
+d1	1	1	1
--- a/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql
+++ b/tests/queries/0_stateless/02490_replacing_merge_tree_is_deleted_column.sql
@ -0,0 +1,160 @@
+-- Tags: zookeeper
+
+-- The allow_deprecated_syntax_for_merge_tree setting prevents enabling the is_deleted column, so turn it off
+set allow_deprecated_syntax_for_merge_tree=0;
+
+-- Test the behaviour without the is_deleted column
+DROP TABLE IF EXISTS test;
+CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid);
+INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0);
+SELECT '== Test SELECT ... FINAL - no is_deleted ==';
+select * from test FINAL;
+OPTIMIZE TABLE test FINAL CLEANUP;
+select * from test;
+
+DROP TABLE IF EXISTS test;
+CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version) Order by (uid) SETTINGS clean_deleted_rows='Always';
+INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0);
+SELECT '== Test SELECT ... FINAL - no is_deleted SETTINGS clean_deleted_rows=Always ==';
+select * from test FINAL;
+OPTIMIZE TABLE test FINAL CLEANUP;
+select * from test;
+
+-- Test the new behaviour
+DROP TABLE IF EXISTS test;
+CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid);
+INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0);
+SELECT '== Test SELECT ... FINAL ==';
+select * from test FINAL;
+select * from test;
+
+SELECT '== Insert backups ==';
+INSERT INTO test (*) VALUES ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1);
+select * from test FINAL;
+
+SELECT '== Insert a second batch with overlaping data ==';
+INSERT INTO test (*) VALUES ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 1), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0), ('d2', 2, 1), ('d2', 3, 0), ('d3', 2, 1), ('d3', 3, 0);
+select * from test FINAL;
+
+DROP TABLE IF EXISTS test;
+CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid);
+
+-- Expect d6 to be version=3 is_deleted=false
+INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0),  ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 0);
+-- Insert previous version of 'd6' but only v=3 is_deleted=false will remain
+INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0),  ('d5', 1, 0), ('d6', 1, 0), ('d6', 2, 1);
+SELECT '== Only last version remains after OPTIMIZE W/ CLEANUP ==';
+OPTIMIZE TABLE test FINAL CLEANUP;
+select * from test;
+
+-- Insert d6 v=3 is_deleted=true (timestamp is more recent, so this version should be the one taken into account)
+INSERT INTO test (*) VALUES ('d1', 1, 0), ('d1', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d1', 5, 0), ('d2', 1, 0), ('d3', 1, 0), ('d4', 1, 0),  ('d5', 1, 0), ('d6', 1, 0), ('d6', 3, 1);
+
+SELECT '== OPTIMIZE W/ CLEANUP (remove d6) ==';
+OPTIMIZE TABLE test FINAL CLEANUP;
+-- No d6 anymore
+select * from test;
+
+DROP TABLE IF EXISTS test;
+CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid) SETTINGS clean_deleted_rows='Always';
+
+SELECT '== Test of the SETTINGS clean_deleted_rows as Always ==';
+INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0);
+-- Even if the setting is set to Always, the SELECT FINAL doesn't delete rows
+select * from test FINAL;
+select * from test;
+
+OPTIMIZE TABLE test FINAL;
+-- d6 has to be removed since we set clean_deleted_rows as 'Always'
+select * from test;
+
+SELECT '== Test of the SETTINGS clean_deleted_rows as Never ==';
+ALTER TABLE test MODIFY SETTING clean_deleted_rows='Never';
+INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0);
+INSERT INTO test (*) VALUES ('d1', 1, 0), ('d2', 1, 0), ('d6', 1, 0), ('d4', 1, 0), ('d6', 2, 1), ('d3', 1, 0), ('d1', 2, 1), ('d5', 1, 0), ('d4', 2, 1), ('d1', 3, 0), ('d1', 4, 1), ('d4', 3, 0), ('d1', 5, 0);
+OPTIMIZE TABLE test FINAL;
+-- d6 must NOT be removed since we set clean_deleted_rows as 'Never'
+select * from test;
+
+DROP TABLE IF EXISTS testCleanupR1;
+
+CREATE TABLE testCleanupR1 (uid String, version UInt32, is_deleted UInt8)
+    ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_cleanup/', 'r1', version, is_deleted)
+    ORDER BY uid;
+
+
+INSERT INTO testCleanupR1 (*) VALUES ('d1', 1, 0),('d2', 1, 0),('d3', 1, 0),('d4', 1, 0);
+INSERT INTO testCleanupR1 (*) VALUES ('d3', 2, 1);
+INSERT INTO testCleanupR1 (*) VALUES ('d1', 2, 1);
+SYSTEM SYNC REPLICA testCleanupR1; -- Avoid "Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet"
+
+OPTIMIZE TABLE testCleanupR1 FINAL CLEANUP;
+
+-- Only d2 and d4 remain (the latest versions of d1 and d3 are marked is_deleted=1)
+SELECT '== (Replicas) Test optimize ==';
+SELECT * FROM testCleanupR1;
+
+------------------------------
+
+DROP TABLE IF EXISTS testSettingsR1;
+
+CREATE TABLE testSettingsR1 (col1 String, version UInt32, is_deleted UInt8)
+    ENGINE = ReplicatedReplacingMergeTree('/clickhouse/{database}/tables/test_setting/', 'r1', version, is_deleted)
+    ORDER BY col1
+    SETTINGS clean_deleted_rows = 'Always';
+
+INSERT INTO testSettingsR1 (*) VALUES ('c1', 1, 1),('c2', 1, 0),('c3', 1, 1),('c4', 1, 0);
+SYSTEM SYNC REPLICA testSettingsR1; -- Avoid "Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet"
+
+OPTIMIZE TABLE testSettingsR1 FINAL;
+
+-- Only c2 and c4 remain (c1 and c3 were inserted with is_deleted=1)
+SELECT '== (Replicas) Test settings ==';
+SELECT * FROM testSettingsR1;
+
+
+------------------------------
+-- Check errors
+DROP TABLE IF EXISTS test;
+CREATE TABLE test (uid String, version UInt32, is_deleted UInt8) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid);
+
+-- is_deleted == 0/1
+INSERT INTO test (*) VALUES ('d1', 1, 2); -- { serverError INCORRECT_DATA }
+
+DROP TABLE IF EXISTS test;
+-- check is_deleted type
+CREATE TABLE test (uid String, version UInt32, is_deleted String) ENGINE = ReplacingMergeTree(version, is_deleted) Order by (uid); -- { serverError BAD_TYPE_OF_FIELD }
+
+-- is_deleted column for other MergeTrees (ErrorCodes::LOGICAL_ERROR)
+
+-- Check clean_deleted_rows='Always' for other MergeTrees
+SELECT '== Check cleanup & settings for other merge trees ==';
+CREATE TABLE testMT (uid String, version UInt32, is_deleted UInt8) ENGINE = MergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always';
+INSERT INTO testMT (*) VALUES ('d1', 1, 1);
+OPTIMIZE TABLE testMT FINAL CLEANUP;  -- { serverError CANNOT_ASSIGN_OPTIMIZE }
+OPTIMIZE TABLE testMT FINAL;
+SELECT * FROM testMT;
+
+CREATE TABLE testSummingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = SummingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always';
+INSERT INTO testSummingMT (*) VALUES ('d1', 1, 1);
+OPTIMIZE TABLE testSummingMT FINAL CLEANUP;  -- { serverError CANNOT_ASSIGN_OPTIMIZE }
+OPTIMIZE TABLE testSummingMT FINAL;
+SELECT * FROM testSummingMT;
+
+CREATE TABLE testAggregatingMT (uid String, version UInt32, is_deleted UInt8) ENGINE = AggregatingMergeTree() Order by (uid) SETTINGS clean_deleted_rows='Always';
+INSERT INTO testAggregatingMT (*) VALUES ('d1', 1, 1);
+OPTIMIZE TABLE testAggregatingMT FINAL CLEANUP;  -- { serverError CANNOT_ASSIGN_OPTIMIZE }
+OPTIMIZE TABLE testAggregatingMT FINAL;
+SELECT * FROM testAggregatingMT;
+
+CREATE TABLE testCollapsingMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = CollapsingMergeTree(sign) Order by (uid) SETTINGS clean_deleted_rows='Always';
+INSERT INTO testCollapsingMT (*) VALUES ('d1', 1, 1, 1);
+OPTIMIZE TABLE testCollapsingMT FINAL CLEANUP;  -- { serverError CANNOT_ASSIGN_OPTIMIZE }
+OPTIMIZE TABLE testCollapsingMT FINAL;
+SELECT * FROM testCollapsingMT;
+
+CREATE TABLE testVersionedCMT (uid String, version UInt32, is_deleted UInt8, sign Int8) ENGINE = VersionedCollapsingMergeTree(sign, version) Order by (uid) SETTINGS clean_deleted_rows='Always';
+INSERT INTO testVersionedCMT (*) VALUES ('d1', 1, 1, 1);
+OPTIMIZE TABLE testVersionedCMT FINAL CLEANUP;  -- { serverError CANNOT_ASSIGN_OPTIMIZE }
+OPTIMIZE TABLE testVersionedCMT FINAL;
+SELECT * FROM testVersionedCMT;
--- a/tests/queries/0_stateless/02550_client_connections_credentials.reference
+++ b/tests/queries/0_stateless/02550_client_connections_credentials.reference
@ -1,5 +1,10 @@
+connection
+No such connection 'no_such_connection' in connections_credentials
 hostname
-Not found address of host: MySQL.
+Not found address of host: test_hostname_invalid.
+1
+system
+system
 port
 Connection refused (localhost:0).
 9000
--- a/tests/queries/0_stateless/02550_client_connections_credentials.sh
+++ b/tests/queries/0_stateless/02550_client_connections_credentials.sh
@ -22,10 +22,15 @@ cat > $CONFIG <<EOL

    <connections_credentials>
        <connection>
-            <name>test_hostname</name>
+            <name>test_hostname_invalid</name>
            <hostname>MySQL</hostname>
        </connection>

+        <connection>
+            <name>$TEST_HOST</name>
+            <database>system</database>
+        </connection>
+
        <connection>
            <name>test_port</name>
            <hostname>$TEST_HOST</hostname>
@ -65,22 +70,27 @@ cat > $CONFIG <<EOL
 </clickhouse>
 EOL

+echo 'connection'
+$CLICKHOUSE_CLIENT --config $CONFIG --connection no_such_connection -q 'select 1' |& grep -F -o "No such connection 'no_such_connection' in connections_credentials"
 echo 'hostname'
-$CLICKHOUSE_CLIENT --config $CONFIG --host test_hostname -q 'select 1' |& grep -F -o 'Not found address of host: MySQL.'
+$CLICKHOUSE_CLIENT --config $CONFIG --host test_hostname_invalid -q 'select 1' |& grep -F -o 'Not found address of host: test_hostname_invalid.'
+$CLICKHOUSE_CLIENT --config $CONFIG --connection test_hostname_invalid --host $TEST_HOST -q 'select 1'
+$CLICKHOUSE_CLIENT --config $CONFIG -q 'select currentDatabase()'
+$CLICKHOUSE_CLIENT --config $CONFIG --host $TEST_HOST -q 'select currentDatabase()'
 echo 'port'
-$CLICKHOUSE_CLIENT --config $CONFIG --host test_port -q 'select tcpPort()' |& grep -F -o 'Connection refused (localhost:0).'
-$CLICKHOUSE_CLIENT --config $CONFIG --host test_port --port $TEST_PORT -q 'select tcpPort()'
+$CLICKHOUSE_CLIENT --config $CONFIG --connection test_port -q 'select tcpPort()' |& grep -F -o 'Connection refused (localhost:0).'
+$CLICKHOUSE_CLIENT --config $CONFIG --connection test_port --port $TEST_PORT -q 'select tcpPort()'
 echo 'secure'
-$CLICKHOUSE_CLIENT --config $CONFIG --host test_secure -q 'select tcpPort()' |& grep -c -F -o -e OPENSSL_internal:WRONG_VERSION_NUMBER -e 'tcp_secure protocol is disabled because poco library was built without NetSSL support.'
+$CLICKHOUSE_CLIENT --config $CONFIG --connection test_secure -q 'select tcpPort()' |& grep -c -F -o -e OPENSSL_internal:WRONG_VERSION_NUMBER -e 'tcp_secure protocol is disabled because poco library was built without NetSSL support.'
 echo 'database'
-$CLICKHOUSE_CLIENT --config $CONFIG --host test_database -q 'select currentDatabase()'
+$CLICKHOUSE_CLIENT --config $CONFIG --connection test_database -q 'select currentDatabase()'
 echo 'user'
-$CLICKHOUSE_CLIENT --config $CONFIG --host test_user -q 'select currentUser()' |& grep -F -o 'MySQL: Authentication failed'
-$CLICKHOUSE_CLIENT --config $CONFIG --host test_user --user default -q 'select currentUser()'
+$CLICKHOUSE_CLIENT --config $CONFIG --connection test_user -q 'select currentUser()' |& grep -F -o 'MySQL: Authentication failed'
+$CLICKHOUSE_CLIENT --config $CONFIG --connection test_user --user default -q 'select currentUser()'
 echo 'password'
-$CLICKHOUSE_CLIENT --config $CONFIG --host test_password -q 'select currentUser()' |& grep -F -o 'default: Authentication failed: password is incorrect, or there is no user with such name.'
-$CLICKHOUSE_CLIENT --config $CONFIG --host test_password --password "" -q 'select currentUser()'
+$CLICKHOUSE_CLIENT --config $CONFIG --connection test_password -q 'select currentUser()' |& grep -F -o 'default: Authentication failed: password is incorrect, or there is no user with such name.'
+$CLICKHOUSE_CLIENT --config $CONFIG --connection test_password --password "" -q 'select currentUser()'
 echo 'history_file'
-$CLICKHOUSE_CLIENT --progress off --interactive --config $CONFIG --host test_history_file -q 'select 1' </dev/null |& grep -F -o 'Cannot create file: /no/such/dir/.history'
+$CLICKHOUSE_CLIENT --progress off --interactive --config $CONFIG --connection test_history_file -q 'select 1' </dev/null |& grep -F -o 'Cannot create file: /no/such/dir/.history'

 rm -f "${CONFIG:?}"
--- a/tests/queries/0_stateless/02564_date_format.reference
+++ b/tests/queries/0_stateless/02564_date_format.reference
@ -0,0 +1,30 @@
+Tue	Tue
+Jan	Jan
+01	01
+20	20
+02	02
+01/02/18	01/02/18
+ 2	 2
+2018-01-02	2018-01-02
+10	12
+22	00
+02
+33	00
+10	12
+11
+12
+001	001
+366	366
+00	00
+01	01
+33	00
+\n	\n
+AM	AM
+AM
+PM
+10:33 PM	12:00 AM
+22:33	00:00
+44	00
+\t	\t
+22:33:44	00:00:00
+Tuesday	Tuesday
--- a/tests/queries/0_stateless/02564_date_format.sql
+++ b/tests/queries/0_stateless/02564_date_format.sql
@ -0,0 +1,30 @@
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%a'), DATE_FORMAT(toDate32('2018-01-02'), '%a');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%b'), DATE_FORMAT(toDate32('2018-01-02'), '%b');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%c'), DATE_FORMAT(toDate32('2018-01-02'), '%c');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%C'), DATE_FORMAT(toDate32('2018-01-02'), '%C');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%d'), DATE_FORMAT(toDate32('2018-01-02'), '%d');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%D'), DATE_FORMAT(toDate32('2018-01-02'), '%D');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%e'), DATE_FORMAT(toDate32('2018-01-02'), '%e');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%F'), DATE_FORMAT(toDate32('2018-01-02'), '%F');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%h'), DATE_FORMAT(toDate32('2018-01-02'), '%h');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%H'), DATE_FORMAT(toDate32('2018-01-02'), '%H');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 02:33:44'), '%H');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%i'), DATE_FORMAT(toDate32('2018-01-02'), '%i');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%I'), DATE_FORMAT(toDate32('2018-01-02'), '%I');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 11:33:44'), '%I');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 00:33:44'), '%I');
+SELECT DATE_FORMAT(toDateTime('2018-01-01 00:33:44'), '%j'), DATE_FORMAT(toDate32('2018-01-01'), '%j');
+SELECT DATE_FORMAT(toDateTime('2000-12-31 00:33:44'), '%j'), DATE_FORMAT(toDate32('2000-12-31'), '%j');
+SELECT DATE_FORMAT(toDateTime('2000-12-31 00:33:44'), '%k'), DATE_FORMAT(toDate32('2000-12-31'), '%k');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%m'), DATE_FORMAT(toDate32('2018-01-02'), '%m');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%M'), DATE_FORMAT(toDate32('2018-01-02'), '%M');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%n'), DATE_FORMAT(toDate32('2018-01-02'), '%n');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 00:33:44'), '%p'), DATE_FORMAT(toDate32('2018-01-02'), '%p');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 11:33:44'), '%p');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 12:33:44'), '%p');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%r'), DATE_FORMAT(toDate32('2018-01-02'), '%r');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%R'), DATE_FORMAT(toDate32('2018-01-02'), '%R');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%S'), DATE_FORMAT(toDate32('2018-01-02'), '%S');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%t'), DATE_FORMAT(toDate32('2018-01-02'), '%t');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%T'), DATE_FORMAT(toDate32('2018-01-02'), '%T');
+SELECT DATE_FORMAT(toDateTime('2018-01-02 22:33:44'), '%W'), DATE_FORMAT(toDate32('2018-01-02'), '%W');
--- a/tests/queries/0_stateless/02570_merge_alias_prewhere.reference
+++ b/tests/queries/0_stateless/02570_merge_alias_prewhere.reference
@ -0,0 +1,12 @@
+-- { echoOn }
+-- for pure PREWHERE it is not addressed yet.
+SELECT * FROM m PREWHERE a = 'OK';
+OK	0
+SELECT * FROM m PREWHERE f = 0; -- { serverError ILLEGAL_PREWHERE }
+SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0;
+OK	0
+SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1;
+OK	0
+-- { echoOn }
+SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1;
+OK	0
--- a/tests/queries/0_stateless/02570_merge_alias_prewhere.sql
+++ b/tests/queries/0_stateless/02570_merge_alias_prewhere.sql
@ -0,0 +1,42 @@
+DROP TABLE IF EXISTS m;
+DROP TABLE IF EXISTS t1;
+DROP TABLE IF EXISTS t2;
+
+CREATE TABLE m
+(
+    `a` String,
+    `f` UInt8
+)
+ENGINE = Merge(currentDatabase(), '^(t1|t2)$');
+
+CREATE TABLE t1
+(
+    a String,
+    f UInt8 ALIAS 0
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+SETTINGS index_granularity = 8192;
+INSERT INTO t1 VALUES ('OK');
+
+-- { echoOn }
+-- for pure PREWHERE it is not addressed yet.
+SELECT * FROM m PREWHERE a = 'OK';
+SELECT * FROM m PREWHERE f = 0; -- { serverError ILLEGAL_PREWHERE }
+SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=0;
+SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1;
+-- { echoOff }
+
+CREATE TABLE t2
+(
+    a String,
+    f UInt8,
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+SETTINGS index_granularity = 8192;
+INSERT INTO t2 VALUES ('OK', 1);
+
+-- { echoOn }
+SELECT * FROM m WHERE f = 0 SETTINGS optimize_move_to_prewhere=1;
+-- { echoOff }
--- a/Show More
+++ b/Show More